mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/66746 Modified loops in files under fbsource/fbcode/caffe2/ from the format `for(TYPE var=x0;var<x_max;x++)` to the format `for(const auto var: irange(xmax))` This was achieved by running r-barnes's loop upgrader script (D28874212) with some modification to exclude all files under /torch/jit and a number of reversions or unused variable suppression warnings added by hand. Test Plan: Sandcastle Reviewed By: malfet Differential Revision: D31705361 fbshipit-source-id: 33fd22eb03086d114e2c98e56703e8ec84460268
228 lines
6.1 KiB
C++
228 lines
6.1 KiB
C++
#ifndef CAFFE2_CORE_CONTEXT_H_
|
|
#define CAFFE2_CORE_CONTEXT_H_
|
|
|
|
#include <cstdlib>
|
|
#include <ctime>
|
|
#include <random>
|
|
#include <unordered_map>
|
|
|
|
#include <c10/util/typeid.h>
|
|
#include "caffe2/core/allocator.h"
|
|
#include "caffe2/core/context_base.h"
|
|
#include "caffe2/core/event.h"
|
|
#include "caffe2/core/logging.h"
|
|
#include "caffe2/proto/caffe2_pb.h"
|
|
|
|
#include <c10/util/ArrayRef.h>
|
|
|
|
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
|
#include <c10/core/GeneratorImpl.h>
|
|
#include <c10/util/irange.h>
|
|
#include <ATen/core/DistributionsHelper.h>
|
|
#include <ATen/core/MT19937RNGEngine.h>
|
|
#else
|
|
#include "caffe2/core/distributions_stubs.h"
|
|
#endif
|
|
|
|
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
|
|
|
namespace caffe2 {
|
|
|
|
/**
|
|
* A function to generate a random number seed that is unique in a best-effort
|
|
* basis, using an ever-incrementing seed and the current time.
|
|
*/
|
|
TORCH_API uint32_t RandomNumberSeed();
|
|
|
|
/**
|
|
* The CPU Context, representing the bare minimum of what a Context class in
|
|
* Caffe2 should implement.
|
|
*
|
|
* // TODO modify docs
|
|
* See operator.h, especially Operator<Context>, for how Context are used in
|
|
* actual operator implementations that are associated with specific devices.
|
|
* In general, the Context class is passed in as a template argument, and
|
|
* the operator can use the functions defined in the context to execute whatever
|
|
* computation it has.
|
|
*
|
|
*/
|
|
class TORCH_API CPUContext final : public BaseContext {
|
|
public:
|
|
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
|
class rand_gen_type {
|
|
public:
|
|
explicit rand_gen_type(uint64_t seed_in = default_rng_seed_val)
|
|
: engine_{seed_in} {}
|
|
|
|
uint32_t random() {
|
|
return engine_();
|
|
}
|
|
uint64_t random64() {
|
|
uint32_t random1 = engine_();
|
|
uint32_t random2 = engine_();
|
|
return (static_cast<uint64_t>(random1) << 32) | random2;
|
|
}
|
|
|
|
c10::optional<float> next_float_normal_sample() {
|
|
return next_float_normal_sample_;
|
|
}
|
|
c10::optional<double> next_double_normal_sample() {
|
|
return next_double_normal_sample_;
|
|
}
|
|
void set_next_float_normal_sample(c10::optional<float> randn) {
|
|
next_float_normal_sample_ = randn;
|
|
}
|
|
void set_next_double_normal_sample(c10::optional<double> randn) {
|
|
next_double_normal_sample_ = randn;
|
|
}
|
|
|
|
private:
|
|
at::mt19937 engine_;
|
|
c10::optional<float> next_float_normal_sample_;
|
|
c10::optional<double> next_double_normal_sample_;
|
|
};
|
|
#else
|
|
typedef std::mt19937 rand_gen_type;
|
|
#endif
|
|
|
|
CPUContext() {}
|
|
explicit CPUContext(const DeviceOption& option)
|
|
: random_seed_(option.has_random_seed() ? option.random_seed() : 1701),
|
|
random_seed_set_(option.has_random_seed() ? true : false) {
|
|
CAFFE_ENFORCE_EQ(option.device_type(), PROTO_CPU);
|
|
}
|
|
explicit CPUContext(const at::Device& device)
|
|
: CPUContext(DeviceToOption(device)) {}
|
|
|
|
~CPUContext() noexcept override {}
|
|
|
|
inline void SwitchToDevice(int64_t /*stream_id*/) override {}
|
|
|
|
using BaseContext::SwitchToDevice;
|
|
|
|
inline void WaitEvent(const Event& ev) override {
|
|
ev.Wait(CPU, this);
|
|
}
|
|
|
|
inline void Record(Event* ev, const char* err_msg = nullptr) const override {
|
|
CAFFE_ENFORCE(ev, "Event must not be null.");
|
|
ev->Record(CPU, this, err_msg);
|
|
}
|
|
|
|
inline void FinishDeviceComputation() override {}
|
|
|
|
inline rand_gen_type* RandGenerator() {
|
|
if (!random_generator_.get()) {
|
|
random_generator_.reset(new rand_gen_type(RandSeed()));
|
|
}
|
|
return random_generator_.get();
|
|
}
|
|
|
|
inline uint32_t RandSeed() {
|
|
if (!random_seed_set_) {
|
|
random_seed_ = RandomNumberSeed();
|
|
random_seed_set_ = true;
|
|
}
|
|
return static_cast<uint32_t>(random_seed_);
|
|
}
|
|
|
|
inline static at::DataPtr New(size_t nbytes) {
|
|
return GetCPUAllocator()->allocate(nbytes);
|
|
}
|
|
|
|
void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override;
|
|
|
|
void CopyBytesFromCPU(size_t nbytes, const void* src, void* dst) override {
|
|
CopyBytesSameDevice(nbytes, src, dst);
|
|
}
|
|
|
|
void CopyBytesToCPU(size_t nbytes, const void* src, void* dst) override {
|
|
CopyBytesSameDevice(nbytes, src, dst);
|
|
}
|
|
|
|
bool SupportsNonFundamentalTypes() const override {
|
|
// CPU non fumdamental type copy OK
|
|
return true;
|
|
}
|
|
|
|
template <class SrcContext, class DstContext>
|
|
inline void CopyBytes(size_t nbytes, const void* src, void* dst);
|
|
|
|
template <typename T, class SrcContext, class DstContext>
|
|
inline void Copy(size_t n, const T* src, T* dst) {
|
|
if (c10::guts::is_fundamental<T>::value) {
|
|
CopyBytes<SrcContext, DstContext>(
|
|
n * sizeof(T),
|
|
static_cast<const void*>(src),
|
|
static_cast<void*>(dst));
|
|
} else {
|
|
for (const auto i : c10::irange(n)) {
|
|
dst[i] = src[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
template <class SrcContext, class DstContext>
|
|
inline void
|
|
CopyItems(const TypeMeta meta, size_t n, const void* src, void* dst) {
|
|
if (meta.copy()) {
|
|
meta.copy()(src, dst, n);
|
|
} else {
|
|
CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
|
|
}
|
|
}
|
|
|
|
// By default CPU operators don't have async device parts
|
|
static bool HasAsyncPartDefault() {
|
|
return false;
|
|
}
|
|
|
|
static bool SupportsAsyncScheduling() {
|
|
return false;
|
|
}
|
|
|
|
// CPU streams are not implemented and are silently ignored by CPU ops,
|
|
// return true to signal executor to schedule a CPU op
|
|
static bool IsStreamFree(
|
|
const DeviceOption& /* option */,
|
|
int /* stream_id */) {
|
|
return true;
|
|
}
|
|
|
|
at::Device device() const override {
|
|
// TODO: numa?
|
|
return at::Device(CPU);
|
|
}
|
|
|
|
DeviceType device_type() const override {
|
|
return CPU;
|
|
}
|
|
|
|
static constexpr DeviceType GetDeviceType() {
|
|
return CPU;
|
|
}
|
|
|
|
protected:
|
|
// TODO(jiayq): instead of hard-coding a generator, make it more flexible.
|
|
int random_seed_{1701};
|
|
bool random_seed_set_{false};
|
|
std::unique_ptr<rand_gen_type> random_generator_;
|
|
};
|
|
|
|
template <>
|
|
inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
|
|
size_t nbytes,
|
|
const void* src,
|
|
void* dst) {
|
|
if (nbytes == 0) {
|
|
return;
|
|
}
|
|
CAFFE_ENFORCE(src);
|
|
CAFFE_ENFORCE(dst);
|
|
memcpy(dst, src, nbytes);
|
|
}
|
|
|
|
} // namespace caffe2
|
|
|
|
#endif // CAFFE2_CORE_CONTEXT_H_
|