pytorch/caffe2/core/context.h

#ifndef CAFFE2_CORE_CONTEXT_H_
#define CAFFE2_CORE_CONTEXT_H_

#include <cstdlib>
#include <ctime>
#include <random>
#include <unordered_map>

#include <c10/util/typeid.h>
#include "caffe2/core/allocator.h"
#include "caffe2/core/context_base.h"
#include "caffe2/core/event.h"
#include "caffe2/core/logging.h"
#include "caffe2/proto/caffe2_pb.h"

#include <c10/util/ArrayRef.h>

#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
#include <c10/core/GeneratorImpl.h>
#include <c10/util/irange.h>
#include <ATen/core/DistributionsHelper.h>
#include <ATen/core/MT19937RNGEngine.h>
#else
#include "caffe2/core/distributions_stubs.h"
#endif

C10_DECLARE_bool(caffe2_report_cpu_memory_usage);

namespace caffe2 {

/**
 * A function to generate a random number seed that is unique in a best-effort
 * basis, using an ever-incrementing seed and the current time.
 */
TORCH_API uint32_t RandomNumberSeed();

/**
 * The CPU Context, representing the bare minimum of what a Context class in
 * Caffe2 should implement.
 *
 * // TODO modify docs
 * See operator.h, especially Operator<Context>, for how Context are used in
 * actual operator implementations that are associated with specific devices.
 * In general, the Context class is passed in as a template argument, and
 * the operator can use the functions defined in the context to execute whatever
 * computation it has.
 *
 */
class TORCH_API CPUContext final : public BaseContext {
 public:
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  class rand_gen_type {
   public:
    explicit rand_gen_type(uint64_t seed_in = default_rng_seed_val)
        : engine_{seed_in} {}

    uint32_t random() {
      return engine_();
    }
    uint64_t random64() {
      uint32_t random1 = engine_();
      uint32_t random2 = engine_();
      return (static_cast<uint64_t>(random1) << 32) | random2;
    }

    c10::optional<float> next_float_normal_sample() {
      return next_float_normal_sample_;
    }
    c10::optional<double> next_double_normal_sample() {
      return next_double_normal_sample_;
    }
    void set_next_float_normal_sample(c10::optional<float> randn) {
      next_float_normal_sample_ = randn;
    }
    void set_next_double_normal_sample(c10::optional<double> randn) {
      next_double_normal_sample_ = randn;
    }

   private:
    at::mt19937 engine_;
    c10::optional<float> next_float_normal_sample_;
    c10::optional<double> next_double_normal_sample_;
  };
#else
  typedef std::mt19937 rand_gen_type;
#endif

  CPUContext() {}
  explicit CPUContext(const DeviceOption& option)
      : random_seed_(option.has_random_seed() ? option.random_seed() : 1701),
        random_seed_set_(option.has_random_seed() ? true : false) {
    CAFFE_ENFORCE_EQ(option.device_type(), PROTO_CPU);
  }
  explicit CPUContext(const at::Device& device)
      : CPUContext(DeviceToOption(device)) {}

  ~CPUContext() noexcept override {}

  inline void SwitchToDevice(int64_t /*stream_id*/) override {}

  using BaseContext::SwitchToDevice;

  inline void WaitEvent(const Event& ev) override {
    ev.Wait(CPU, this);
  }

  inline void Record(Event* ev, const char* err_msg = nullptr) const override {
    CAFFE_ENFORCE(ev, "Event must not be null.");
    ev->Record(CPU, this, err_msg);
  }

  inline void FinishDeviceComputation() override {}

  inline rand_gen_type* RandGenerator() {
    if (!random_generator_.get()) {
      random_generator_.reset(new rand_gen_type(RandSeed()));
    }
    return random_generator_.get();
  }

  inline uint32_t RandSeed() {
    if (!random_seed_set_) {
      random_seed_ = RandomNumberSeed();
      random_seed_set_ = true;
    }
    return static_cast<uint32_t>(random_seed_);
  }

  inline static at::DataPtr New(size_t nbytes) {
    return GetCPUAllocator()->allocate(nbytes);
  }

  void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override;

  void CopyBytesFromCPU(size_t nbytes, const void* src, void* dst) override {
    CopyBytesSameDevice(nbytes, src, dst);
  }

  void CopyBytesToCPU(size_t nbytes, const void* src, void* dst) override {
    CopyBytesSameDevice(nbytes, src, dst);
  }

  bool SupportsNonFundamentalTypes() const override {
    // CPU non fumdamental type copy OK
    return true;
  }

  template <class SrcContext, class DstContext>
  inline void CopyBytes(size_t nbytes, const void* src, void* dst);

  template <typename T, class SrcContext, class DstContext>
  inline void Copy(size_t n, const T* src, T* dst) {
    if (c10::guts::is_fundamental<T>::value) {
      CopyBytes<SrcContext, DstContext>(
          n * sizeof(T),
          static_cast<const void*>(src),
          static_cast<void*>(dst));
    } else {
      for (const auto i : c10::irange(n)) {
        dst[i] = src[i];
      }
    }
  }

  template <class SrcContext, class DstContext>
  inline void
  CopyItems(const TypeMeta meta, size_t n, const void* src, void* dst) {
    if (meta.copy()) {
      meta.copy()(src, dst, n);
    } else {
      CopyBytes<SrcContext, DstContext>(n * meta.itemsize(), src, dst);
    }
  }

  // By default CPU operators don't have async device parts
  static bool HasAsyncPartDefault() {
    return false;
  }

  static bool SupportsAsyncScheduling() {
    return false;
  }

  // CPU streams are not implemented and are silently ignored by CPU ops,
  // return true to signal executor to schedule a CPU op
  static bool IsStreamFree(
      const DeviceOption& /* option */,
      int /* stream_id */) {
    return true;
  }

  at::Device device() const override {
    // TODO: numa?
    return at::Device(CPU);
  }

  DeviceType device_type() const override {
    return CPU;
  }

  static constexpr DeviceType GetDeviceType() {
    return CPU;
  }

 protected:
  // TODO(jiayq): instead of hard-coding a generator, make it more flexible.
  int random_seed_{1701};
  bool random_seed_set_{false};
  std::unique_ptr<rand_gen_type> random_generator_;
};

template <>
inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
    size_t nbytes,
    const void* src,
    void* dst) {
  if (nbytes == 0) {
    return;
  }
  CAFFE_ENFORCE(src);
  CAFFE_ENFORCE(dst);
  memcpy(dst, src, nbytes);
}

} // namespace caffe2

#endif // CAFFE2_CORE_CONTEXT_H_