set up c10 scaffolding. Move macros proper first.

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/11939

Reviewed By: orionr, dzhulgakov

Differential Revision: D10004629

Pulled By: Yangqing

fbshipit-source-id: ba50a96820d35c7922d81c78c4cbe849c85c251c
2025-12-06 12:20:52 +01:00 · 2018-09-24 11:02:46 -07:00 · 2018-09-24 11:02:46 -07:00 · a6f1ae7f20
commit a6f1ae7f20
parent 1a1d79e761
59 changed files with 412 additions and 242 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -5,6 +5,11 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 # ---[ Project and semantic versioning.
 project(Caffe2 CXX C)

+set(CMAKE_CXX_STANDARD 11)
+if (NOT MSVC)
+  set(CMAKE_C_STANDARD 11)
+endif()
+
 set(CAFFE2_VERSION_MAJOR 0)
 set(CAFFE2_VERSION_MINOR 8)
 set(CAFFE2_VERSION_PATCH 2)
@ -294,6 +299,7 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
 include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)

 # ---[ Main build
+add_subdirectory(c10)
 add_subdirectory(caffe2)

 # --[ Documentation
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -262,9 +262,9 @@ than Linux, which are worth keeping in mind when fixing these problems.
 1. Symbols are NOT exported by default on Windows; instead, you have to explicitly
   mark a symbol as exported/imported in a header file with `__declspec(dllexport)` /
   `__declspec(dllimport)`.  We have codified this pattern into a set of macros
-   which follow the convention `*_API`, e.g., `AT_API` inside ATen. (Every separate
-   shared library needs a unique macro name, because symbol visibility is on a per
-   shared library basis.)
+   which follow the convention `*_API`, e.g., `CAFFE2_API` inside Caffe2 and ATen.
+   (Every separate shared library needs a unique macro name, because symbol visibility
+   is on a per shared library basis. See c10/macros/Macros.h for more details.)

   The upshot is if you see an "unresolved external" error in your Windows build, this
   is probably because you forgot to mark a function with `*_API`.  However, there is
--- a/aten/src/ATen/CPUGeneral.h
+++ b/aten/src/ATen/CPUGeneral.h
@ -1,12 +1,12 @@
 #pragma once

-// Using AT_API is crucial as otherwise you'll see
+// Using CAFFE2_API is crucial as otherwise you'll see
 // linking errors using MSVC
 // See https://msdn.microsoft.com/en-us/library/a90k134d.aspx
-// This header adds this if using AT_API
+// This header adds this if using CAFFE2_API
 #include "ATen/core/ATenGeneral.h"

 namespace at {
-AT_API void set_num_threads(int);
-AT_API int get_num_threads();
+CAFFE2_API void set_num_threads(int);
+CAFFE2_API int get_num_threads();
 }
--- a/aten/src/ATen/CPUTypeDefault.h
+++ b/aten/src/ATen/CPUTypeDefault.h
@ -3,7 +3,7 @@

 namespace at {

-struct AT_API CPUTypeDefault : public TypeDefault {
+struct CAFFE2_API CPUTypeDefault : public TypeDefault {
  CPUTypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined)
      : TypeDefault(type_id, is_variable, is_undefined) {}
  Allocator* allocator() const override;
--- a/aten/src/ATen/Context.h
+++ b/aten/src/ATen/Context.h
@ -24,8 +24,8 @@ namespace at {

 struct Tensor;

-class AT_API Context {
-public:
+class CAFFE2_API Context {
+ public:
  Context();
  TypeExtendedInterface* getNonVariableTypeRaw(Backend p, ScalarType s) {
    return static_cast<TypeExtendedInterface*>(globalLegacyTypeDispatch().getNonVariableTypeRaw(p, s));
@ -133,7 +133,7 @@ private:
  friend struct Type;
 };

-AT_API Context & globalContext();
+CAFFE2_API Context& globalContext();

 static inline void init() {
  globalContext();
@ -153,11 +153,11 @@ static inline TypeExtendedInterface& getNonVariableType(DeviceType p, ScalarType
  return globalContext().getNonVariableType(deviceTypeToBackend(p), s);
 }

-AT_API TypeExtendedInterface& getType(TensorOptions options);
-AT_API TypeExtendedInterface& getType(const TensorImpl*);
-AT_API TypeExtendedInterface& getType(const Tensor&);
+CAFFE2_API TypeExtendedInterface& getType(TensorOptions options);
+CAFFE2_API TypeExtendedInterface& getType(const TensorImpl*);
+CAFFE2_API TypeExtendedInterface& getType(const Tensor&);

-AT_API Allocator* getCPUAllocator();
+CAFFE2_API Allocator* getCPUAllocator();

 static inline TypeExtendedInterface& CPU(ScalarType s) {
  return getNonVariableType(Backend::CPU, s);
--- a/aten/src/ATen/DLConvertor.h
+++ b/aten/src/ATen/DLConvertor.h
@ -10,8 +10,8 @@

 namespace at {

-AT_API ScalarType toScalarType(const DLDataType& dtype);
-AT_API DLManagedTensor * toDLPack(const Tensor& src);
-AT_API Tensor fromDLPack(const DLManagedTensor* src);
+CAFFE2_API ScalarType toScalarType(const DLDataType& dtype);
+CAFFE2_API DLManagedTensor* toDLPack(const Tensor& src);
+CAFFE2_API Tensor fromDLPack(const DLManagedTensor* src);

 } //namespace at
--- a/aten/src/ATen/ExpandUtils.h
+++ b/aten/src/ATen/ExpandUtils.h
@ -9,9 +9,12 @@

 namespace at {

-AT_API std::vector<int64_t> infer_size(IntList a, IntList b);
-AT_API std::tuple<std::vector<int64_t>, std::vector<int64_t> > inferExpandGeometry(
-    IntList tensor_sizes, IntList tensor_strides, IntList sizes);
+CAFFE2_API std::vector<int64_t> infer_size(IntList a, IntList b);
+CAFFE2_API std::tuple<std::vector<int64_t>, std::vector<int64_t>>
+inferExpandGeometry(
+    IntList tensor_sizes,
+    IntList tensor_strides,
+    IntList sizes);

 // avoid copy-construction of Tensor by using a reference_wrapper.
 inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) {
--- a/aten/src/ATen/SparseTensorImpl.h
+++ b/aten/src/ATen/SparseTensorImpl.h
@ -5,7 +5,7 @@
 #include "ATen/core/Error.h"

 namespace at {
-struct AT_API SparseTensorImpl : public TensorImpl {
+struct CAFFE2_API SparseTensorImpl : public TensorImpl {
  // Stored in COO format, indices + values.

  // INVARIANTS:
--- a/aten/src/ATen/TensorGeometry.h
+++ b/aten/src/ATen/TensorGeometry.h
@ -5,7 +5,7 @@

 namespace at {

-struct AT_API TensorGeometry {
+struct CAFFE2_API TensorGeometry {
  TensorGeometry() : storage_offset_(0) {}

  explicit TensorGeometry(IntList sizes)
--- a/aten/src/ATen/TensorUtils.h
+++ b/aten/src/ATen/TensorUtils.h
@ -12,7 +12,7 @@ namespace at {
 // make sense.  These are particularly useful for native functions,
 // which do NO argument checking by default.

-struct AT_API TensorArg {
+struct CAFFE2_API TensorArg {
  Tensor tensor;
  const char* name;
  int pos; // 1-indexed
@ -22,7 +22,7 @@ struct AT_API TensorArg {
  const Tensor& operator*() const { return tensor; }
 };

-struct AT_API TensorGeometryArg {
+struct CAFFE2_API TensorGeometryArg {
  TensorGeometry tensor;
  const char* name;
  int pos; // 1-indexed
@ -49,40 +49,80 @@ using CheckedFrom = const char*;
 // not TensorGeometryArg, because the Tensor to TensorGeometry
 // conversion will blow up if you have undefined tensors.

-AT_API std::ostream& operator<<(std::ostream & out, TensorGeometryArg t);
-AT_API void checkDim(CheckedFrom c, const TensorGeometryArg& t, int64_t dim);
+CAFFE2_API std::ostream& operator<<(std::ostream& out, TensorGeometryArg t);
+CAFFE2_API void checkDim(
+    CheckedFrom c,
+    const TensorGeometryArg& t,
+    int64_t dim);
 // NB: this is an inclusive-exclusive range
-AT_API void checkDimRange(CheckedFrom c, const TensorGeometryArg& t, int64_t dim_start, int64_t dim_end);
-AT_API void checkSameDim(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2);
-AT_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t);
-AT_API void checkAllContiguous(CheckedFrom c, at::ArrayRef<TensorArg> ts);
-AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, IntList sizes);
-AT_API void checkSize(CheckedFrom c, const TensorGeometryArg& t, int64_t dim, int64_t size);
-AT_API void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel);
-AT_API void checkSameNumel(CheckedFrom c, const TensorGeometryArg& t1, const TensorGeometryArg& t2);
-AT_API void checkAllSameNumel(CheckedFrom c, ArrayRef<TensorArg> tensors);
-AT_API void checkScalarType(CheckedFrom c, const TensorArg& t, ScalarType s);
-AT_API void checkScalarTypes(CheckedFrom c, const TensorArg& t, at::ArrayRef<ScalarType> l);
-AT_API void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
-AT_API void checkAllSameGPU(CheckedFrom c, ArrayRef<TensorArg> tensors);
-AT_API void checkSameType(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
-AT_API void checkAllSameType(CheckedFrom c, ArrayRef<TensorArg> tensors);
-AT_API void checkSameSize(CheckedFrom c, const TensorArg& t1, const TensorArg& t2);
-AT_API void checkDefined(CheckedFrom c, const TensorArg& t);
-AT_API void checkAllDefined(CheckedFrom c, at::ArrayRef<TensorArg> t);
+CAFFE2_API void checkDimRange(
+    CheckedFrom c,
+    const TensorGeometryArg& t,
+    int64_t dim_start,
+    int64_t dim_end);
+CAFFE2_API void checkSameDim(
+    CheckedFrom c,
+    const TensorGeometryArg& t1,
+    const TensorGeometryArg& t2);
+CAFFE2_API void checkContiguous(CheckedFrom c, const TensorGeometryArg& t);
+CAFFE2_API void checkAllContiguous(CheckedFrom c, at::ArrayRef<TensorArg> ts);
+CAFFE2_API void checkSize(
+    CheckedFrom c,
+    const TensorGeometryArg& t,
+    IntList sizes);
+CAFFE2_API void checkSize(
+    CheckedFrom c,
+    const TensorGeometryArg& t,
+    int64_t dim,
+    int64_t size);
+CAFFE2_API void checkNumel(
+    CheckedFrom c,
+    const TensorGeometryArg& t,
+    int64_t numel);
+CAFFE2_API void checkSameNumel(
+    CheckedFrom c,
+    const TensorGeometryArg& t1,
+    const TensorGeometryArg& t2);
+CAFFE2_API void checkAllSameNumel(CheckedFrom c, ArrayRef<TensorArg> tensors);
+CAFFE2_API void checkScalarType(
+    CheckedFrom c,
+    const TensorArg& t,
+    ScalarType s);
+CAFFE2_API void checkScalarTypes(
+    CheckedFrom c,
+    const TensorArg& t,
+    at::ArrayRef<ScalarType> l);
+CAFFE2_API void checkSameGPU(
+    CheckedFrom c,
+    const TensorArg& t1,
+    const TensorArg& t2);
+CAFFE2_API void checkAllSameGPU(CheckedFrom c, ArrayRef<TensorArg> tensors);
+CAFFE2_API void checkSameType(
+    CheckedFrom c,
+    const TensorArg& t1,
+    const TensorArg& t2);
+CAFFE2_API void checkAllSameType(CheckedFrom c, ArrayRef<TensorArg> tensors);
+CAFFE2_API void checkSameSize(
+    CheckedFrom c,
+    const TensorArg& t1,
+    const TensorArg& t2);
+CAFFE2_API void checkDefined(CheckedFrom c, const TensorArg& t);
+CAFFE2_API void checkAllDefined(CheckedFrom c, at::ArrayRef<TensorArg> t);

 // FixMe: does TensorArg slow things down?
-AT_API void checkBackend(CheckedFrom c, at::ArrayRef<Tensor> t, at::Backend backend);
+CAFFE2_API void checkBackend(
+    CheckedFrom c,
+    at::ArrayRef<Tensor> t,
+    at::Backend backend);

 // Methods for getting data_ptr if tensor is defined
-AT_API void * maybe_data_ptr(const Tensor& tensor);
-AT_API void * maybe_data_ptr(const TensorArg& tensor);
+CAFFE2_API void* maybe_data_ptr(const Tensor& tensor);
+CAFFE2_API void* maybe_data_ptr(const TensorArg& tensor);

 // Return if the tensor geometry represented by `sizes` and `strides` is contiguous
 // Although we cache is_contiguous in tensor now, this is till useful because it
 // allows checking if a particular geometry is contiguous without explicitly
 // constructing a tensor, e.g., when you want to choose a kernel strategy based
 // on whether a subgeometry is contiguous.
-AT_API bool geometry_is_contiguous(IntList sizes, IntList strides);
-
+CAFFE2_API bool geometry_is_contiguous(IntList sizes, IntList strides);
 }
--- a/aten/src/ATen/Utils.h
+++ b/aten/src/ATen/Utils.h
@ -24,7 +24,7 @@

 namespace at {

-AT_API int _crash_if_asan(int);
+CAFFE2_API int _crash_if_asan(int);

 static inline const Storage& checked_storage(
    const Storage& expr,
--- a/aten/src/ATen/core/ATenGeneral.h
+++ b/aten/src/ATen/core/ATenGeneral.h
@ -3,6 +3,5 @@
 #include "ATen/core/Macros.h"

 // TODO: Merge the *_API macros.
-#define AT_API AT_CORE_API
 #define AT_EXPORT AT_CORE_EXPORT
 #define AT_IMPORT AT_CORE_IMPORT
--- a/aten/src/ATen/core/Formatting.h
+++ b/aten/src/ATen/core/Formatting.h
@ -8,10 +8,13 @@

 namespace at {

-AT_API std::ostream& operator<<(std::ostream & out, IntList list);
-AT_API std::ostream& operator<<(std::ostream & out, Backend b);
-AT_API std::ostream& operator<<(std::ostream & out, const Type & t);
-AT_API std::ostream& print(std::ostream& stream, const Tensor & tensor, int64_t linesize);
+CAFFE2_API std::ostream& operator<<(std::ostream& out, IntList list);
+CAFFE2_API std::ostream& operator<<(std::ostream& out, Backend b);
+CAFFE2_API std::ostream& operator<<(std::ostream& out, const Type& t);
+CAFFE2_API std::ostream& print(
+    std::ostream& stream,
+    const Tensor& tensor,
+    int64_t linesize);
 static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
  return print(out,t,80);
 }
--- a/aten/src/ATen/core/Generator.h
+++ b/aten/src/ATen/core/Generator.h
@ -5,7 +5,7 @@

 namespace at {

-struct AT_API Generator {
+struct CAFFE2_API Generator {
  Generator() {};
  Generator(const Generator& other) = delete;
  Generator(Generator&& other) = delete;
--- a/aten/src/ATen/core/Macros.h
+++ b/aten/src/ATen/core/Macros.h
@ -3,41 +3,7 @@
 #include <sstream>
 #include <string>

-// You can use the definition AT_CORE_STATIC_WINDOWS to control whether
-// or not we apply __declspec.  You will want to set this as
-// -DAT_CORE_STATIC_WINDOWS=1 when compiling code which links
-// against ATen/core on Windows, when ATen/core is built as a
-// static library (in which case, saying the symbol is coming
-// from a DLL would be incorrect).
-
-#ifdef _WIN32
-#if !defined(AT_CORE_STATIC_WINDOWS)
-#define AT_CORE_EXPORT __declspec(dllexport)
-#define AT_CORE_IMPORT __declspec(dllimport)
-#else // !defined(AT_CORE_STATIC_WINDOWS)
-#define AT_CORE_EXPORT
-#define AT_CORE_IMPORT
-#endif // !defined(AT_CORE_STATIC_WINDOWS)
-#else  // _WIN32
-#if defined(__GNUC__)
-#define AT_CORE_EXPORT __attribute__((__visibility__("default")))
-#else // defined(__GNUC__)
-#define AT_CORE_EXPORT
-#endif // defined(__GNUC__)
-#define AT_CORE_IMPORT AT_CORE_EXPORT
-#endif  // _WIN32
-
-// AT_CORE_API is a macro that, depends on whether you are building the
-// main library or not, resolves to either AT_CORE_EXPORT or
-// AT_CORE_IMPORT.
-//
-
-// TODO: unify the controlling macros.
-#if defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
-#define AT_CORE_API AT_CORE_EXPORT
-#else // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
-#define AT_CORE_API AT_CORE_IMPORT
-#endif // defined(CAFFE2_BUILD_MAIN_LIBS) || defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
+#include "c10/macros/Macros.h"

 #ifdef __CUDACC__
 // Designates functions callable from the host (CPU) and the device (GPU)
@ -50,13 +16,6 @@
 #define AT_DEVICE
 #endif

-// Disable the copy and assignment operator for a class. Note that this will
-// disable the usage of the class in std containers.
-#define AT_DISABLE_COPY_AND_ASSIGN(classname) \
-  classname(const classname&) = delete;       \
-  classname& operator=(const classname&) = delete
-
-
 #if defined(__ANDROID__)
 #define AT_ANDROID 1
 #define AT_MOBILE 1
--- a/aten/src/ATen/core/OptionsGuard.h
+++ b/aten/src/ATen/core/OptionsGuard.h
@ -20,7 +20,7 @@ struct DefaultTensorOptions {
  /// Defined in OptionsGuard.cpp because we can't use optional in headers, due
  /// to Windows and other compilers.
  /// TODO: The inability to use optional in headers is no longer true
-  AT_API static TensorOptions& get();
+  CAFFE2_API static TensorOptions& get();

 private:
  /// This is an optional because of compiler bugs that mis-initialize static
@ -64,8 +64,9 @@ struct OptionsGuard {
 #else // AT_MOBILE

 struct DefaultTensorOptions {
-  AT_API static const TensorOptions& get();
-private:
+  CAFFE2_API static const TensorOptions& get();
+
+ private:
  static TensorOptions options_;
 };

--- a/aten/src/ATen/core/Registry.h
+++ b/aten/src/ATen/core/Registry.h
@ -44,7 +44,7 @@ inline void PrintOffendingKey(const std::string& key) {
 * objects.
 */
 template <class SrcType, class ObjectPtrType, class... Args>
-class AT_API Registry {
+class CAFFE2_API Registry {
 public:
  typedef std::function<ObjectPtrType(Args...)> Creator;

@ -114,7 +114,7 @@ class AT_API Registry {
 };

 template <class SrcType, class ObjectPtrType, class... Args>
-class AT_API Registerer {
+class CAFFE2_API Registerer {
 public:
  Registerer(
      const SrcType& key,
@ -152,11 +152,12 @@ class AT_API Registerer {
 * declaration, as well as creating a convenient typename for its corresponding
 * registerer.
 */
-#define AT_DECLARE_TYPED_REGISTRY(                                    \
-    RegistryName, SrcType, ObjectType, PtrType, ...)                     \
-  AT_API Registry<SrcType, PtrType<ObjectType>, __VA_ARGS__>* RegistryName(); \
-  typedef Registerer<SrcType, PtrType<ObjectType>, __VA_ARGS__>        \
-      Registerer##RegistryName; \
+#define AT_DECLARE_TYPED_REGISTRY(                                \
+    RegistryName, SrcType, ObjectType, PtrType, ...)              \
+  CAFFE2_API Registry<SrcType, PtrType<ObjectType>, __VA_ARGS__>* \
+  RegistryName();                                                 \
+  typedef Registerer<SrcType, PtrType<ObjectType>, __VA_ARGS__>   \
+      Registerer##RegistryName;                                   \
  extern template class Registerer<SrcType, PtrType<ObjectType>, __VA_ARGS__>;

 #define AT_DEFINE_TYPED_REGISTRY(                                         \
--- a/aten/src/ATen/core/Scalar.h
+++ b/aten/src/ATen/core/Scalar.h
@ -14,8 +14,8 @@ namespace at {

 struct Tensor;

-class AT_API Scalar {
-public:
+class CAFFE2_API Scalar {
+ public:
  Scalar() : Scalar(int64_t(0)) {}

 #define DEFINE_IMPLICIT_CTOR(type,name,member) \
--- a/aten/src/ATen/core/Storage.h
+++ b/aten/src/ATen/core/Storage.h
@ -4,8 +4,8 @@

 namespace at {

-struct AT_API Storage {
-public:
+struct CAFFE2_API Storage {
+ public:
  Storage() {}
  Storage(c10::intrusive_ptr<StorageImpl> ptr) : storage_impl_(std::move(ptr)) {}
  Storage(
--- a/aten/src/ATen/core/StorageImpl.h
+++ b/aten/src/ATen/core/StorageImpl.h
@ -10,7 +10,7 @@ namespace at {

 struct Type;

-struct AT_API StorageImpl : public c10::intrusive_ptr_target {
+struct CAFFE2_API StorageImpl : public c10::intrusive_ptr_target {
 public:
  StorageImpl(
      caffe2::TypeMeta data_type,
--- a/aten/src/ATen/core/Tensor.h
+++ b/aten/src/ATen/core/Tensor.h
@ -37,7 +37,7 @@ namespace at {
 //
 // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
 // special care must be taken to handle this.
-struct AT_API Tensor {
+struct CAFFE2_API Tensor {
  Tensor(){};
  Tensor(c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
      : tensor_impl_(std::move(tensor_impl)) {
@ -648,7 +648,7 @@ protected:
  c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl_;
 };

-struct AT_API WeakTensor {
+struct CAFFE2_API WeakTensor {
  WeakTensor(const Tensor& t) : weak_tensor_impl_(t.tensor_impl_) {}

  // XXX: this can return undefined tensors
--- a/aten/src/ATen/core/TensorImpl.h
+++ b/aten/src/ATen/core/TensorImpl.h
@ -20,7 +20,7 @@ struct Tensor;
 } // namespace at

 namespace at {
-struct AT_API TensorImpl : public c10::intrusive_ptr_target {
+struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
  TensorImpl() = delete;
  TensorImpl(TensorTypeId type_id, const caffe2::TypeMeta& data_type, Allocator *allocator, bool is_variable);
  TensorImpl(Storage&& storage, TensorTypeId type_id, bool is_variable);
--- a/aten/src/ATen/core/TensorOptions.h
+++ b/aten/src/ATen/core/TensorOptions.h
@ -47,7 +47,7 @@ namespace at {
 ///     at::zeros({2,2}, at::device({at::kCUDA, 1})); // place on device 1
 ///     at::zeros({2,2}, at::requires_grad());
 ///
-struct AT_API TensorOptions {
+struct CAFFE2_API TensorOptions {
  TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {}

  /// Constructs the `TensorOptions` with defaults taken from the thread local
--- a/aten/src/ATen/core/Type.h
+++ b/aten/src/ATen/core/Type.h
@ -76,7 +76,7 @@ enum class TypeID {
  NumOptions
 };

-struct AT_API Type {
+struct CAFFE2_API Type {
  explicit Type(TensorTypeId type_id, bool is_variable, bool is_undefined)
      : type_id_(type_id), is_variable_(is_variable), is_undefined_(is_undefined) {}

@ -613,7 +613,6 @@ protected:
  TensorTypeId type_id_;
  bool is_variable_;
  bool is_undefined_;
-
 };

 } // namespace at
--- a/aten/src/ATen/core/UndefinedTensorImpl.h
+++ b/aten/src/ATen/core/UndefinedTensorImpl.h
@ -4,8 +4,8 @@

 namespace at {

-struct AT_API UndefinedTensorImpl final : public TensorImpl {
-public:
+struct CAFFE2_API UndefinedTensorImpl final : public TensorImpl {
+ public:
  // Without this, we get:
  //  error: identifier "at::UndefinedTensorImpl::_singleton" is undefined in device code
  // (ostensibly because the constexpr tricks MSVC into trying to compile this
--- a/aten/src/ATen/core/VariableHooksInterface.h
+++ b/aten/src/ATen/core/VariableHooksInterface.h
@ -20,8 +20,7 @@ namespace at {
 //
 // We may choose to absorb autograd into ATen, in which case this interface is obsolete.
 //
-struct AT_API VariableHooksInterface {
-
+struct CAFFE2_API VariableHooksInterface {
  // This should never actually be implemented, but it is used to
  // squelch -Werror=non-virtual-dtor
  virtual ~VariableHooksInterface() {}
@ -34,18 +33,17 @@ struct AT_API VariableHooksInterface {
    // no-op if Variable not available; it'll get handled (if at all) when
    // libtorch.so gets loaded
  }
-
 };

 // NB: dummy argument to suppress "ISO C++11 requires at least one argument
 // for the "..." in a variadic macro"
-struct AT_API VariableHooksArgs {};
+struct CAFFE2_API VariableHooksArgs {};

 AT_DECLARE_REGISTRY(VariableHooksRegistry, VariableHooksInterface, VariableHooksArgs)
 #define REGISTER_VARIABLE_HOOKS(clsname) AT_REGISTER_CLASS(VariableHooksRegistry, clsname, clsname)

 namespace detail {
-  AT_API const VariableHooksInterface& getVariableHooks();
+CAFFE2_API const VariableHooksInterface& getVariableHooks();
 }

 } // namespace at
--- a/aten/src/ATen/core/context_base.h
+++ b/aten/src/ATen/core/context_base.h
@ -192,9 +192,9 @@ using at::BaseContext;
 using at::BaseStaticContext;

 using StaticContextMap = std::unordered_map<at::DeviceType, BaseStaticContext*>;
-AT_API StaticContextMap& GetStaticContexts();
-AT_API void set_static_context(at::DeviceType t, BaseStaticContext* ptr);
-AT_API BaseStaticContext* get_static_context(at::DeviceType t);
+CAFFE2_API StaticContextMap& GetStaticContexts();
+CAFFE2_API void set_static_context(at::DeviceType t, BaseStaticContext* ptr);
+CAFFE2_API BaseStaticContext* get_static_context(at::DeviceType t);

 template <at::DeviceType t>
 struct StaticContextFunctionRegisterer {
--- a/aten/src/ATen/core/ivalue.cpp
+++ b/aten/src/ATen/core/ivalue.cpp
@ -6,7 +6,8 @@

 namespace torch { namespace jit {

-AT_API c10::intrusive_ptr<ConstantString> ConstantString::create(std::string str_) {
+CAFFE2_API c10::intrusive_ptr<ConstantString> ConstantString::create(
+    std::string str_) {
  return c10::make_intrusive<ConstantString>(std::move(str_));
 }

--- a/aten/src/ATen/core/ivalue.h
+++ b/aten/src/ATen/core/ivalue.h
@ -14,7 +14,7 @@ template <typename T>
 using Shared = c10::intrusive_ptr<T>;

 // string
-struct AT_API ConstantString final : c10::intrusive_ptr_target {
+struct CAFFE2_API ConstantString final : c10::intrusive_ptr_target {
 private:
  const std::string str_;
 public:
@ -27,7 +27,7 @@ struct AT_API ConstantString final : c10::intrusive_ptr_target {
  operator const std::string & () const {
    return string();
  }
-  AT_API friend std::ostream& operator<<(
+  CAFFE2_API friend std::ostream& operator<<(
      std::ostream& out,
      const ConstantString& v);
 };
@ -67,7 +67,7 @@ using DoubleList = ConstantList<double>;
 #define TORCH_FORALL_TAGS(_) \
  _(None) _(Tensor) _(Double) _(Int) _(Tuple) _(IntList) _(DoubleList) _(String) _(TensorList)

-struct AT_API IValue final {
+struct CAFFE2_API IValue final {
  IValue()
  : payload{0}
  , tag(Tag::None)
@ -277,7 +277,9 @@ struct AT_API IValue final {
  template<typename T>
  T to() const &;

-  AT_API friend std::ostream& operator<<(std::ostream& out, const IValue& v);
+  CAFFE2_API friend std::ostream& operator<<(
+      std::ostream& out,
+      const IValue& v);

 private:
  // NOTE: IValue tags are intentionally private. In the future we may encode
--- a/aten/src/ATen/cuda/CUDAContext.h
+++ b/aten/src/ATen/cuda/CUDAContext.h
@ -35,32 +35,31 @@ manage their own state. There is only a single CUDA context/state.
 */

 /* Device info */
-AT_API int64_t getNumGPUs();
+CAFFE2_API int64_t getNumGPUs();

-AT_API int64_t current_device();
+CAFFE2_API int64_t current_device();

-AT_API void set_device(int64_t device);
+CAFFE2_API void set_device(int64_t device);

-AT_API cudaDeviceProp* getCurrentDeviceProperties();
+CAFFE2_API cudaDeviceProp* getCurrentDeviceProperties();

-AT_API cudaDeviceProp* getDeviceProperties(int64_t device);
+CAFFE2_API cudaDeviceProp* getDeviceProperties(int64_t device);

 /* Streams */
-AT_API CUDAStream createCUDAStream(
-  const bool isHighPriority = false
-, int64_t device = -1);
+CAFFE2_API CUDAStream
+createCUDAStream(const bool isHighPriority = false, int64_t device = -1);

-AT_API CUDAStream getDefaultCUDAStream(int64_t device = -1);
-AT_API CUDAStream getCurrentCUDAStream(int64_t device = -1);
+CAFFE2_API CUDAStream getDefaultCUDAStream(int64_t device = -1);
+CAFFE2_API CUDAStream getCurrentCUDAStream(int64_t device = -1);

-AT_API void setCurrentCUDAStream(CUDAStream stream);
-AT_API void uncheckedSetCurrentCUDAStream(CUDAStream stream);
+CAFFE2_API void setCurrentCUDAStream(CUDAStream stream);
+CAFFE2_API void uncheckedSetCurrentCUDAStream(CUDAStream stream);

-AT_API Allocator* getCUDADeviceAllocator();
+CAFFE2_API Allocator* getCUDADeviceAllocator();

 /* Handles */
 #ifndef __HIP_PLATFORM_HCC__
-  AT_API cusparseHandle_t getCurrentCUDASparseHandle();
+CAFFE2_API cusparseHandle_t getCurrentCUDASparseHandle();
 #endif


--- a/aten/src/ATen/detail/CUDAHooksInterface.h
+++ b/aten/src/ATen/detail/CUDAHooksInterface.h
@ -47,7 +47,7 @@ constexpr const char* CUDA_HELP =
 // TODO: Consider putting the stub definitions in another class, so that one
 // never forgets to implement each virtual function in the real implementation
 // in CUDAHooks.  This probably doesn't buy us much though.
-struct AT_API CUDAHooksInterface {
+struct CAFFE2_API CUDAHooksInterface {
  // This should never actually be implemented, but it is used to
  // squelch -Werror=non-virtual-dtor
  virtual ~CUDAHooksInterface() {}
@ -129,14 +129,14 @@ struct AT_API CUDAHooksInterface {

 // NB: dummy argument to suppress "ISO C++11 requires at least one argument
 // for the "..." in a variadic macro"
-struct AT_API CUDAHooksArgs {};
+struct CAFFE2_API CUDAHooksArgs {};

 AT_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs)
 #define REGISTER_CUDA_HOOKS(clsname) \
  AT_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname)

 namespace detail {
-AT_API const CUDAHooksInterface& getCUDAHooks();
+CAFFE2_API const CUDAHooksInterface& getCUDAHooks();

 /// This class exists to let us access `cudaSetDevice`, `cudaGetDevice` and CUDA
 /// error handling functions, when CUDA is available. These functions will first
@ -144,7 +144,7 @@ AT_API const CUDAHooksInterface& getCUDAHooks();
 /// the `cudaSetDevice`/`cudaGetDevice` functions. This allows us to access them
 /// with only a single pointer indirection, while virtual dispatch would require
 /// two (one for the virtual call, one for `cudaSetDevice`/`cudaGetDevice`).
-struct AT_API DynamicCUDAInterface {
+struct CAFFE2_API DynamicCUDAInterface {
  static void (*set_device)(int32_t);
  static void (*get_device)(int32_t*);
  static void (*unchecked_set_device)(int32_t);
--- a/aten/src/ATen/detail/ComplexHooksInterface.h
+++ b/aten/src/ATen/detail/ComplexHooksInterface.h
@ -7,7 +7,7 @@ namespace at {

 class Context;

-struct AT_API ComplexHooksInterface {
+struct CAFFE2_API ComplexHooksInterface {
  virtual ~ComplexHooksInterface() {}

  virtual void registerComplexTypes(Context*) const {
@ -15,13 +15,13 @@ struct AT_API ComplexHooksInterface {
  }
 };

-struct AT_API ComplexHooksArgs {};
+struct CAFFE2_API ComplexHooksArgs {};
 AT_DECLARE_REGISTRY(ComplexHooksRegistry, ComplexHooksInterface, ComplexHooksArgs)
 #define REGISTER_COMPLEX_HOOKS(clsname) \
  AT_REGISTER_CLASS(ComplexHooksRegistry, clsname, clsname)

 namespace detail {
-AT_API const ComplexHooksInterface& getComplexHooks();
+CAFFE2_API const ComplexHooksInterface& getComplexHooks();
 }

 }
--- a/aten/src/ATen/function_wrapper.py
+++ b/aten/src/ATen/function_wrapper.py
@ -154,7 +154,7 @@ static inline ${return_type} ${api_name}(${formals}) {
 """)
 # add a native declaration for a native function
 NATIVE_DECLARATION = CodeTemplate("""\
-AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
+CAFFE2_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
 """)

 # special method definition for factory functions in Functions.h
--- a/aten/src/ATen/native/DispatchStub.h
+++ b/aten/src/ATen/native/DispatchStub.h
@ -49,10 +49,10 @@ enum class CPUCapability {
 CPUCapability get_cpu_capability();

 template <typename FnPtr, typename T>
-struct AT_API DispatchStub;
+struct CAFFE2_API DispatchStub;

 template <typename rT, typename T, typename... Args>
-struct AT_API DispatchStub<rT (*) (Args...), T> {
+struct CAFFE2_API DispatchStub<rT (*)(Args...), T> {
  using FnPtr = rT (*) (Args...);

  template <typename... ArgTypes>
@ -114,9 +114,9 @@ struct RegisterDispatch {
 // adding parentheses and using helper struct to get rid of the parentheses, do
 // not work with MSVC. So do a `using`-declaration if you need to pass in such
 // `fn`, e.g., grid_sampler_2d_backward_cpu_kernel in GridSampleKernel.h.
-#define DECLARE_DISPATCH(fn, name) \
+#define DECLARE_DISPATCH(fn, name)         \
  struct name : DispatchStub<fn, name> {}; \
-  extern AT_API struct name name
+  extern CAFFE2_API struct name name

 #define DEFINE_DISPATCH(name) struct name name

--- a/aten/src/ATen/native/TensorIterator.h
+++ b/aten/src/ATen/native/TensorIterator.h
@ -50,7 +50,7 @@

 namespace at {

-struct AT_API OperandInfo {
+struct CAFFE2_API OperandInfo {
  OperandInfo() {}
  OperandInfo(const Tensor& t) : tensor(const_cast<Tensor*>(&t)) {}

@ -82,7 +82,7 @@ struct AT_API OperandInfo {

 struct SplitUntil32Bit;

-struct AT_API TensorIterator {
+struct CAFFE2_API TensorIterator {
  struct Builder;
  friend struct Builder;

@ -212,8 +212,8 @@ private:
 /// A container-like struct that acts as if it contains splits of a
 /// TensorIterator that can use 32-bit indexing. Taken together the splits cover
 /// the original TensorIterator.
-struct AT_API SplitUntil32Bit {
-  struct AT_API iterator {
+struct CAFFE2_API SplitUntil32Bit {
+  struct CAFFE2_API iterator {
    iterator() {};
    iterator(const TensorIterator& iter);
    iterator(iterator&&) = default;
--- a/aten/src/ATen/templates/NativeFunctions.h
+++ b/aten/src/ATen/templates/NativeFunctions.h
@ -48,23 +48,23 @@ inline Tensor from_blob(
 }

 // These functions are defined in native/TensorFactories.cpp.
-#define TENSOR(T, S, _1)                                               \
-  AT_API Tensor tensor(ArrayRef<T> values, const TensorOptions& options);     \
-  inline Tensor tensor(                                                \
-      std::initializer_list<T> values, const TensorOptions& options) { \
-    return native::tensor(ArrayRef<T>(values), options);               \
-  }                                                                    \
-  inline Tensor tensor(T value, const TensorOptions& options) {        \
-    return native::tensor(ArrayRef<T>(value), options);                \
-  }                                                                    \
-  inline Tensor tensor(ArrayRef<T> values) {                           \
-    return native::tensor(std::move(values), at::dtype(k##S));         \
-  }                                                                    \
-  inline Tensor tensor(std::initializer_list<T> values) {              \
-    return native::tensor(ArrayRef<T>(values));                        \
-  }                                                                    \
-  inline Tensor tensor(T value) {                                      \
-    return native::tensor(ArrayRef<T>(value));                         \
+#define TENSOR(T, S, _1)                                                      \
+  CAFFE2_API Tensor tensor(ArrayRef<T> values, const TensorOptions& options); \
+  inline Tensor tensor(                                                       \
+      std::initializer_list<T> values, const TensorOptions& options) {        \
+    return native::tensor(ArrayRef<T>(values), options);                      \
+  }                                                                           \
+  inline Tensor tensor(T value, const TensorOptions& options) {               \
+    return native::tensor(ArrayRef<T>(value), options);                       \
+  }                                                                           \
+  inline Tensor tensor(ArrayRef<T> values) {                                  \
+    return native::tensor(std::move(values), at::dtype(k##S));                \
+  }                                                                           \
+  inline Tensor tensor(std::initializer_list<T> values) {                     \
+    return native::tensor(ArrayRef<T>(values));                               \
+  }                                                                           \
+  inline Tensor tensor(T value) {                                             \
+    return native::tensor(ArrayRef<T>(value));                                \
  }
 AT_FORALL_SCALAR_TYPES_EXCEPT_HALF(TENSOR)
 #undef TENSOR
--- a/aten/src/ATen/templates/Tensor.h
+++ b/aten/src/ATen/templates/Tensor.h
@ -37,7 +37,7 @@ namespace at {
 //
 // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
 // special care must be taken to handle this.
-struct AT_API Tensor {
+struct CAFFE2_API Tensor {
  Tensor(){};
  Tensor(c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
      : tensor_impl_(std::move(tensor_impl)) {
@ -262,7 +262,7 @@ protected:
  c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl_;
 };

-struct AT_API WeakTensor {
+struct CAFFE2_API WeakTensor {
  WeakTensor(const Tensor& t) : weak_tensor_impl_(t.tensor_impl_) {}

  // XXX: this can return undefined tensors
--- a/aten/src/ATen/templates/Type.h
+++ b/aten/src/ATen/templates/Type.h
@ -47,7 +47,7 @@ enum class TypeID {
  NumOptions
 };

-struct AT_API Type {
+struct CAFFE2_API Type {
  explicit Type(TensorTypeId type_id, bool is_variable, bool is_undefined)
      : type_id_(type_id), is_variable_(is_variable), is_undefined_(is_undefined) {}

@ -140,7 +140,6 @@ protected:
  TensorTypeId type_id_;
  bool is_variable_;
  bool is_undefined_;
-
 };

 } // namespace at
--- a/aten/src/ATen/templates/TypeDefault.h
+++ b/aten/src/ATen/templates/TypeDefault.h
@ -6,7 +6,7 @@

 namespace at {

-struct AT_API TypeDefault : public TypeExtendedInterface {
+struct CAFFE2_API TypeDefault : public TypeExtendedInterface {
  explicit TypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined)
      : TypeExtendedInterface(type_id, is_variable, is_undefined) {}

--- a/aten/src/ATen/templates/TypeExtendedInterface.h
+++ b/aten/src/ATen/templates/TypeExtendedInterface.h
@ -3,7 +3,7 @@

 namespace at {

-struct AT_API TypeExtendedInterface : public Type {
+struct CAFFE2_API TypeExtendedInterface : public Type {
  explicit TypeExtendedInterface(TensorTypeId type_id, bool is_variable, bool is_undefined)
      : Type(type_id, is_variable, is_undefined) {}
  ${pure_virtual_extended_type_method_declarations}
--- a/aten/src/TH/THAllocator.h
+++ b/aten/src/TH/THAllocator.h
@ -32,8 +32,8 @@ TH_API THAllocator* getTHDefaultAllocator(void);
 // the non-file descriptor constructor
 enum WithFd { WITH_FD };

-class AT_API THMapAllocator {
-public:
+class CAFFE2_API THMapAllocator {
+ public:
  THMapAllocator(const char *filename, int flags, size_t size);
  THMapAllocator(WithFd, const char *filename, int fd, int flags, size_t size);
  THMapAllocator(const THMapAllocator&) = delete;
@ -82,12 +82,14 @@ protected:
 };

 // Base-from-member idiom
-struct AT_API THRefcountedMapAllocatorArgCheck {
+struct CAFFE2_API THRefcountedMapAllocatorArgCheck {
  THRefcountedMapAllocatorArgCheck(int flags);
 };

-class AT_API THRefcountedMapAllocator : private THRefcountedMapAllocatorArgCheck, public THMapAllocator {
-public:
+class CAFFE2_API THRefcountedMapAllocator
+    : private THRefcountedMapAllocatorArgCheck,
+      public THMapAllocator {
+ public:
  THRefcountedMapAllocator(const char *filename, int flags, size_t size);
  THRefcountedMapAllocator(WithFd, const char *filename, int fd, int flags, size_t size);

--- a/aten/src/THC/THCAllocator.h
+++ b/aten/src/THC/THCAllocator.h
@ -7,8 +7,8 @@ THC_API THAllocator* getTHCudaHostAllocator(void);
 // IPC doesn't support (re)allocation

 #ifdef __cplusplus
-class AT_API THCIpcDeleter {
-public:
+class CAFFE2_API THCIpcDeleter {
+ public:
  THCIpcDeleter(void* data, int device) : data_(data), device_(device) {};
  ~THCIpcDeleter();
  static at::DataPtr makeDataPtr(void* data, int device);
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@ -0,0 +1,38 @@
+# Main build file for the C10 library.
+#
+# Note that the C10 library should maintain minimal dependencies - especially,
+# it should not depend on any library that is implementation specific or
+# backend specific. It should in particular NOT be dependent on any generated
+# protobuf header files, because protobuf header files will transitively force
+# one to link against a specific protobuf version.
+
+# ---[ Configure macro file.
+set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
+configure_file(
+    ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
+    ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h)
+
+# Note: if you want to add ANY dependency to the c10 library, make sure you
+# check with the core PyTorch developers as the dependency will be
+# transitively passed on to all libraries dependent on PyTorch.
+file(GLOB_RECURSE C10_SRCS *.cpp)
+file(GLOB_RECURSE C10_HEADERS *.h)
+add_library(c10 ${C10_SRCS} ${C10_HEADERS})
+# If building shared library, set dllimport/dllexport properly.
+target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB")
+target_include_directories(
+    c10 PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
+    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+    $<INSTALL_INTERFACE:include>)
+
+# ---[ Installation
+# Note: for now, we will put all export path into one single Caffe2Targets group
+# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
+# individual libraries like libc10.so and libcaffe2.so are still self-contained.
+install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
+install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
+        DESTINATION include
+        FILES_MATCHING PATTERN "*.h")
+install(FILES ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h
+        DESTINATION include/c10/macros)
--- a/c10/c10_dummy.cpp
+++ b/c10/c10_dummy.cpp
@ -0,0 +1,7 @@
+#include "c10/c10_dummy.h"
+
+namespace c10 {
+bool HasC10() {
+  return true;
+}
+} // namespace c10
--- a/c10/c10_dummy.h
+++ b/c10/c10_dummy.h
@ -0,0 +1,7 @@
+#pragma once
+
+#include "c10/macros/Macros.h"
+
+namespace c10 {
+C10_API bool HasC10();
+}
--- a/c10/macros/Export.h
+++ b/c10/macros/Export.h
@ -0,0 +1,76 @@
+/* Header file to define the common scaffolding for exported symbols.
+ *
+ * Export is by itself a quite tricky situation to deal with, and if you are
+ * hitting this file, make sure you start with the background here:
+ * - Linux: https://gcc.gnu.org/wiki/Visibility
+ * - Windows:
+ * https://docs.microsoft.com/en-us/cpp/cpp/dllexport-dllimport?view=vs-2017
+ *
+ * Do NOT include this file directly. Instead, use c10/macros/Macros.h
+ */
+
+#pragma once
+
+// You do not need to edit this part of file unless you are changing the core
+// pytorch export abstractions.
+//
+// This part defines the C10 core export and import macros. This is controlled
+// by whether we are building shared libraries or not, which is determined
+// during build time and codified in c10/macros/cmake_macros.h.
+// When the library is built as a shared lib, EXPORT and IMPORT will contain
+// visibility attributes. If it is being built as a static lib, then EXPORT
+// and IMPORT basically have no effect.
+
+// As a rule of thumb, you should almost NEVER mix static and shared builds for
+// libraries that depend on c10. AKA, if c10 is built as a static library, we
+// recommend everything dependent on c10 to be built statically. If c10 is built
+// as a shared library, everything dependent on it should be built as shared. In
+// the PyTorch project, all native libraries shall use the macro
+// C10_BUILD_SHARED_LIBS to check whether pytorch is building shared or static
+// libraries.
+
+#ifdef _WIN32
+#if defined(C10_BUILD_SHARED_LIBS)
+#define C10_EXPORT __declspec(dllexport)
+#define C10_IMPORT __declspec(dllimport)
+#else
+#define C10_EXPORT
+#define C10_IMPORT
+#endif
+#else // _WIN32
+#if defined(__GNUC__)
+#define C10_EXPORT __attribute__((__visibility__("default")))
+#else // defined(__GNUC__)
+#define C10_EXPORT
+#endif // defined(__GNUC__)
+#define C10_IMPORT C10_EXPORT
+#endif // _WIN32
+
+// Definition of an adaptive XX_API macro, that depends on whether you are
+// building the library itself or not, routes to XX_EXPORT and XX_IMPORT.
+// Basically, you will need to do this for each shared library that you are
+// building, and the instruction is as follows: assuming that you are building
+// a library called libawesome.so. You should:
+// (1) for your cmake target (usually done by "add_library(awesome ...)"),
+//     define a macro called AWESOME_BUILD_MAIN_LIB using
+//     target_compile_options.
+// (2) define the AWESOME_API macro similar to the one below.
+// And in the source file of your awesome library, use AWESOME_API to
+// annotate public symbols.
+
+// Here, for the C10 library, we will define the macro C10_API for both import
+// and export.
+
+// Used by libc10.so; c10/CMakeLists.txt defines C10_BUILD_MAIN_LIB for it.
+#ifdef C10_BUILD_MAIN_LIB
+#define C10_API C10_EXPORT
+#else
+#define C10_API C10_IMPORT
+#endif
+
+// This one is being used by libcaffe2.so
+#ifdef CAFFE2_BUILD_MAIN_LIB
+#define CAFFE2_API C10_EXPORT
+#else
+#define CAFFE2_API C10_IMPORT
+#endif
--- a/c10/macros/Legacy.h
+++ b/c10/macros/Legacy.h
@ -0,0 +1,20 @@
+/* A centralized location to provide legacy macro support, and a warning about
+ * when this legacy compatibility symbol is going to be removed in the future.
+ *
+ * Do NOT include this file directly. Instead, use c10/macros/Macros.h
+ */
+
+#pragma once
+
+// Note: this is for caffe2/*. Will need to codemod to use direct C10.
+#define CAFFE2_EXPORT C10_EXPORT
+#define CAFFE2_IMPORT C10_IMPORT
+
+// Note: this is for aten/src/*. Will need to codemod.
+#define AT_CORE_API CAFFE2_API
+#define AT_CORE_EXPORT C10_EXPORT
+#define AT_CORE_IMPORT C10_IMPORT
+
+// Note: this is for both aten and c2, due to cross reference between c2 and
+// aten that we try to unentangle. Will need to codemod.
+#define AT_DISABLE_COPY_AND_ASSIGN C10_DISABLE_COPY_AND_ASSIGN
--- a/c10/macros/Macros.h
+++ b/c10/macros/Macros.h
@ -0,0 +1,32 @@
+/* Main entry for c10/macros.
+ *
+ * In your code, include c10/macros/Macros.h directly, instead of individual
+ * files in this folder.
+ */
+
+#pragma once
+
+// For build systems that do not directly depend on CMake and directly build
+// from the source directory (such as Buck), one may not have a cmake_macros.h
+// file at all. In this case, the build system is responsible for providing
+// correct macro definitions corresponding to the cmake_macros.h.in file.
+//
+// In such scenarios, one should define the macro
+//     C10_USING_CUSTOM_GENERATED_MACROS
+// to inform this header that it does not need to include the cmake_macros.h
+// file.
+
+#ifndef C10_USING_CUSTOM_GENERATED_MACROS
+#include "c10/macros/cmake_macros.h"
+#endif // C10_USING_CUSTOM_GENERATED_MACROS
+
+#include "c10/macros/Export.h"
+
+// Disable the copy and assignment operator for a class. Note that this will
+// disable the usage of the class in std containers.
+#define C10_DISABLE_COPY_AND_ASSIGN(classname) \
+  classname(const classname&) = delete;        \
+  classname& operator=(const classname&) = delete
+
+// Finally, file that provides legacy support for macros
+#include "c10/macros/Legacy.h"
--- a/c10/macros/cmake_macros.h.in
+++ b/c10/macros/cmake_macros.h.in
@ -0,0 +1,6 @@
+// Automatically generated header file for the C10 library.
+// Do not include this file directly. Instead, include c10/macros/Macros.h.
+
+#pragma once
+
+#cmakedefine C10_BUILD_SHARED_LIBS
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@ -194,7 +194,6 @@ target_include_directories(caffe2_protos INTERFACE $<INSTALL_INTERFACE:include>)
 target_link_libraries(caffe2_protos PUBLIC protobuf::libprotobuf)

 # Compile exposed libraries.
-list(APPEND Caffe2_CPU_SRCs $<TARGET_OBJECTS:c10>)
 add_library(caffe2 ${Caffe2_CPU_SRCS})
 if (NOT WIN32)
  target_compile_options(caffe2 PRIVATE "-fvisibility=hidden")
@ -206,6 +205,7 @@ if (${CAFFE2_LINK_LOCAL_PROTOBUF})
 else()
  target_link_libraries(caffe2 PUBLIC protobuf::libprotobuf)
 endif()
+target_link_libraries(caffe2 PUBLIC c10)
 target_link_libraries(caffe2 PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
 target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_LIBS})
 target_link_libraries(caffe2 PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
@ -333,7 +333,7 @@ if(USE_CUDA)
  # NB: This must be target_compile_definitions, not target_compile_options,
  # as the latter is not respected by nvcc
  if (MSVC)
-	  target_compile_definitions(caffe2_gpu PRIVATE "-DCAFFE2_CUDA_BUILD_MAIN_LIB")
+    target_compile_definitions(caffe2_gpu PRIVATE "-DCAFFE2_CUDA_BUILD_MAIN_LIB")
  endif()

  # Set standard properties on the target
--- a/caffe2/core/common.h
+++ b/caffe2/core/common.h
@ -26,7 +26,7 @@
 // is automatically generated by the cmake script during build.
 #include "caffe2/core/macros.h"

-#include "ATen/core/Macros.h"
+#include "c10/macros/Macros.h"

 namespace caffe2 {

@ -90,48 +90,6 @@ using std::vector;
 #define CAFFE2_NORETURN __attribute__((noreturn))
 #endif

-// Defines CAFFE2_EXPORT and CAFFE2_IMPORT. On Windows, this corresponds to
-// different declarations (dllexport and dllimport). On Linux/Mac, it just
-// resolves to the same "default visibility" setting.
-#if defined(_MSC_VER)
-#if defined(CAFFE2_BUILD_SHARED_LIBS)
-#define CAFFE2_EXPORT __declspec(dllexport)
-#define CAFFE2_IMPORT __declspec(dllimport)
-#else
-#define CAFFE2_EXPORT
-#define CAFFE2_IMPORT
-#endif
-#else
-#if defined(__GNUC__)
-#define CAFFE2_EXPORT __attribute__((__visibility__("default")))
-#else
-#define CAFFE2_EXPORT
-#endif
-#define CAFFE2_IMPORT CAFFE2_EXPORT
-#endif
-
-// CAFFE2_API is a macro that, depends on whether you are building the
-// main caffe2 library or not, resolves to either CAFFE2_EXPORT or
-// CAFFE2_IMPORT.
-//
-// This is used in e.g. Caffe2's protobuf files: when building the main library,
-// it is defined as CAFFE2_EXPORT to fix a Windows global-variable-in-dll
-// issue, and for anyone dependent on Caffe2 it will be defined as
-// CAFFE2_IMPORT.
-
-#ifdef CAFFE2_BUILD_MAIN_LIB
-#define CAFFE2_API CAFFE2_EXPORT
-#else
-#define CAFFE2_API CAFFE2_IMPORT
-#endif
-
-#ifdef CAFFE2_BUILD_OBSERVER_LIB
-#define CAFFE2_OBSERVER_API CAFFE2_EXPORT
-#else
-#define CAFFE2_OBSERVER_API CAFFE2_IMPORT
-#endif
-
-
 #if defined(_MSC_VER)
 #define NOMINMAX
 #endif
--- a/caffe2/core/logging.h
+++ b/caffe2/core/logging.h
@ -8,6 +8,7 @@
 #include <sstream>

 #include <ATen/core/Error.h>
+#include "caffe2/core/common.h"
 #include "caffe2/core/flags.h"

 // CAFFE2_LOG_THRESHOLD is a compile time flag that would allow us to turn off
--- a/caffe2/perfkernels/CMakeLists.txt
+++ b/caffe2/perfkernels/CMakeLists.txt
@ -17,8 +17,8 @@ set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${common_srcs})
 if (NOT MSVC AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
  add_library(Caffe2_perfkernels_avx OBJECT ${avx_srcs})
  add_library(Caffe2_perfkernels_avx2 OBJECT ${avx2_srcs})
-  add_dependencies(Caffe2_perfkernels_avx Caffe2_PROTO)
-  add_dependencies(Caffe2_perfkernels_avx2 Caffe2_PROTO)
+  add_dependencies(Caffe2_perfkernels_avx Caffe2_PROTO c10)
+  add_dependencies(Caffe2_perfkernels_avx2 Caffe2_PROTO c10)
  if (MSVC)
    set_target_properties(
        Caffe2_perfkernels_avx PROPERTIES COMPILE_FLAGS "/arch:AVX")
--- a/modules/observers/macros.h
+++ b/modules/observers/macros.h
@ -0,0 +1,7 @@
+#include "c10/macros/Macros.h"
+
+#ifdef CAFFE2_BUILD_OBSERVER_LIB
+#define CAFFE2_OBSERVER_API C10_EXPORT
+#else
+#define CAFFE2_OBSERVER_API C10_IMPORT
+#endif
--- a/modules/observers/net_observer_reporter.h
+++ b/modules/observers/net_observer_reporter.h
@ -4,6 +4,7 @@

 #include "caffe2/core/common.h"
 #include "caffe2/core/net.h"
+#include "observers/macros.h"

 namespace caffe2 {

--- a/modules/observers/net_observer_reporter_print.h
+++ b/modules/observers/net_observer_reporter_print.h
@ -1,5 +1,6 @@
 #pragma once

+#include "observers/macros.h"
 #include "observers/net_observer_reporter.h"

 #include "caffe2/core/common.h"
--- a/modules/observers/observer_config.h
+++ b/modules/observers/observer_config.h
@ -1,5 +1,6 @@
 #pragma once

+#include "observers/macros.h"
 #include "observers/net_observer_reporter.h"

 #include "caffe2/core/common.h"
--- a/modules/observers/perf_observer.h
+++ b/modules/observers/perf_observer.h
@ -4,6 +4,7 @@
 #include "caffe2/core/net.h"
 #include "caffe2/core/observer.h"
 #include "caffe2/core/timer.h"
+#include "observers/macros.h"

 #include <unordered_map>

--- a/setup.py
+++ b/setup.py
@ -1208,6 +1208,8 @@ if __name__ == '__main__':
                'lib/include/ATen/cudnn/*.h',
                'lib/include/ATen/detail/*.h',
                'lib/include/caffe2/utils/*.h',
+                'lib/include/c10/*.h',
+                'lib/include/c10/macros/*.h',
                'lib/include/torch/*.h',
                'lib/include/torch/csrc/*.h',
                'lib/include/torch/csrc/api/include/torch/detail/ordered_dict.h',