migrate export_caffe2_op_to_c10.h macros to the new dispatcher registration API (#48097)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/48097

Test Plan: Imported from OSS

Reviewed By: ezyang

Differential Revision: D25056091

Pulled By: bdhirsh

fbshipit-source-id: 0f647ab9bc5e5aee497dac058df492f6e742cfe9
2025-12-06 12:20:52 +01:00 · 2020-11-19 17:54:25 -08:00 · 2020-11-19 17:54:25 -08:00 · 0ea4982cf3
commit 0ea4982cf3
parent 4b56aef05d
4 changed files with 51 additions and 67 deletions
--- a/c10/macros/Macros.h
+++ b/c10/macros/Macros.h
@ -77,8 +77,10 @@
 * str and ending with a number that varies with the line.
 */
 #ifdef __COUNTER__
+#define C10_UID __COUNTER__
 #define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
 #else
+#define C10_UID __LINE__
 #define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
 #endif

--- a/caffe2/core/export_caffe2_op_to_c10.h
+++ b/caffe2/core/export_caffe2_op_to_c10.h
@ -8,6 +8,8 @@
 #include <ATen/core/grad_mode.h>
 #include <ATen/core/op_registration/op_registration.h>
 #include <torch/csrc/jit/frontend/function_schema_parser.h>
+#include <c10/core/CompileTimeFunctionPointer.h>
+#include <torch/library.h>
 #include <vector>

 namespace caffe2 {
@ -191,48 +193,52 @@ inline FunctionSchema make_function_schema_for_c10(const char* schema_str) {
        ::caffe2::detail::make_function_schema_for_c10(OperatorSchema);       \
    return schema;                                                            \
  }                                                                           \
+  TORCH_LIBRARY_FRAGMENT(_caffe2, m) {                                        \
+      m.def(::caffe2::detail::make_function_schema_for_c10(OperatorSchema));  \
+  }                                                                           \
  }                                                                           \
  }

 #define C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY(                         \
    OperatorName, OperatorClass)                                             \
  /* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
-  static auto registry_##OperatorName##_##__COUNTER__ =                      \
-      ::c10::RegisterOperators().op(                                         \
-          ::caffe2::_c10_ops::schema_##OperatorName(),                       \
-          ::c10::RegisterOperators::options()                                \
-              .kernel<&::caffe2::detail::call_caffe2_op_from_c10<            \
-                  ::caffe2::_c10_ops::schema_##OperatorName,                 \
-                  OperatorClass>>(::c10::DispatchKey::CPU));
+    TORCH_LIBRARY_IMPL(_caffe2, CPU, m) {                                    \
+        m.impl("_caffe2::" #OperatorName,                                    \
+            torch::CppFunction::makeFromBoxedFunction<                       \
+                ::caffe2::detail::call_caffe2_op_from_c10<                   \
+                    ::caffe2::_c10_ops::schema_##OperatorName,               \
+                    OperatorClass>>());                                      \
+    }

-#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU(                                \
-    OperatorName, OperatorSchema, OperatorClass)                        \
-  C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(OperatorName, OperatorSchema) \
+#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU(                                     \
+    OperatorName, OperatorSchema, OperatorClass)                             \
+  C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(OperatorName, OperatorSchema)      \
  C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY(OperatorName, OperatorClass)

 #define C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(OperatorName, OperatorClass)        \
  /* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
-  static auto registry_##OperatorName##_##__COUNTER__ =                      \
-      ::c10::RegisterOperators().op(                                         \
-          ::caffe2::_c10_ops::schema_##OperatorName(),                       \
-          ::c10::RegisterOperators::options()                                \
-              .kernel<&::caffe2::detail::call_caffe2_op_from_c10<            \
-                  ::caffe2::_c10_ops::schema_##OperatorName,                 \
-                  OperatorClass>>(::c10::DispatchKey::CUDA));
+    TORCH_LIBRARY_IMPL(_caffe2, CUDA, m) {                                   \
+        m.impl("_caffe2::" #OperatorName,                                    \
+            torch::CppFunction::makeFromBoxedFunction<                       \
+                ::caffe2::detail::call_caffe2_op_from_c10<                   \
+                    ::caffe2::_c10_ops::schema_##OperatorName,               \
+                    OperatorClass>>());                                      \
+    }
+

 // You should never manually call the C10_EXPORT_CAFFE2_OP_TO_C10_HIP macro .
 // The C10_EXPORT_CAFFE2_OP_TO_C10_CUDA macro from above will be automatically
 // rewritten to C10_EXPORT_CAFFE2_OP_TO_C10_HIP by hipify .
 #define C10_EXPORT_CAFFE2_OP_TO_C10_HIP(OperatorName, OperatorClass)         \
  /* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
-  static auto registry_##OperatorName##_##__COUNTER__ =                      \
-      ::c10::RegisterOperators().op(                                         \
-          ::caffe2::_c10_ops::schema_##OperatorName(),                       \
-          ::c10::RegisterOperators()                                         \
-              .options()                                                     \
-              .kernel<&::caffe2::detail::call_caffe2_op_from_c10<            \
-                  ::caffe2::_c10_ops::schema_##OperatorName,                 \
-                  OperatorClass>>(::c10::DispatchKey::HIP));
+    TORCH_LIBRARY_IMPL(_caffe2, HIP, m) {                                    \
+        m.impl("_caffe2::" #OperatorName,                                    \
+            torch::CppFunction::makeFromBoxedFunction<                       \
+                ::caffe2::detail::call_caffe2_op_from_c10<                   \
+                    ::caffe2::_c10_ops::schema_##OperatorName,               \
+                    OperatorClass>>());                                      \
+    }
+

 #else
 // Don't use c10 dispatcher on mobile because of binary size
--- a/tools/code_analyzer/run_analyzer.sh
+++ b/tools/code_analyzer/run_analyzer.sh
@ -19,8 +19,8 @@ echo "Analyze: ${INPUT}"
  -op_invoke_pattern="c10::Dispatcher::findSchema" \
  -root_symbol_pattern="torch::jit::[^(]" \
  -torch_library_init_pattern="^.*TORCH_LIBRARY_init_([^(]+)(\(.*)?$" \
-  -torch_library_init_pattern="^.*TORCH_LIBRARY_FRAGMENT_init_([^(]+)(\(.*)?$" \
-  -torch_library_init_pattern="^.*TORCH_LIBRARY_IMPL_init_([^(]+)_([^_]+)(\(.*)?$" \
+  -torch_library_init_pattern="^.*TORCH_LIBRARY_FRAGMENT_init_([_]*[^_]+)_[0-9]+(\(.*)?$" \
+  -torch_library_init_pattern="^.*TORCH_LIBRARY_IMPL_init_([_]*[^_]+)_([^_]+)_[0-9]+(\(.*)?$" \
  ${EXTRA_ANALYZER_FLAGS} \
  "${INPUT}" \
  > "${OUTPUT}"
--- a/torch/library.h
+++ b/torch/library.h
@ -658,25 +658,16 @@ public:
 /// within the same namespace cannot be easily put into one macro block
 /// (this is mostly the case for custom ops in fbcode that were ported from
 /// the old API)
-#define TORCH_LIBRARY_FRAGMENT(ns, m) \
-  static void TORCH_LIBRARY_FRAGMENT_init_ ## ns ## _ ## k (torch::Library&); \
-  static torch::detail::TorchLibraryInit TORCH_LIBRARY_FRAGMENT_static_init_ ## ns ## _ ## k ( \
-    torch::Library::FRAGMENT, \
-    &TORCH_LIBRARY_FRAGMENT_init_ ## ns ## _ ## k, \
-    #ns, c10::nullopt, __FILE__, __LINE__ \
-  ); \
-  void TORCH_LIBRARY_FRAGMENT_init_ ## ns ## _ ## k (torch::Library& m)
+#define TORCH_LIBRARY_FRAGMENT(ns,  m) _TORCH_LIBRARY_FRAGMENT(ns, m, C10_UID)

 /// \private
 ///
-/// This macro should only be used in a few legacy areas.
-/// This macro is a version of TORCH_LIBRARY() that:
-///     - doesn't enforce that there is only library for the given namespace
-///     - takes in a unique identifier that it appends to the names
-///       of each static variable that it creates
-/// It effectively lets you define multiple TORCH_LIBRARY_FRAGMENT blocks
-/// for the same namespace within the same translation unit, avoiding naming collisions.
-#define TORCH_LIBRARY_FRAGMENT_UNIQUE(ns, m, uid) \
+/// Implementation detail of TORCH_LIBRARY_FRAGMENT. It takes an extra unique identifier (uid) to
+/// prevent variable name collisions, which would otherwise occur when TORCH_LIBRARY_FRAGMENT is
+/// used multiple times with the same namespace in the same translation unit.
+/// Note that the TORCH_LIBRARY variant doesn't run into this problem, because it enforces
+/// that it can only be called once for a given namespace.
+#define _TORCH_LIBRARY_FRAGMENT(ns, m, uid) \
  static void C10_CONCATENATE(TORCH_LIBRARY_FRAGMENT_init_ ## ns ## _, uid) (torch::Library&); \
  static torch::detail::TorchLibraryInit C10_CONCATENATE(TORCH_LIBRARY_FRAGMENT_static_init_ ## ns ## _, uid) ( \
    torch::Library::FRAGMENT, \
@ -725,40 +716,25 @@ public:
 ///
 // NB: if the dispatch key is not whitelisted, we simply omit the Library
 // call entirely
-#define TORCH_LIBRARY_IMPL(ns, k, m) \
-  static void TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k (torch::Library&); \
-  static torch::detail::TorchLibraryInit TORCH_LIBRARY_IMPL_static_init_ ## ns ## _ ## k ( \
-    torch::Library::IMPL, \
-    c10::guts::if_constexpr<c10::impl::dispatch_key_whitelist_check(c10::DispatchKey::k)>( \
-      []() { return & TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k; }, \
-      []() { return [](torch::Library&) -> void {}; } \
-    ), \
-    #ns, c10::make_optional(c10::DispatchKey::k), \
-    __FILE__, __LINE__ \
-  ); \
-  void TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k (torch::Library& m)
+#define TORCH_LIBRARY_IMPL(ns, k, m) _TORCH_LIBRARY_IMPL(ns, k, m, C10_UID)

 /// \private
 ///
-/// This macro should only be used in a few legacy areas.
-/// This macro is a version of TORCH_LIBRARY_IMPL() that:
-///     - takes in a unique identifier that it appends to the names
-///       of each static variable that it creates
-/// It effectively lets you define multiple TORCH_LIBRARY_IMPL blocks
-/// for the same namespace/dispatch key within the same translation unit,
-/// avoiding naming collisions.
-#define TORCH_LIBRARY_IMPL_UNIQUE(ns, k, m, uid) \
-  static void C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k, uid) (torch::Library&); \
-  static torch::detail::TorchLibraryInit C10_CONCATENATE(TORCH_LIBRARY_IMPL_static_init_ ## ns ## _ ## k, uid) ( \
+/// Implementation detail of TORCH_LIBRARY_IMPL. It takes an extra unique identifier (uid) that is
+/// passed to C10_CONCATENATE to form the generated names, so that using TORCH_LIBRARY_IMPL several
+/// times with the same namespace and dispatch key in one translation unit avoids name collisions.
+#define _TORCH_LIBRARY_IMPL(ns, k, m, uid) \
+  static void C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k ## _, uid) (torch::Library&); \
+  static torch::detail::TorchLibraryInit C10_CONCATENATE(TORCH_LIBRARY_IMPL_static_init_ ## ns ## _ ## k ## _, uid) ( \
    torch::Library::IMPL, \
    c10::guts::if_constexpr<c10::impl::dispatch_key_whitelist_check(c10::DispatchKey::k)>( \
-      []() { return & C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k, uid); }, \
+      []() { return & C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k ## _, uid); }, \
      []() { return [](torch::Library&) -> void {}; } \
    ), \
    #ns, c10::make_optional(c10::DispatchKey::k), \
    __FILE__, __LINE__ \
  ); \
-  void C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k, uid) (torch::Library& m)
+  void C10_CONCATENATE(TORCH_LIBRARY_IMPL_init_ ## ns ## _ ## k ## _, uid) (torch::Library& m)


 // These are variants of the macros above which are to be used for testing (they