[torchscript] Add a sampled logging integration point. (#133484)

Test Plan: test script: ``` def test_zhxchen17(self): from libfb.py.pyinit import initFacebook initFacebook() class M(torch.nn.Module): def forward(self, x): return torch.add(x, x) def tmptmp(x, y): return torch.mul(x, y) m = M() n = torch.jit.script(m) print(n(torch.tensor(1))) print(torch.jit.script(tmptmp)(torch.tensor(1), torch.tensor(2))) ``` ``` I0802 12:01:23.932929 4079081 init.cc:407] Logging to scuba: run __torch__.caffe2.test.export.test_export.M.forward sample rate: 1000000 ``` Differential Revision: D60920867 Pull Request resolved: https://github.com/pytorch/pytorch/pull/133484 Approved by: https://github.com/davidberard98
2025-12-06 12:20:52 +01:00 · 2024-08-19 18:04:45 +00:00 · 2024-08-19 18:04:45 +00:00 · 517aee5369
commit 517aee5369
parent 6564e746ed
4 changed files with 73 additions and 1 deletions
--- a/c10/util/Logging.cpp
+++ b/c10/util/Logging.cpp
@ -1,4 +1,5 @@
 #include <c10/util/Backtrace.h>
+#include <c10/util/CallOnce.h>
 #include <c10/util/Flags.h>
 #include <c10/util/Lazy.h>
 #include <c10/util/Logging.h>
@ -147,8 +148,46 @@ DDPUsageLoggerType* GetDDPUsageLogger() {
  static DDPUsageLoggerType func = [](const DDPLoggingData&) {};
  return &func;
 }
+
+// Returns the process-wide table that maps an event name to its
+// sampled-logging handler. The table is allocated with `new` on first use and
+// is never deleted.
+auto& EventSampledHandlerRegistry() {
+  using HandlerTable =
+      std::map<std::string, std::unique_ptr<EventSampledHandler>>;
+  static HandlerTable* const table = new HandlerTable();
+  return *table;
+}
+
 } // namespace

+/// Installs the sampled-event handlers for this process.
+///
+/// Each (event, handler) pair is stored in the registry under `event`,
+/// replacing whatever handler pointer was previously stored for that name.
+/// Only the first invocation has any effect: the body runs under a
+/// `c10::call_once` guard, so handlers passed to later calls are discarded
+/// when the by-value `handlers` argument goes out of scope.
+///
+/// The registry is modified here without taking the mutex that
+/// GetEventSampledHandler() holds during its lookup.
+void InitEventSampledHandlers(
+    std::vector<
+        std::pair<std::string_view, std::unique_ptr<EventSampledHandler>>>
+        handlers) {
+  static c10::once_flag flag;
+  c10::call_once(flag, [&]() {
+    auto& registry = EventSampledHandlerRegistry();
+    for (auto& [event, handler] : handlers) {
+      // Single lookup: inserts a new entry, or overwrites the mapped pointer
+      // of an existing entry in place (the map node itself is kept).
+      registry.insert_or_assign(std::string{event}, std::move(handler));
+    }
+  });
+}
+
+/// Returns the registry slot for `event`, creating an empty slot if the event
+/// has no entry yet.
+///
+/// The result is a reference to the `unique_ptr` stored inside the registry
+/// map; it is null when no handler has been stored for `event`.
+const std::unique_ptr<EventSampledHandler>& GetEventSampledHandler(
+    std::string_view event) {
+  static std::mutex guard;
+  auto& registry = EventSampledHandlerRegistry();
+
+  // The getter can be executed from different threads.
+  std::lock_guard<std::mutex> lock(guard);
+  // try_emplace performs the find-or-insert with a single lookup; a newly
+  // created slot holds a value-initialized (null) unique_ptr.
+  return registry.try_emplace(std::string{event}).first->second;
+}
+
 void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
  TORCH_CHECK(logger);
  *GetAPIUsageLogger() = std::move(logger);
--- a/c10/util/Logging.h
+++ b/c10/util/Logging.h
@ -287,6 +287,29 @@ void enforceThatImpl(
  CAFFE_ENFORCE_BINARY_OP_WITH_CALLER(          \
      std::greater<void>(), >, x, y, ##__VA_ARGS__)

+// Forward declaration: IValue is only named in a parameter type below.
+struct IValue;
+// Abstract interface for the handlers invoked through C10_LOG_EVENT_SAMPLED.
+// Concrete handlers are handed to InitEventSampledHandlers() as
+// std::unique_ptr<EventSampledHandler>.
+class C10_API EventSampledHandler {
+ public:
+  // Receives the arguments supplied at the C10_LOG_EVENT_SAMPLED call site.
+  virtual void log(
+      std::string_view model_id,
+      const std::vector<c10::IValue>& args) = 0;
+  // Virtual so that handlers can be destroyed through a base-class pointer.
+  virtual ~EventSampledHandler() = default;
+};
+
+#define C10_LOG_EVENT_SAMPLED(event, ...)                                    \
+  static const std::unique_ptr<::c10::EventSampledHandler>&                  \
+      _##event##EventSampledHandler = ::c10::GetEventSampledHandler(#event); \
+  if (_##event##EventSampledHandler) {                                       \
+    _##event##EventSampledHandler->log(__VA_ARGS__);                         \
+  }
+
+// Registers the given (event name, handler) pairs.
+// Must be called in the main thread before any other threads are spawned.
+C10_API void InitEventSampledHandlers(
+    std::vector<
+        std::pair<std::string_view, std::unique_ptr<EventSampledHandler>>>);
+// Returns a reference to the handler pointer associated with the given event
+// name. C10_LOG_EVENT_SAMPLED treats a null pointer as "no handler" and skips
+// the log() call.
+C10_API const std::unique_ptr<EventSampledHandler>& GetEventSampledHandler(
+    std::string_view);
+
 /**
 * Very lightweight logging for the first time API usage. It's beneficial for
 * tracking of individual functionality usage in larger applications.
--- a/torch/csrc/jit/api/function_impl.cpp
+++ b/torch/csrc/jit/api/function_impl.cpp
@ -66,6 +66,7 @@ static void placeholderCreator(GraphFunction&) {
 }

 void GraphFunction::run(Stack& stack) {
+  // Report this call to the handler registered for the "run" event, if any,
+  // passing the function's qualified name and the stack as it is on entry
+  // (before the executor runs).
+  C10_LOG_EVENT_SAMPLED(run, qualname().qualifiedName(), stack);
   get_executor().run(stack);
 }

--- a/torch/csrc/jit/frontend/script_type_parser.cpp
+++ b/torch/csrc/jit/frontend/script_type_parser.cpp
@ -362,7 +362,16 @@ std::vector<IValue> ScriptTypeParser::evaluateDefaults(
  // XXX: We need to turn optimization off here because otherwise we try to
  // recursively initialize stuff in DecomposeOps.
  GraphOptimizerEnabledGuard guard(false);
-  cu.get_function(def.name().name()).run(stack);
+  auto& f = cu.get_function(def.name().name());
+  auto* gf = dynamic_cast<GraphFunction*>(&f);
+  TORCH_INTERNAL_ASSERT(gf);
+  // 2024.08.14: Since we are starting to deprecate TorchScript usages,
+  // we are going to log all the calls to GraphFunction::run. The logging was
+  // noisy because we also call GraphFunction::run for the default value
+  // evaluation, which generates a lot of useless log samples. Therefore, as a
+  // workaround, we directly use the executor API here, which avoids producing
+  // unnecessary log entries.
+  gf->get_executor().run(stack);
  return stack.at(0).toTupleRef().elements().vec();
 }