[torchscript] Add a sampled logging integration point. (#133484)

Test Plan:
test script:
```
    def test_zhxchen17(self):
        from libfb.py.pyinit import initFacebook

        initFacebook()

        class M(torch.nn.Module):
            def forward(self, x):
                return torch.add(x, x)

        def tmptmp(x, y):
            return torch.mul(x, y)

        m = M()
        n = torch.jit.script(m)
        print(n(torch.tensor(1)))
        print(torch.jit.script(tmptmp)(torch.tensor(1), torch.tensor(2)))
```

```
I0802 12:01:23.932929 4079081 init.cc:407] Logging to scuba: run __torch__.caffe2.test.export.test_export.M.forward sample rate: 1000000
```

Differential Revision: D60920867

Pull Request resolved: https://github.com/pytorch/pytorch/pull/133484
Approved by: https://github.com/davidberard98
This commit is contained in:
Zhengxu Chen 2024-08-19 18:04:45 +00:00 committed by PyTorch MergeBot
parent 6564e746ed
commit 517aee5369
4 changed files with 73 additions and 1 deletions

View File

@ -1,4 +1,5 @@
#include <c10/util/Backtrace.h>
#include <c10/util/CallOnce.h>
#include <c10/util/Flags.h>
#include <c10/util/Lazy.h>
#include <c10/util/Logging.h>
@ -147,8 +148,46 @@ DDPUsageLoggerType* GetDDPUsageLogger() {
static DDPUsageLoggerType func = [](const DDPLoggingData&) {};
return &func;
}
auto& EventSampledHandlerRegistry() {
static auto& registry =
*new std::map<std::string, std::unique_ptr<EventSampledHandler>>();
return registry;
}
} // namespace
// Installs the given (event name, handler) pairs into the process-wide
// registry returned by EventSampledHandlerRegistry().
//
// Only the first invocation does any work (it is guarded by a once_flag);
// handlers passed to any later invocation are discarded. If an event already
// has a slot in the registry, its previous value is replaced; if the same
// event appears several times in `handlers`, the last entry wins.
//
// The registry is modified here without taking a lock.
void InitEventSampledHandlers(
    std::vector<
        std::pair<std::string_view, std::unique_ptr<EventSampledHandler>>>
        handlers) {
  static c10::once_flag flag;
  c10::call_once(flag, [&]() {
    auto& registry = EventSampledHandlerRegistry();
    for (auto& [event, handler] : handlers) {
      // A single insert_or_assign covers both the "new event" and the
      // "slot already exists" cases with one lookup and one key construction.
      registry.insert_or_assign(std::string{event}, std::move(handler));
    }
  });
}
// Returns a reference to the registry slot for `event`.
//
// If no slot exists yet, an empty (null) one is created, so the returned
// reference is always valid; the referenced pointer is null when no handler
// has been installed for the event. The lookup/insert is serialized by a
// function-local mutex because the getter can be executed from different
// threads.
const std::unique_ptr<EventSampledHandler>& GetEventSampledHandler(
    std::string_view event) {
  static std::mutex guard;
  auto& registry = EventSampledHandlerRegistry();
  // The getter can be executed from different threads.
  std::lock_guard<std::mutex> lock(guard);
  // try_emplace finds the existing slot or value-initializes a null one in a
  // single lookup, building the std::string key only once.
  return registry.try_emplace(std::string{event}).first->second;
}
void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
TORCH_CHECK(logger);
*GetAPIUsageLogger() = std::move(logger);

View File

@ -287,6 +287,29 @@ void enforceThatImpl(
CAFFE_ENFORCE_BINARY_OP_WITH_CALLER( \
std::greater<void>(), >, x, y, ##__VA_ARGS__)
struct IValue;
// Abstract interface for a sink that receives sampled event logs.
// Handlers are installed with InitEventSampledHandlers and invoked through
// the C10_LOG_EVENT_SAMPLED macro.
class C10_API EventSampledHandler {
public:
// Receives one event. The arguments are whatever the C10_LOG_EVENT_SAMPLED
// call site passes after the event name.
// NOTE(review): `model_id` presumably identifies the model/function being
// logged and `args` its inputs -- confirm against the call sites.
virtual void log(
std::string_view model_id,
const std::vector<c10::IValue>& args) = 0;
// Virtual so that handlers can be destroyed through a base-class pointer.
virtual ~EventSampledHandler() = default;
};
// Logs an event through the handler registered under the name `event`.
//
// `event` must be a bare identifier: it is stringified to look up the handler
// and token-pasted into the name of a local variable. The lookup result is
// cached in a function-local static reference, so GetEventSampledHandler is
// called only the first time a given expansion site is reached. The remaining
// arguments are forwarded to EventSampledHandler::log and are evaluated only
// when a handler is present.
//
// NOTE: this expands to a declaration followed by an `if` statement (it is not
// wrapped in do { } while (0)), so it must appear where a declaration is
// allowed, and at most once per event name within a given scope.
#define C10_LOG_EVENT_SAMPLED(event, ...) \
static const std::unique_ptr<::c10::EventSampledHandler>& \
_##event##EventSampledHandler = ::c10::GetEventSampledHandler(#event); \
if (_##event##EventSampledHandler) { \
_##event##EventSampledHandler->log(__VA_ARGS__); \
}
// Installs the given (event name, handler) pairs into the process-wide
// registry of sampled-event handlers, taking ownership of the handlers.
// Only the first call has any effect; later calls are ignored.
// Must be called in the main thread before any other threads are spawned.
C10_API void InitEventSampledHandlers(
std::vector<
std::pair<std::string_view, std::unique_ptr<EventSampledHandler>>>);
// Returns a reference to the registry slot for the given event name. The
// referenced pointer is null when no handler has been installed for that
// event; a missing slot is created (empty) on first lookup.
C10_API const std::unique_ptr<EventSampledHandler>& GetEventSampledHandler(
std::string_view);
/**
* Very lightweight logging for the first time API usage. It's beneficial for
* tracking of individual functionality usage in larger applications.

View File

@ -66,6 +66,7 @@ static void placeholderCreator(GraphFunction&) {
}
// Runs this function on `stack` through its graph executor.
// Before executing, the call is reported to the "run" sampled-event handler
// (if one is registered) together with the function's qualified name and the
// stack as it is on entry.
void GraphFunction::run(Stack& stack) {
C10_LOG_EVENT_SAMPLED(run, qualname().qualifiedName(), stack);
get_executor().run(stack);
}

View File

@ -362,7 +362,16 @@ std::vector<IValue> ScriptTypeParser::evaluateDefaults(
// XXX: We need to turn optimization off here because otherwise we try to
// recursively initialize stuff in DecomposeOps.
GraphOptimizerEnabledGuard guard(false);
cu.get_function(def.name().name()).run(stack);
auto& f = cu.get_function(def.name().name());
auto* gf = dynamic_cast<GraphFunction*>(&f);
TORCH_INTERNAL_ASSERT(gf);
// 2024.08.14: Since we are starting to deprecate TorchScript usages,
// we are going to log all the calls to GraphFunction::run. That logging was
// noisy because GraphFunction::run is also called for default value
// evaluation, which generates a lot of useless log samples. Therefore, as a
// workaround, we directly use the executor API here, which avoids producing
// unnecessary log entries.
gf->get_executor().run(stack);
return stack.at(0).toTupleRef().elements().vec();
}