mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[Profiler][Minor] Group and consolidate stub APIs (#85510)
There is a concept in the profiler of a stub that wraps a profiling API. It was introduced for CUDA profiling before Kineto, and ITT has adopted it to call into VTune APIs. However, for the most part we don't really interact with these stubs when developing the PyTorch profiler. Thus it makes sense to unify the fallback registration mechanism and create a subfolder to free up real estate in the top-level `torch/csrc/profiler` directory. Differential Revision: [D39108647](https://our.internmc.facebook.com/intern/diff/D39108647/) Pull Request resolved: https://github.com/pytorch/pytorch/pull/85510 Approved by: https://github.com/aaronenyeshi
This commit is contained in:
parent
bc4ca4c2c4
commit
b8f14b7877
|
|
@ -133,7 +133,6 @@ libtorch_sources_common = sorted(core_sources_common + torch_unpickler_common)
|
|||
libtorch_profiler_sources = [
|
||||
"torch/csrc/autograd/profiler_legacy.cpp",
|
||||
"torch/csrc/autograd/profiler_kineto.cpp",
|
||||
"torch/csrc/profiler/api.cpp",
|
||||
"torch/csrc/profiler/collection.cpp",
|
||||
"torch/csrc/profiler/execution_graph_observer.cpp",
|
||||
"torch/csrc/profiler/kineto_shim.cpp",
|
||||
|
|
@ -142,6 +141,7 @@ libtorch_profiler_sources = [
|
|||
"torch/csrc/profiler/itt_observer.cpp",
|
||||
"torch/csrc/profiler/orchestration/observer.cpp",
|
||||
"torch/csrc/profiler/orchestration/python_tracer.cpp",
|
||||
"torch/csrc/profiler/stubs/base.cpp",
|
||||
"torch/csrc/monitor/counters.cpp",
|
||||
"torch/csrc/monitor/events.cpp",
|
||||
]
|
||||
|
|
@ -661,7 +661,7 @@ libtorch_cuda_core_sources = [
|
|||
"torch/csrc/cuda/comm.cpp",
|
||||
"torch/csrc/cuda/memory_snapshot.cpp",
|
||||
"torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp",
|
||||
"torch/csrc/profiler/cuda.cpp",
|
||||
"torch/csrc/profiler/stubs/cuda.cpp",
|
||||
"torch/csrc/autograd/functions/comm.cpp",
|
||||
"torch/csrc/jit/codegen/cuda/arith.cpp",
|
||||
"torch/csrc/jit/codegen/cuda/compute_at.cpp",
|
||||
|
|
|
|||
|
|
@ -585,7 +585,7 @@ endif()
|
|||
if(${USE_ITT})
|
||||
list(APPEND TORCH_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp
|
||||
${TORCH_SRC_DIR}/csrc/profiler/itt.cpp
|
||||
${TORCH_SRC_DIR}/csrc/profiler/stubs/itt.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -1115,6 +1115,7 @@ def main():
|
|||
'include/torch/csrc/onnx/*.h',
|
||||
'include/torch/csrc/profiler/*.h',
|
||||
'include/torch/csrc/profiler/orchestration/*.h',
|
||||
'include/torch/csrc/profiler/stubs/*.h',
|
||||
'include/torch/csrc/utils/*.h',
|
||||
'include/torch/csrc/tensor/*.h',
|
||||
'include/torch/csrc/lazy/backend/*.h',
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include <torch/csrc/profiler/api.h>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <torch/csrc/Export.h>
|
||||
#include <torch/csrc/profiler/api.h>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include <c10/macros/Export.h>
|
||||
#include <ittnotify.h>
|
||||
#include <torch/csrc/profiler/api.h>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
|
|
|
|||
|
|
@ -1,129 +0,0 @@
|
|||
#include <torch/csrc/profiler/api.h>
|
||||
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
namespace impl {
|
||||
|
||||
ProfilerStubs::~ProfilerStubs() = default;
|
||||
|
||||
namespace {
|
||||
struct DefaultCUDAStubs : public ProfilerStubs {
|
||||
void record(
|
||||
int* /*device*/,
|
||||
ProfilerEventStub* /*event*/,
|
||||
int64_t* /*cpu_ns*/) const override {
|
||||
fail();
|
||||
}
|
||||
float elapsed(
|
||||
const ProfilerEventStub* /*event*/,
|
||||
const ProfilerEventStub* /*event2*/) const override {
|
||||
fail();
|
||||
return 0.f;
|
||||
}
|
||||
void mark(const char* /*name*/) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePush(const char* /*name*/) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePop() const override {
|
||||
fail();
|
||||
}
|
||||
bool enabled() const override {
|
||||
return false;
|
||||
}
|
||||
void onEachDevice(std::function<void(int)> /*op*/) const override {
|
||||
fail();
|
||||
}
|
||||
void synchronize() const override {
|
||||
fail();
|
||||
}
|
||||
~DefaultCUDAStubs() override = default;
|
||||
|
||||
private:
|
||||
void fail() const {
|
||||
AT_ERROR("CUDA used in profiler but not enabled.");
|
||||
}
|
||||
};
|
||||
|
||||
const DefaultCUDAStubs default_cuda_stubs;
|
||||
constexpr const DefaultCUDAStubs* default_cuda_stubs_addr = &default_cuda_stubs;
|
||||
// Constant initialization, so it is guaranteed to be initialized before
|
||||
// static initialization calls which may invoke registerCUDAMethods
|
||||
inline const ProfilerStubs*& cuda_stubs() {
|
||||
static const ProfilerStubs* stubs_ =
|
||||
static_cast<const ProfilerStubs*>(default_cuda_stubs_addr);
|
||||
return stubs_;
|
||||
}
|
||||
|
||||
struct DefaultITTStubs : public ProfilerStubs {
|
||||
void record(
|
||||
int* /*device*/,
|
||||
ProfilerEventStub* /*event*/,
|
||||
int64_t* /*cpu_ns*/) const override {
|
||||
fail();
|
||||
}
|
||||
float elapsed(
|
||||
const ProfilerEventStub* /*event*/,
|
||||
const ProfilerEventStub* /*event2*/) const override {
|
||||
fail();
|
||||
return 0.f;
|
||||
}
|
||||
void mark(const char* /*name*/) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePush(const char* /*name*/) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePop() const override {
|
||||
fail();
|
||||
}
|
||||
bool enabled() const override {
|
||||
return false;
|
||||
}
|
||||
void onEachDevice(std::function<void(int)> /*op*/) const override {
|
||||
fail();
|
||||
}
|
||||
void synchronize() const override {
|
||||
fail();
|
||||
}
|
||||
~DefaultITTStubs() override = default;
|
||||
|
||||
private:
|
||||
void fail() const {
|
||||
AT_ERROR("ITT used in profiler but not enabled.");
|
||||
}
|
||||
};
|
||||
|
||||
const DefaultITTStubs default_itt_stubs;
|
||||
constexpr const DefaultITTStubs* default_itt_stubs_addr = &default_itt_stubs;
|
||||
// Constant initialization, so it is guaranteed to be initialized before
|
||||
// static initialization calls which may invoke registerITTMethods
|
||||
inline const ProfilerStubs*& itt_stubs() {
|
||||
static const ProfilerStubs* stubs_ =
|
||||
static_cast<const ProfilerStubs*>(default_itt_stubs_addr);
|
||||
return stubs_;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
const ProfilerStubs* cudaStubs() {
|
||||
return cuda_stubs();
|
||||
}
|
||||
|
||||
void registerCUDAMethods(ProfilerStubs* stubs) {
|
||||
cuda_stubs() = stubs;
|
||||
}
|
||||
|
||||
const ProfilerStubs* ittStubs() {
|
||||
return itt_stubs();
|
||||
}
|
||||
|
||||
void registerITTMethods(ProfilerStubs* stubs) {
|
||||
itt_stubs() = stubs;
|
||||
}
|
||||
|
||||
} // namespace impl
|
||||
} // namespace profiler
|
||||
} // namespace torch
|
||||
|
|
@ -1,46 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <ATen/record_function.h>
|
||||
#include <torch/csrc/Export.h>
|
||||
#include <torch/csrc/profiler/orchestration/observer.h>
|
||||
|
||||
struct CUevent_st;
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
namespace impl {
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// -- Annotation --------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------
|
||||
using ProfilerEventStub = std::shared_ptr<CUevent_st>;
|
||||
|
||||
struct TORCH_API ProfilerStubs {
|
||||
virtual void record(int* device, ProfilerEventStub* event, int64_t* cpu_ns)
|
||||
const = 0;
|
||||
virtual float elapsed(
|
||||
const ProfilerEventStub* event,
|
||||
const ProfilerEventStub* event2) const = 0;
|
||||
virtual void mark(const char* name) const = 0;
|
||||
virtual void rangePush(const char* name) const = 0;
|
||||
virtual void rangePop() const = 0;
|
||||
virtual bool enabled() const {
|
||||
return false;
|
||||
}
|
||||
virtual void onEachDevice(std::function<void(int)> op) const = 0;
|
||||
virtual void synchronize() const = 0;
|
||||
virtual ~ProfilerStubs();
|
||||
};
|
||||
|
||||
TORCH_API void registerCUDAMethods(ProfilerStubs* stubs);
|
||||
TORCH_API const ProfilerStubs* cudaStubs();
|
||||
TORCH_API void registerITTMethods(ProfilerStubs* stubs);
|
||||
TORCH_API const ProfilerStubs* ittStubs();
|
||||
|
||||
} // namespace impl
|
||||
} // namespace profiler
|
||||
} // namespace torch
|
||||
|
||||
// There are some components which use these symbols. Until we migrate them
|
||||
// we have to mirror them in the old autograd namespace.
|
||||
namespace torch {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include <torch/csrc/profiler/containers.h>
|
||||
#include <torch/csrc/profiler/kineto_shim.h>
|
||||
#include <torch/csrc/profiler/orchestration/python_tracer.h>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
#include <torch/csrc/utils/python_stub.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include <torch/csrc/profiler/itt_observer.h>
|
||||
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include <torch/csrc/profiler/nvtx_observer.h>
|
||||
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
|
|
|
|||
81
torch/csrc/profiler/stubs/base.cpp
Normal file
81
torch/csrc/profiler/stubs/base.cpp
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
namespace impl {
|
||||
|
||||
// Out-of-line (defaulted) definition of the virtual destructor that
// stubs/base.h only declares.
ProfilerStubs::~ProfilerStubs() = default;
|
||||
|
||||
namespace {
|
||||
// Fallback ProfilerStubs implementation shared by every backend registered
// through REGISTER_DEFAULT further down in this file. It reports itself as
// disabled (enabled() returns false) and routes every other operation to
// fail(), which reports "<name> used in profiler but not enabled." through
// AT_ERROR.
struct DefaultStubs : public ProfilerStubs {
|
||||
// `name` is stored as a raw pointer (no copy is made) and is only read when
// fail() builds its error message, so the string must stay valid for as long
// as this object is used. REGISTER_DEFAULT passes a stringified macro
// argument, i.e. a string literal.
DefaultStubs(const char* name) : name_{name} {}
|
||||
|
||||
// All arguments are ignored; the call only reports the error.
void record(int*, ProfilerEventStub*, int64_t*) const override {
|
||||
fail();
|
||||
}
|
||||
float elapsed(const ProfilerEventStub*, const ProfilerEventStub*)
|
||||
const override {
|
||||
fail();
|
||||
// NOTE(review): presumably unreachable and present only because the function
// must return a float -- assumes AT_ERROR does not return; confirm.
|
||||
return 0.f;
|
||||
}
|
||||
void mark(const char*) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePush(const char*) const override {
|
||||
fail();
|
||||
}
|
||||
void rangePop() const override {
|
||||
fail();
|
||||
}
|
||||
// The only operation that does not call fail(): callers can query this to
// find out that no real backend is behind this object.
bool enabled() const override {
|
||||
return false;
|
||||
}
|
||||
// The callback is never invoked; the call only reports the error.
void onEachDevice(std::function<void(int)>) const override {
|
||||
fail();
|
||||
}
|
||||
void synchronize() const override {
|
||||
fail();
|
||||
}
|
||||
~DefaultStubs() override = default;
|
||||
|
||||
private:
|
||||
// Single place where the error message is built, so the wording is identical
// for every operation and only the backend name varies.
void fail() const {
|
||||
AT_ERROR(name_, " used in profiler but not enabled.");
|
||||
}
|
||||
|
||||
// Backend name used as the prefix of the error message (e.g. the "CUDA" or
// "ITT" passed by the REGISTER_DEFAULT invocations in this file).
const char* const name_;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// REGISTER_DEFAULT(name, upper_name) generates everything one profiling
// backend needs in this translation unit:
//   * default_<name>_stubs      - a file-local DefaultStubs constructed with
//                                 the stringified `upper_name`.
//   * default_<name>_stubs_addr - constexpr pointer to that object.
//   * <name>_stubs()            - file-local accessor returning a reference to
//                                 a function-local static pointer that starts
//                                 out pointing at the default object.
//   * <name>Stubs()             - public getter declared in stubs/base.h;
//                                 returns the current pointer.
//   * register<upper_name>Methods(stubs)
//                               - public setter declared in stubs/base.h;
//                                 overwrites the current pointer. It is a
//                                 plain assignment: no null check, no locking
//                                 and no ownership transfer is visible here.
// `name` supplies the lower-case identifier pieces and `upper_name` the
// upper-case ones (and the text of the error message), which is why both
// spellings have to be passed.
#define REGISTER_DEFAULT(name, upper_name) \
|
||||
namespace { \
|
||||
const DefaultStubs default_##name##_stubs{#upper_name}; \
|
||||
constexpr const DefaultStubs* default_##name##_stubs_addr = \
|
||||
&default_##name##_stubs; \
|
||||
\
|
||||
/* Constant initialization, so it is guaranteed to be initialized before*/ \
|
||||
/* static initialization calls which may invoke register<name>Methods*/ \
|
||||
inline const ProfilerStubs*& name##_stubs() { \
|
||||
static const ProfilerStubs* stubs_ = \
|
||||
static_cast<const ProfilerStubs*>(default_##name##_stubs_addr); \
|
||||
return stubs_; \
|
||||
} \
|
||||
} /*namespace*/ \
|
||||
\
|
||||
const ProfilerStubs* name##Stubs() { \
|
||||
return name##_stubs(); \
|
||||
} \
|
||||
\
|
||||
void register##upper_name##Methods(ProfilerStubs* stubs) { \
|
||||
name##_stubs() = stubs; \
|
||||
}
|
||||
|
||||
// Instantiate the fallback + registration pair for each backend declared in
// stubs/base.h: cudaStubs()/registerCUDAMethods() and
// ittStubs()/registerITTMethods().
REGISTER_DEFAULT(cuda, CUDA)
|
||||
REGISTER_DEFAULT(itt, ITT)
|
||||
// The macro is an implementation detail of this file; do not leak it.
#undef REGISTER_DEFAULT
|
||||
|
||||
} // namespace impl
|
||||
} // namespace profiler
|
||||
} // namespace torch
|
||||
43
torch/csrc/profiler/stubs/base.h
Normal file
43
torch/csrc/profiler/stubs/base.h
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include <torch/csrc/Export.h>
|
||||
|
||||
struct CUevent_st;
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
namespace impl {
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// -- Annotation --------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------
|
||||
using ProfilerEventStub = std::shared_ptr<CUevent_st>;
|
||||
|
||||
// Abstract interface through which the profiler calls into an optional
// profiling backend. Every operation is a const virtual; all of them are pure
// except enabled(), which defaults to false. The objects returned by
// cudaStubs() / ittStubs() are accessed through this interface.
// NOTE(review): the per-method descriptions below are inferred from the
// parameter names only -- this header does not define their semantics;
// confirm against the concrete implementations.
struct TORCH_API ProfilerStubs {
|
||||
// Presumably records an event and reports the device and a CPU timestamp
// through the pointer arguments -- TODO confirm.
virtual void record(int* device, ProfilerEventStub* event, int64_t* cpu_ns)
|
||||
const = 0;
|
||||
// Presumably the time elapsed between `event` and `event2`; units are not
// stated here -- TODO confirm.
virtual float elapsed(
|
||||
const ProfilerEventStub* event,
|
||||
const ProfilerEventStub* event2) const = 0;
|
||||
virtual void mark(const char* name) const = 0;
|
||||
virtual void rangePush(const char* name) const = 0;
|
||||
virtual void rangePop() const = 0;
|
||||
// Not pure: an implementation that does not override this reports itself as
// disabled.
virtual bool enabled() const {
|
||||
return false;
|
||||
}
|
||||
virtual void onEachDevice(std::function<void(int)> op) const = 0;
|
||||
virtual void synchronize() const = 0;
|
||||
// Declared here, defined out of line (defaulted) in stubs/base.cpp.
virtual ~ProfilerStubs();
|
||||
};
|
||||
|
||||
// Registration/accessor pairs, one per backend. The definitions are generated
// by the REGISTER_DEFAULT macro in stubs/base.cpp: register*Methods() replaces
// the pointer that the matching *Stubs() accessor returns, and until something
// is registered the accessor returns a fallback object whose enabled() is
// false and whose other operations report an error.
TORCH_API void registerCUDAMethods(ProfilerStubs* stubs);
|
||||
TORCH_API const ProfilerStubs* cudaStubs();
|
||||
TORCH_API void registerITTMethods(ProfilerStubs* stubs);
|
||||
TORCH_API const ProfilerStubs* ittStubs();
|
||||
|
||||
} // namespace impl
|
||||
} // namespace profiler
|
||||
} // namespace torch
|
||||
|
|
@ -1,9 +1,11 @@
|
|||
#include <sstream>
|
||||
|
||||
#include <nvToolsExt.h>
|
||||
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <c10/util/irange.h>
|
||||
#include <nvToolsExt.h>
|
||||
#include <torch/csrc/autograd/profiler.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
#include <torch/csrc/profiler/util.h>
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
#include <c10/util/irange.h>
|
||||
#include <torch/csrc/autograd/profiler.h>
|
||||
#include <torch/csrc/itt_wrapper.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <c10/util/irange.h>
|
||||
#include <torch/csrc/itt_wrapper.h>
|
||||
#include <torch/csrc/profiler/stubs/base.h>
|
||||
|
||||
namespace torch {
|
||||
namespace profiler {
|
||||
namespace impl {
|
||||
Loading…
Reference in New Issue
Block a user