#include <torch/csrc/python_headers.h>

#include <c10/core/DeviceType.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <ATen/autocast_mode.h>
#include <torch/csrc/autograd/profiler.h>
#include <torch/csrc/autograd/python_function.h>
#include <torch/csrc/autograd/function.h>

PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject *unused) {
  using namespace torch::autograd::profiler;
  auto tensor_module = THPObjectPtr(PyImport_ImportModule("torch.tensor"));
  if (!tensor_module)
    return nullptr;

  // NOTE: "leaks" THPVariableClass
  THPVariableClass = PyObject_GetAttrString(tensor_module, "Tensor");
  if (!THPVariableClass)
    return nullptr;

  auto autograd_module = THPObjectPtr(PyImport_ImportModule("torch.autograd"));
  if (!autograd_module)
    return nullptr;

  // NOTE: "leaks" Function
  THPFunctionClass = PyObject_GetAttrString(autograd_module, "Function");
  if (!THPFunctionClass)
    return nullptr;

  auto torch_C_module = THPObjectPtr(PyImport_ImportModule("torch._C"));
  if (!torch_C_module)
    return nullptr;
  auto _C_m = py::handle(torch_C_module).cast<py::module>();
  auto m = _C_m.def_submodule("_autograd", "autograd bindings");
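
  // Everything registered on `m` below lands in the private Python module
  // torch._C._autograd. An illustrative sketch of how these bindings surface
  // on the Python side (not part of this file):
  //
  //   import torch
  //   torch._C._autograd.ProfilerState.CPU     # bound via py::enum_ below
  //   torch._C._autograd.kineto_available()    # bound via m.def below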

  py::enum_<ProfilerState>(m, "ProfilerState")
      .value("Disabled", ProfilerState::Disabled)
      .value("CPU", ProfilerState::CPU)
      .value("CUDA", ProfilerState::CUDA)
      .value("NVTX", ProfilerState::NVTX)
      .value("KINETO", ProfilerState::KINETO);

  py::enum_<ActivityType>(m, "ProfilerActivity")
      .value("CPU", ActivityType::CPU)
      .value("CUDA", ActivityType::CUDA);

  py::class_<ProfilerConfig>(m, "ProfilerConfig")
      .def(py::init<ProfilerState, bool, bool, bool>());
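  // Assumed ctor argument order, per the ProfilerConfig declaration in the
  // profiler headers (listed here only as a reader aid):
  //   state, report_input_shapes, profile_memory, with_stack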

  py::class_<LegacyEvent>(m, "ProfilerEvent")
      .def("kind", &LegacyEvent::kindStr)
      .def("name", [](const LegacyEvent& e) { return e.name(); })
      .def("thread_id", &LegacyEvent::threadId)
      .def("fwd_thread_id", &LegacyEvent::fwdThreadId)
      .def("device", &LegacyEvent::device)
      .def("cpu_elapsed_us", &LegacyEvent::cpuElapsedUs)
      .def("cuda_elapsed_us", &LegacyEvent::cudaElapsedUs)
      .def("has_cuda", &LegacyEvent::hasCuda)
      .def("shapes", &LegacyEvent::shapes)
      .def("cpu_memory_usage", &LegacyEvent::cpuMemoryUsage)
      .def("cuda_memory_usage", &LegacyEvent::cudaMemoryUsage)
      .def("handle", &LegacyEvent::handle)
      .def("node_id", &LegacyEvent::nodeId)
      .def("is_remote", &LegacyEvent::isRemote)
      .def("sequence_nr", &LegacyEvent::sequenceNr)
      .def("stack", &LegacyEvent::stack)
      .def("scope", &LegacyEvent::scope)
      .def("correlation_id", &LegacyEvent::correlationId)
      .def("start_us", &LegacyEvent::cpuUs);

  py::enum_<c10::DeviceType>(m, "DeviceType")
      .value("CPU", c10::DeviceType::CPU)
      .value("CUDA", c10::DeviceType::CUDA)
      .value("MKLDNN", c10::DeviceType::MKLDNN)
      .value("OPENGL", c10::DeviceType::OPENGL)
      .value("OPENCL", c10::DeviceType::OPENCL)
      .value("IDEEP", c10::DeviceType::IDEEP)
      .value("HIP", c10::DeviceType::HIP)
      .value("FPGA", c10::DeviceType::FPGA)
      .value("MSNPU", c10::DeviceType::MSNPU)
      .value("XLA", c10::DeviceType::XLA)
      .value("Vulkan", c10::DeviceType::Vulkan)
      .value("Metal", c10::DeviceType::Metal);

#ifdef USE_KINETO
  py::class_<KinetoEvent>(m, "KinetoEvent")
      // name of the event
      .def("name", &KinetoEvent::name)
      // PyTorch thread id of the start callback
      .def("start_thread_id", [](const KinetoEvent& e) {
        return e.startThreadId();
      })
      // PyTorch thread id of the end callback
      .def("end_thread_id", [](const KinetoEvent& e) {
        return e.endThreadId();
      })
      // for events of scope BACKWARD_FUNCTION - PyTorch thread id
      // of the corresponding forward op
      .def("fwd_thread_id", [](const KinetoEvent& e) {
        return e.fwdThreadId();
      })
      // together with fwd_thread_id, used to uniquely identify
      // the forward op
      .def("sequence_nr", [](const KinetoEvent& e) {
        return e.sequenceNr();
      })
      // absolute start time (since unix epoch) in us
      .def("start_us", &KinetoEvent::startUs)
      // duration in us
      .def("duration_us", &KinetoEvent::durationUs)
      // used for correlation between high-level PyTorch events
      // and low-level device events
      .def("correlation_id", [](const KinetoEvent& e) {
        return e.correlationId();
      })
      // shapes of input tensors
      .def("shapes", [](const KinetoEvent& e) {
        if (e.hasShapes()) {
          return e.shapes();
        } else {
          return std::vector<std::vector<int64_t>>();
        }
      })
      // stack traces of the PyTorch CPU events
      .def("stack", [](const KinetoEvent& e) {
        if (e.hasStack()) {
          return e.stack();
        } else {
          return std::vector<std::string>();
        }
      })
      // type of the RecordFunction that generated a PyTorch CPU event
      // (op, torchscript function, user label, etc)
      .def("scope", [](const KinetoEvent& e) {
        return e.scope();
      })
      // device number, for CPU - process id
      .def("device_index", &KinetoEvent::deviceIndex)
      // for CUDA - stream id, for CPU - start thread id
      .def("device_resource_id", &KinetoEvent::deviceResourceId)
      // device type
      .def("device_type", [](const KinetoEvent& e) {
        return e.deviceType();
      })
      // correlation id of a linked event
      .def("linked_correlation_id", &KinetoEvent::linkedCorrelationId);

  py::class_<ProfilerResult>(m, "ProfilerResult")
      .def("events", &ProfilerResult::events)
      .def("legacy_events", &ProfilerResult::legacy_events)
      .def("save", &ProfilerResult::save);
  m.def("_enable_profiler", enableProfiler);
  m.def("_disable_profiler", disableProfiler);
  m.def("_prepare_profiler", prepareProfiler);
#endif

  m.def("kineto_available", kinetoAvailable);

  m.def("_enable_profiler_legacy", enableProfilerLegacy);
  py::class_<ProfilerDisableOptions>(m, "_ProfilerDisableOptions")
      .def(py::init<bool, bool>());
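  // Assumed meaning of the two booleans, per the ProfilerDisableOptions
  // declaration in the profiler headers: whether to clean up thread-local
  // profiler state, and whether to consolidate events from all threads.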
  m.def(
      "_disable_profiler_legacy",
      disableProfilerLegacy,
      py::arg("profiler_disable_options") = ProfilerDisableOptions());
  m.def("_profiler_enabled", profilerEnabled);
  m.def("_enable_record_function", [](bool enable) {
    at::enableRecordFunction(enable);
  });
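
  // An empty observer for tests: RecordFunctionCallback is built from plain
  // (non-capturing) function pointers, so passing nullptr registers a no-op
  // start callback; sampling_prob controls how often it would fire.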
  m.def("_set_empty_test_observer", [](bool is_global, double sampling_prob) {
    auto cb = at::RecordFunctionCallback(nullptr)
        .needsInputs(true)
        .samplingProb(sampling_prob);
    if (is_global) {
      at::addGlobalCallback(cb);
    } else {
      at::addThreadLocalCallback(cb);
    }
  });
  m.def("_clear_callbacks", []() {
    at::clearCallbacks();
  });

  Py_RETURN_TRUE;
}

namespace torch { namespace autograd {
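
// The helpers below back private switches on torch._C (see the methods table
// at the end of this file). An illustrative Python-side sketch:
//
//   torch._C.set_autocast_enabled(True)   # used by torch.cuda.amp.autocast
//   torch._C._set_grad_enabled(False)     # used by torch.no_grad and friends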

static PyObject * set_autocast_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (!PyBool_Check(arg)) {
    throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
  }
  at::autocast::set_enabled(arg == Py_True);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}

static PyObject * is_autocast_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (at::autocast::is_enabled()) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
  END_HANDLE_TH_ERRORS
}

static PyObject * clear_autocast_cache(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  at::autocast::clear_cache();
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}

static PyObject * autocast_increment_nesting(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  return THPUtils_packInt64(at::autocast::increment_nesting());
  END_HANDLE_TH_ERRORS
}

static PyObject * autocast_decrement_nesting(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  return THPUtils_packInt64(at::autocast::decrement_nesting());
  END_HANDLE_TH_ERRORS
}

static PyObject * set_grad_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (!PyBool_Check(arg)) {
    throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
  }
  GradMode::set_enabled(arg == Py_True);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}

static PyObject * is_grad_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (GradMode::is_enabled()) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
  END_HANDLE_TH_ERRORS
}
static PyObject * set_anomaly_mode_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (!PyBool_Check(arg)) {
    throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
  }
  AnomalyMode::set_enabled(arg == Py_True);
  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
}

static PyObject * is_anomaly_mode_enabled(PyObject* _unused, PyObject *arg) {
  HANDLE_TH_ERRORS
  if (AnomalyMode::is_enabled()) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
  END_HANDLE_TH_ERRORS
}

// autograd methods on torch._C
static PyMethodDef methods[] = { // NOLINT
  {"_set_grad_enabled", set_grad_enabled, METH_O, nullptr},
  {"is_grad_enabled", is_grad_enabled, METH_NOARGS, nullptr},
  {"set_autocast_enabled", set_autocast_enabled, METH_O, nullptr},
  {"is_autocast_enabled", is_autocast_enabled, METH_NOARGS, nullptr},
  {"clear_autocast_cache", clear_autocast_cache, METH_NOARGS, nullptr},
  {"autocast_increment_nesting", autocast_increment_nesting, METH_NOARGS, nullptr},
  {"autocast_decrement_nesting", autocast_decrement_nesting, METH_NOARGS, nullptr},
  {"set_anomaly_enabled", set_anomaly_mode_enabled, METH_O, nullptr},
  {"is_anomaly_enabled", is_anomaly_mode_enabled, METH_NOARGS, nullptr},
  {nullptr, nullptr, 0, nullptr}
};
PyMethodDef* python_functions() {
  return methods;
}

}} // namespace torch::autograd