Summary:
Things changed in this PR that require review:
1. aten/src/ATen/core/interned_strings.h
2. torch/csrc/jit/ir/alias_analysis.h : exposing createValue to allow efficient mutation
3. torch/csrc/jit/runtime/symbolic_shape_registry.cpp : added gelu/tanh/erf in registry
4. torch/jit/_script.py : throws an error when scripting a model that uses autocast as a decorator, since that is not supported (illustrated below)
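For illustration only (not part of this PR's diff), a minimal Python sketch of the pattern this check rejects; the exact exception type and message are assumptions here:

import torch

class MyModule(torch.nn.Module):
    # Using autocast as a decorator is the unsupported pattern.
    @torch.cuda.amp.autocast()
    def forward(self, x):
        return torch.mm(x, x)

# Scripting is expected to raise once this change lands.
try:
    torch.jit.script(MyModule())
except Exception as e:
    print("scripting rejected autocast-as-decorator:", e)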
nvfuser code update:
1. codegen improvements and performance tuning
2. integration bug fixes for shape expression logic
3. kernel segmentation update to address perf regression from horizontal fusion
4. scalar CPU tensor promotion to support inter-device operations between a CPU scalar tensor and a CUDA tensor (see the sketch below)
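As an illustration of item 4 (a generic PyTorch snippet, not code from this PR), a 0-dim CPU scalar tensor used in an op with a CUDA tensor, which relies on the scalar promotion described above:

import torch

if torch.cuda.is_available():
    cuda_t = torch.randn(4, device="cuda")
    cpu_scalar = torch.tensor(2.0)  # 0-dim tensor resident on the CPU
    # Inter-device op: the CPU scalar is promoted so the fused kernel can
    # consume it alongside the CUDA tensor.
    out = cuda_t * cpu_scalar
    print(out.device)  # cuda:0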
Things reverted from local changes:
aten::gelu with approximation (tracked in PR: https://github.com/pytorch/pytorch/pull/61439)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/72127
Reviewed By: HamidShojanazeri
Differential Revision: D34113233
Pulled By: jbschlosser
fbshipit-source-id: b82cde32b71e324eca0ea57cb8c9f9647278ca74
(cherry picked from commit e009bc5c4e)
77 lines
1.8 KiB
C++
#include <torch/csrc/jit/codegen/cuda/instrumentation.h>

#include <c10/macros/Export.h>

#ifdef _WIN32
#include <c10/util/win32-headers.h>
#else
#include <pthread.h>
#include <unistd.h>
#endif

namespace torch {
namespace jit {
namespace fuser {
namespace cuda {
namespace inst {

Trace::Trace() {
  const char* trace_filename = getenv("PYTORCH_NVFUSER_TRACE");
  if (trace_filename != nullptr) {
    log_file_ = fopen(trace_filename, "w");
    TORCH_CHECK(log_file_ != nullptr, "Can't open trace file");

    // Disable the file stream buffering, since it may result
    // in torn writes in multi-threaded tracing
    setbuf(log_file_, nullptr);

    // Print the trace prologue
    // (including a dummy TRACE_START event)
    fprintf(log_file_, "{\n\"traceEvents\": [\n");
    start_timestamp_ = Clock::now();
    logEvent('I', "TRACE_START");
  }

  if (getenv("PYTORCH_NVFUSER_DISABLE_NVTX")) {
    record_nvtx_range_ = false;
  }
}

Trace::~Trace() {
  if (log_file_ != nullptr) {
    // Print trace epilogue
    logEvent('I', "TRACE_END", ' ');
    fprintf(log_file_, "],\n\"displayTimeUnit\": \"ms\"\n}\n");
    fclose(log_file_);
  }
}

void Trace::logEvent(char ph, const char* name, char sep) {
  const std::chrono::duration<double> d = Clock::now() - start_timestamp_;
  const double elapsed = d.count() * 1e6;

#ifdef _WIN32
  const unsigned int pid = GetCurrentProcessId();
  const unsigned int tid = GetCurrentThreadId();
#else
  const unsigned int pid = getpid();
  const unsigned int tid = std::hash<pthread_t>{}(pthread_self());
#endif // _WIN32

  fprintf(
      log_file_,
      "{ \"name\": \"%s\", \"ph\": \"%c\", \"pid\": %u, \"tid\": %u, \"ts\": %.0f }%c\n",
      name,
      ph,
      pid,
      tid,
      elapsed,
      sep);
}

} // namespace inst
} // namespace cuda
} // namespace fuser
} // namespace jit
} // namespace torch
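Usage note (an assumption about how this instrumentation is driven, not stated in the commit): the Trace constructor above reads PYTORCH_NVFUSER_TRACE, and the file it writes follows the Chrome trace-event layout ("traceEvents" / "displayTimeUnit"), so it can be opened in chrome://tracing. A minimal sketch, with an arbitrary output path:

import os

# Assumption: the variable must be set before the Trace singleton is first
# constructed, so set it (or export it in the shell) before running any
# nvFuser-fused TorchScript code.
os.environ["PYTORCH_NVFUSER_TRACE"] = "/tmp/nvfuser_trace.json"

import torch
# ... script and run a model with nvFuser enabled here; each logged event
# becomes one JSON object in /tmp/nvfuser_trace.json.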