mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Skip the source info in the error report if the source code is too large (#105608)
Summary: A small model (<100MB) took about 20 minutes to load and consumed 16GB of memory. Strobelight profiling: https://fburl.com/strobelight/abwtz0ry We found that calc_line_start_offsets is the culprit: line_starting_offsets_ is a vector of line numbers, there are >20000 places where we generate such an ErrorReport, and the source has ~100000 lines. So the total memory cost is about 100000 x 20000 x 8 = ~16GB. We propose to skip the source info in error reports for extremely large source files (>1MB as proposed; the check in this diff uses a 512 * 1024 byte threshold), and to keep an environment variable (PYTORCH_JIT_ENABLE_LARGE_SOURCE_LOCATION) that preserves the ability to print the source code info for large source files. Test Plan: buck run mode/opt-split-dwarf scripts/lufang:load_pt_model -- --model_file_path=/data/local/models/961746678/2/961746678_2.predictor.disagg.gpu.local Before the change, the model takes 20 minutes to load and costs 16GB of memory (the model itself is only <100MB); after the change, it takes 15s to load. Most of the time and space is spent in calc_line_start_offsets: https://fburl.com/code/2to60zqu Differential Revision: D47610805 Pull Request resolved: https://github.com/pytorch/pytorch/pull/105608 Approved by: https://github.com/hl475
This commit is contained in:
parent
e3539a0e54
commit
c44ae5544f
|
|
@ -43,6 +43,23 @@
|
|||
#include <set>
|
||||
#include <stack>
|
||||
|
||||
namespace {
|
||||
// Decides whether an error report should include source-location info for a
// source of `file_size` bytes.
//
// Sources smaller than 512 * 1024 bytes always get source info. For larger
// sources the answer comes from the PYTORCH_JIT_ENABLE_LARGE_SOURCE_LOCATION
// environment variable: unset, "0", "FALSE" or "false" means no source info;
// any other value turns it back on.
bool reportSourceLocation(size_t file_size) {
  const bool is_small_source = file_size < 512 * 1024;
  if (is_small_source) {
    return true;
  }

  const char* override_value =
      std::getenv("PYTORCH_JIT_ENABLE_LARGE_SOURCE_LOCATION");
  if (override_value == nullptr) {
    // No opt-in from the environment: large sources stay without source info.
    return false;
  }

  const bool explicitly_disabled = std::strcmp(override_value, "0") == 0 ||
      std::strcmp(override_value, "FALSE") == 0 ||
      std::strcmp(override_value, "false") == 0;
  return !explicitly_disabled;
}
|
||||
} // namespace
|
||||
|
||||
namespace torch::jit {
|
||||
|
||||
using FunctionTable = std::unordered_map<std::string, Function&>;
|
||||
|
|
@ -1987,11 +2004,22 @@ struct to_ir {
|
|||
if (save_false->findInAnyFrame(v) || false_exits) {
|
||||
mutated_variables.insert(v);
|
||||
} else {
|
||||
ErrorReport error(loc);
|
||||
environment_stack->setVariableTypeError(v, [=]() -> std::string {
|
||||
error << v << " is not defined in the false branch";
|
||||
return error.what();
|
||||
});
|
||||
if (reportSourceLocation(loc.source()->size())) {
|
||||
ErrorReport error(loc);
|
||||
environment_stack->setVariableTypeError(v, [=]() -> std::string {
|
||||
error << v << " is not defined in the false branch";
|
||||
return error.what();
|
||||
});
|
||||
} else {
|
||||
environment_stack->setVariableTypeError(v, [=]() -> std::string {
|
||||
std::stringstream ss;
|
||||
ss << v << " is not defined in the false branch. "
|
||||
<< "The source info is eliminated due to the source file is too large. "
|
||||
<< "To get it back, please set PYTORCH_JIT_ENABLE_LARGE_SOURCE_LOCATION=1 "
|
||||
<< "as env var";
|
||||
return ss.str();
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2001,11 +2029,22 @@ struct to_ir {
|
|||
if (save_true->findInAnyFrame(v) || true_exits) {
|
||||
mutated_variables.insert(v);
|
||||
} else {
|
||||
ErrorReport error(loc);
|
||||
environment_stack->setVariableTypeError(v, [=]() -> std::string {
|
||||
error << v << " is not defined in the true branch";
|
||||
return error.what();
|
||||
});
|
||||
if (reportSourceLocation(loc.source()->size())) {
|
||||
ErrorReport error(loc);
|
||||
environment_stack->setVariableTypeError(v, [=]() -> std::string {
|
||||
error << v << " is not defined in the true branch";
|
||||
return error.what();
|
||||
});
|
||||
} else {
|
||||
// Fallback used when source-location reporting is skipped for this source:
// register a plain-text error for `v` that is built without an ErrorReport,
// so no source info is attached to it.
environment_stack->setVariableTypeError(v, [=]() -> std::string {
  std::stringstream ss;
  // This is the true-branch check, so the message names the true branch,
  // matching the ErrorReport variant of the same error.
  ss << v << " is not defined in the true branch. "
     << "The source info is eliminated due to the source file is too large. "
     << "To get it back, please set PYTORCH_JIT_ENABLE_LARGE_SOURCE_LOCATION=1 "
     << "as env var";
  return ss.str();
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user