Add support to generate pprof results to tf.profiler

A fun thing: it can profile not only time and memory, but also parameters, etc.

PiperOrigin-RevId: 163767517

parent 78a90370ef
commit edac90c7c7
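For orientation (not part of the commit): this is roughly how the new output type is driven from the Python API once this change is in, mirroring the test added below. The import paths, `sess`, and `run_meta` are assumptions for the sketch.

```python
# Sketch: profile the 'code' view into a pprof file (TF 1.x style APIs;
# import paths are assumed, adjust for your build).
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder

builder = option_builder.ProfileOptionBuilder
opts = (builder(builder.time_and_memory())
        .select(['accelerator_micros'])              # or 'bytes', 'params', ...
        .with_max_depth(100000)
        .with_node_names(trim_name_regexes=['ops.py.*'])
        .with_pprof_output('/tmp/profile.pb.gz')     # new in this commit
        .build())

# sess is an open tf.Session; run_meta is a RunMetadata collected with
# RunOptions.FULL_TRACE.
model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)
# Then visualize:  pprof -png --nodecount=20 --sample_index=1 /tmp/profile.pb.gz
```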
@@ -181,6 +181,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a
 ### Visualize time and memory.
 ```
+# The following example generates a timeline.
 tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename>

 generating trace file.

@@ -191,9 +192,29 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
 ******************************************************
 ```
 <left>
-[CodeTimeline](g3doc/graph_timeline.png)
+[Timeline](g3doc/graph_timeline.png)
 </left>

+```
+# The following example generates a pprof graph (only supported by code view).
+# Since TensorFlow runs the graph instead of Python code, the pprof graph
+# doesn't profile the statistics of Python, but the TensorFlow graph
+# nodes created by the Python call stack.
+# Nevertheless, it pops critical Python code path for us.
+#
+# `-trim_name_regexes` trims the python call stack, which are always the same
+# for the leaves.
+# `-select accelerator_micros` pick accelerator time for pprof graph. User
+# can also generate memory profile using `-select bytes`
+tfprof> code -max_depth 100 -trim_name_regexes '^ops.py.*' -select accelerator_micros -output pprof:outfile=<filename>
+
+# Use pprof to visualize the generated file.
+pprof -png --nodecount=20 --sample_index=1 <filename>
+```
+
+<left>
+[PprofGraph](g3doc/pprof.jpg)
+</left>

 ### Feature Request and Bug Report
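The generated file is a gzip-compressed `Profile` protobuf (see `profile.proto` added by this commit). A minimal sketch of inspecting it from Python 3, along the same lines as the new unit test; the path is hypothetical. It also shows why the docs say `--sample_index=1`: value 0 of each sample is a plain count, value 1 is the metric chosen with `-select`.

```python
# Sketch: peek inside a tfprof-generated pprof file.
import gzip

from tensorflow.core.profiler import profile_pb2

with open('/tmp/profile.pb.gz', 'rb') as f:
    profile = profile_pb2.Profile.FromString(gzip.decompress(f.read()))

# Two values are stored per sample: index 0 is a plain count, index 1 is the
# metric chosen with -select (hence --sample_index=1 in the pprof command).
for vt in profile.sample_type:
    print(profile.string_table[vt.type], profile.string_table[vt.unit])
print('samples:', len(profile.sample), 'functions:', len(profile.function))
```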
BIN tensorflow/core/profiler/g3doc/pprof.jpg (new binary file, 261 KiB; not shown)
@@ -21,6 +21,8 @@ limitations under the License.
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/io/zlib_compression_options.h"
+#include "tensorflow/core/lib/io/zlib_outputbuffer.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"

@@ -46,66 +48,335 @@ string GetTraceString(const CodeDef::Trace& trace) {
   }
   return ntrace;
 }

+// StringTable maps each string to an id.
+class StringTable {
+ public:
+  StringTable() {
+    // Pprof requires first entry in string_table to be ''.
+    string_id_[""] = 0;
+    all_strings_.push_back("");
+  }
+
+  // Returns the index of a string. If not found, inserts the string and
+  // return the inserted index.
+  uint64 GetIndex(const string& str) {
+    auto idx = string_id_.find(str);
+    if (idx != string_id_.end()) {
+      return idx->second;
+    }
+    all_strings_.push_back(str);
+    return string_id_.insert(std::pair<string, int64>(str, string_id_.size()))
+        .first->second;
+  }
+
+  const std::vector<string>& strings() const { return all_strings_; }
+
+ private:
+  std::map<string, uint64> string_id_;
+  std::vector<string> all_strings_;
+};
+
+// FunctionTable maps each function to an id.
+class FunctionTable {
+ public:
+  explicit FunctionTable(StringTable* string_table)
+      : string_table_(string_table) {}
+
+  // Returns the index of a function. If not found, adds a function proto
+  // and returns the function index.
+  uint64 GetIndex(const string& file_path, const string& func_name,
+                  uint64 func_start_line) {
+    auto key = std::tuple<string, string, uint64>(file_path, func_name,
+                                                  func_start_line);
+    auto idx = function_table_.find(key);
+    if (idx != function_table_.end()) {
+      return idx->second.id();
+    }
+    pprof::Function* func_pb = &function_table_[key];
+    // function index should start from 1.
+    func_pb->set_id(function_table_.size());
+    func_pb->set_name(string_table_->GetIndex(func_name));
+    func_pb->set_filename(string_table_->GetIndex(file_path));
+    func_pb->set_start_line(func_start_line);
+    return func_pb->id();
+  }
+
+  const std::map<std::tuple<string, string, uint64>, pprof::Function>&
+  functions() const {
+    return function_table_;
+  }
+
+ private:
+  StringTable* string_table_;
+  std::map<std::tuple<string, string, uint64>, pprof::Function> function_table_;
+};
+
+// LocationTable maps each function call to an id.
+class LocationTable {
+ public:
+  explicit LocationTable(FunctionTable* function_table)
+      : function_table_(function_table) {}
+
+  // Returns the index of a function call localtion. If not found, adds a
+  // location proto and returns the location index.
+  uint64 GetIndex(const string& file_path, uint64 line_number,
+                  const string& called_function_name,
+                  const string& called_file_path,
+                  uint64 called_func_start_line) {
+    auto key = std::tuple<string, string, uint64>(
+        file_path, called_function_name, line_number);
+    auto idx = location_table_.find(key);
+    if (idx != location_table_.end()) {
+      return idx->second.id();
+    }
+    pprof::Location* location_pb = &location_table_[key];
+    location_pb->set_id(location_table_.size());
+    pprof::Line* line_pb = location_pb->add_line();
+    line_pb->set_function_id(function_table_->GetIndex(
+        called_file_path, called_function_name, called_func_start_line));
+    line_pb->set_line(line_number);
+    return location_pb->id();
+  }
+
+  const std::map<std::tuple<string, string, uint64>, pprof::Location>&
+  locations() const {
+    return location_table_;
+  }
+
+ private:
+  FunctionTable* function_table_;
+  std::map<std::tuple<string, string, uint64>, pprof::Location> location_table_;
+};
+
+// Samples stores samples of all calls. A sample is a single call trace,
+// that is, the call path from top caller to the leaf callee.
+class Samples {
+ public:
+  explicit Samples(StringTable* string_table, const Options* opts)
+      : string_table_(string_table), opts_(opts) {}
+
+  // 'node' is the leaf of the displayed trace. It includes all graph nodes
+  // created by it. 'location_ids' contains
+  // the call stack, from callee to caller.
+  // This method adds the statistics of graph nodes created by the python
+  // call.
+  void Add(const CodeNode* node, const std::vector<uint64>& location_ids) {
+    // displayed leaf might not be true leaf. Retrive the true leaves for
+    // stats.
+    std::vector<const CodeNode*> all_leaf = FetchAllLeaf(node);
+    CHECK(!all_leaf.empty()) << node->name();
+
+    for (const CodeNode* cn : all_leaf) {
+      for (auto gn_it : cn->node->graph_nodes()) {
+        const TFGraphNode* gn = gn_it.second;
+        pprof::Sample* sample_pb = &sample_table_[gn->name()];
+        for (uint64 id : location_ids) {
+          sample_pb->mutable_location_id()->Add(id);
+        }
+        pprof::Label* label_pb = sample_pb->mutable_label()->Add();
+        label_pb->set_key(string_table_->GetIndex("node_name"));
+        label_pb->set_str(string_table_->GetIndex(gn->name()));
+
+        sample_pb->mutable_value()->Add(1);
+        string type = *opts_->select.begin();
+        if (type == kShown[1]) {
+          sample_pb->mutable_value()->Add(gn->exec_micros(node->node->step()));
+        } else if (type == kShown[9]) {
+          sample_pb->mutable_value()->Add(
+              gn->accelerator_exec_micros(node->node->step()));
+        } else if (type == kShown[10]) {
+          sample_pb->mutable_value()->Add(
+              gn->cpu_exec_micros(node->node->step()));
+        } else if (type == kShown[0]) {
+          sample_pb->mutable_value()->Add(
+              gn->requested_bytes(node->node->step()));
+        } else if (type == kShown[2]) {
+          sample_pb->mutable_value()->Add(gn->parameters());
+        } else if (type == kShown[3]) {
+          sample_pb->mutable_value()->Add(gn->float_ops(node->node->step()));
+        } else {
+          fprintf(stderr, "pprof doesn't support -select=%s\n", type.c_str());
+        }
+      }
+    }
+  }
+
+  const std::map<string, pprof::Sample>& samples() const {
+    return sample_table_;
+  }
+
+ private:
+  std::vector<const CodeNode*> FetchAllLeaf(const CodeNode* root) {
+    if (root->children.empty()) {
+      return {root};
+    }
+    std::vector<const CodeNode*> ret;
+    for (auto& n : root->children) {
+      std::vector<const CodeNode*> nodes = FetchAllLeaf(n);
+      ret.insert(ret.end(), nodes.begin(), nodes.end());
+    }
+    return ret;
+  }
+
+  StringTable* string_table_;
+  const Options* opts_;
+  std::map<string, pprof::Sample> sample_table_;
+};
+
+class PprofProfileImpl : public PprofProfile {
+ public:
+  explicit PprofProfileImpl(const Options* opts)
+      : opts_(opts),
+        func_table_(new FunctionTable(&string_table_)),
+        loc_table_(new LocationTable(func_table_.get())),
+        samples_(new Samples(&string_table_, opts)) {}
+
+  uint64 AddLocation(const CodeNode* callee, const CodeNode* caller) override {
+    const string& file_path = caller->trace->file();
+    uint64 lineno = caller->trace->lineno();
+    const string& callee_file_path = callee->trace->file();
+    const string& callee_function = callee->trace->function();
+    uint64 callee_func_start_line = callee->trace->func_start_line();
+
+    return loc_table_->GetIndex(file_path, lineno, callee_function,
+                                callee_file_path, callee_func_start_line);
+  }
+
+  void AddSample(const CodeNode* leaf, std::vector<uint64>* call_ids) override {
+    std::vector<uint64> reversed_call_ids;
+    std::reverse_copy(call_ids->begin(), call_ids->end(),
+                      std::back_inserter(reversed_call_ids));
+    samples_->Add(leaf, reversed_call_ids);
+  }
+
+  Status WritePprofProfile(const string& filename) override {
+    pprof::Profile profile_pb;
+    Build(&profile_pb);
+
+    std::unique_ptr<WritableFile> file;
+    Status s = Env::Default()->NewWritableFile(filename, &file);
+    if (!s.ok()) return s;
+
+    int32 buf_size = 1024 * 1024;
+    io::ZlibOutputBuffer* zlib_output_buffer = new io::ZlibOutputBuffer(
+        file.get(), buf_size, buf_size, io::ZlibCompressionOptions::GZIP());
+    s = zlib_output_buffer->Init();
+    if (!s.ok()) return s;
+    s = zlib_output_buffer->Append(profile_pb.SerializeAsString());
+    if (!s.ok()) return s;
+    s = zlib_output_buffer->Close();
+    if (!s.ok()) return s;
+    fprintf(stdout, "\nRun pprof -png --nodecount=20 --sample_index=1 <%s>\n",
+            filename.c_str());
+    return s;
+  }
+
+ private:
+  void Build(pprof::Profile* profile_pb) {
+    string sample_type_description = "count";
+    auto sample_type = profile_pb->mutable_sample_type()->Add();
+    sample_type->set_type(string_table_.GetIndex(sample_type_description));
+    sample_type->set_unit(string_table_.GetIndex("count"));
+
+    string type = *opts_->select.begin();
+    sample_type_description = type;
+    sample_type = profile_pb->mutable_sample_type()->Add();
+    sample_type->set_type(string_table_.GetIndex(sample_type_description));
+    if (type == kShown[1] || type == kShown[9] || type == kShown[10]) {
+      sample_type->set_unit(string_table_.GetIndex("microseconds"));
+      if (type == kShown[1]) {
+        profile_pb->mutable_comment()->Add(string_table_.GetIndex(
+            "Sum of accelerator execution time and cpu execution time."));
+      } else if (type == kShown[9]) {
+        profile_pb->mutable_comment()->Add(
+            string_table_.GetIndex("Accelerator execution time."));
+      } else if (type == kShown[10]) {
+        profile_pb->mutable_comment()->Add(
+            string_table_.GetIndex("CPU execution time."));
+      }
+    } else if (type == kShown[0]) {
+      sample_type->set_unit(string_table_.GetIndex("bytes"));
+      profile_pb->mutable_comment()->Add(
+          string_table_.GetIndex("Sum of operation output memory."));
+    } else if (type == kShown[2]) {
+      sample_type->set_unit(string_table_.GetIndex("count"));
+      profile_pb->mutable_comment()->Add(
+          string_table_.GetIndex("Model parameters."));
+    } else if (type == kShown[3]) {
+      sample_type->set_unit(string_table_.GetIndex("count"));
+      profile_pb->mutable_comment()->Add(string_table_.GetIndex(
+          "Model float operations (Only available if defined)."));
+    } else {
+      fprintf(stderr, "pprof doesn't support selecting: %s\n", type.c_str());
+    }

+    for (const string& str : string_table_.strings()) {
+      *profile_pb->mutable_string_table()->Add() = str;
+    }
+    for (const auto& sample_it : samples_->samples()) {
+      // TODO(xpan): Consider swap.
+      profile_pb->mutable_sample()->Add()->MergeFrom(sample_it.second);
+    }
+    for (const auto& function_it : func_table_->functions()) {
+      profile_pb->mutable_function()->Add()->MergeFrom(function_it.second);
+    }
+    for (const auto& location_it : loc_table_->locations()) {
+      profile_pb->mutable_location()->Add()->MergeFrom(location_it.second);
+    }
+  }
+
+  const Options* opts_;
+  StringTable string_table_;
+  std::unique_ptr<FunctionTable> func_table_;
+  std::unique_ptr<LocationTable> loc_table_;
+  std::unique_ptr<Samples> samples_;
+};
 } // namespace

 void TFCode::AddNode(TFGraphNode* node) {
   if (node->code().traces_size() == 0) {
     return;
   }
-  TFMultiGraphNode* pre_trace_node = nullptr;
+  if (!root_) {
+    graph_root_.reset(new TFMultiGraphNode(kTFProfRoot));
+    root_.reset(new CodeNode(graph_root_.get(), nullptr));
+  }
+
+  CodeNode* pre_code_node = root_.get();
   // TODO(xpan): Consider to release CodeDef after TFCode is built. It
   // takes a lot of memory.
   for (int i = 0; i < node->code().traces_size(); ++i) {
     // Unlike op name, which is globally unique, trace name is only unique
     // w.r.t. it's parent.
     const string& trace = GetTraceString(node->code().traces(i));
-    if (i == 0) {
-      if (!trace_root_) {
-        trace_root_.reset(new TFMultiGraphNode(trace));
-      }
-      CHECK(trace_root_->name() == trace) << "Different trace root";
-      pre_trace_node = trace_root_.get();
-      continue;
-    }
-    pre_trace_node->AddChildren(trace);
-    TFMultiGraphNode* trace_node = pre_trace_node->children().at(trace).get();
-
+    pre_code_node = pre_code_node->AddChildren(trace, &node->code().traces(i));
     if (i == node->code().traces_size() - 1) {
-      trace_node->AddGraphNode(node);
+      pre_code_node->node->AddGraphNode(node);
     }
-    pre_trace_node = trace_node;
   }
 }

 void TFCode::Build() {
-  if (root_) {
-    return;
-  }
-  tfprof_trace_root_.reset(new TFMultiGraphNode(kTFProfRoot));
-  root_.reset(new CodeNode(tfprof_trace_root_.get()));
-
-  if (trace_root_) {
-    code_root_ = BuildCodeNodes(trace_root_.get());
-    root_->children.push_back(code_root_);
-  }
-}
-
-CodeNode* TFCode::BuildCodeNodes(TFMultiGraphNode* root) {
-  auto code_root = std::unique_ptr<CodeNode>(new CodeNode(root));
-  CodeNode* code_root_ptr = code_root.get();
-  code_nodes_.insert(std::move(code_root));
-
-  for (auto it = root->children().cbegin(); it != root->children().cend();
-       ++it) {
-    code_root_ptr->children.push_back(BuildCodeNodes(it->second.get()));
-  }
-  return code_root_ptr;
 }

 const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
                                           Timeline* timeline) {
-  std::vector<CodeNode*> roots = Account(root_->children, opts);
   root_->ResetTotalStats();
+  if (opts.output_type == kOutput[3]) {
+    if (opts.select.size() != 1) {
+      fprintf(stderr, "Can only select 1 attribute for pprof output.\n");
+      return root_.get();
+    }
+    string select = *opts.select.begin();
+    if (select != kShown[0] && select != kShown[1] && select != kShown[2] &&
+        select != kShown[3] && select != kShown[9] && select != kShown[10]) {
+      fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str());
+      return root_.get();
+    }
+  }
+
+  std::vector<CodeNode*> roots = Account(root_->children, opts);
   root_->show_children.clear();
   for (CodeNode* n : roots) {
     root_->AggregateTotalStats(n);
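To make the index conventions above concrete — everything in a pprof `Profile` refers into `string_table` by index, the first string must be empty, and function/location ids are 1-based — here is a hand-built miniature profile. This is only an illustration of the same conventions that `StringTable`, `FunctionTable`, and `LocationTable` enforce, not code from the commit; the strings and numbers are made up.

```python
# Minimal hand-built pprof Profile with one sample (illustrative only).
from tensorflow.core.profiler import profile_pb2

p = profile_pb2.Profile()
p.string_table.extend(['', 'count', 'micros', 'train.py', 'build_model'])

st = p.sample_type.add()
st.type, st.unit = 1, 1                       # "count" / "count"
st = p.sample_type.add()
st.type, st.unit = 2, 2                       # "micros" / "micros"

fn = p.function.add()
fn.id, fn.name, fn.filename, fn.start_line = 1, 4, 3, 10   # ids start at 1

loc = p.location.add()
loc.id = 1
line = loc.line.add()
line.function_id, line.line = 1, 42

s = p.sample.add()
s.location_id.append(1)                       # call stack, leaf to root
s.value.extend([1, 1234])                     # [count, selected metric]
```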
@@ -121,21 +392,46 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
   CodeNode* root = PrintScope({root_.get()}, opts, 1, 0)[0];

   root->formatted_str = FormatLegend(opts) + root->formatted_str;
-  Format(root->show_children, &root->formatted_str, root->mutable_proto());

-  if (timeline) {
-    timeline->GenerateCodeTimeline(root);
+  if (opts.output_type == kOutput[3]) {
+    std::vector<uint64> call_ids;
+    pprof_profile_.reset(new PprofProfileImpl(&opts));
+    Format(root, root->show_children, opts, &root->formatted_str,
+           root->mutable_proto(), &call_ids);
+    Status s = pprof_profile_->WritePprofProfile(
+        opts.output_options.at(kPprofOpts[0]));
+    if (!s.ok()) {
+      fprintf(stderr, "%s\n", s.ToString().c_str());
+    }
+  } else {
+    Format(root, root->show_children, opts, &root->formatted_str,
+           root->mutable_proto(), nullptr);
+    if (timeline) {
+      timeline->GenerateCodeTimeline(root);
+    }
   }
   return root;
 }

-void TFCode::Format(const std::vector<CodeNode*> roots, string* display_str,
-                    MultiGraphNodeProto* proto) {
-  for (CodeNode* node : roots) {
+void TFCode::Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
+                    const Options& opts, string* display_str,
+                    MultiGraphNodeProto* proto, std::vector<uint64>* call_ids) {
+  if (nodes.empty() && root->trace && opts.output_type == kOutput[3]) {
+    pprof_profile_->AddSample(root, call_ids);
+  }
+
+  for (CodeNode* node : nodes) {
+    if (root->trace && opts.output_type == kOutput[3]) {
+      uint64 loc_id = pprof_profile_->AddLocation(node, root);
+      call_ids->push_back(loc_id);
+    }
     display_str->append(node->formatted_str);
     MultiGraphNodeProto* child = proto->add_children();
     child->MergeFrom(node->proto());
-    Format(node->show_children, display_str, child);
+    Format(node, node->show_children, opts, display_str, child, call_ids);
+    if (root->trace && opts.output_type == kOutput[3]) {
+      call_ids->pop_back();
+    }
   }
 }
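The `call_ids` plumbing in `Format` above is a depth-first walk that keeps the current call stack as a vector of location ids: push a location when descending into a child, emit a sample at a displayed leaf, pop on the way back up (the stack is later reversed to callee-to-caller order in `AddSample`). A minimal sketch of that pattern with hypothetical callbacks, not the commit's code:

```python
# DFS with an explicit call-stack of location ids (mirrors TFCode::Format).
def emit_samples(node, call_ids, add_location, add_sample):
    if not node.children:
        # Leaf of the displayed trace: record one sample with the full stack.
        add_sample(node, list(call_ids))
        return
    for child in node.children:
        call_ids.append(add_location(child, node))   # (callee, caller)
        emit_samples(child, call_ids, add_location, add_sample)
        call_ids.pop()
```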
@@ -170,14 +466,15 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots,
   std::vector<CodeNode*> show_nodes;

   for (CodeNode* node : roots) {
+    if (ShouldTrim(node, opts.trim_name_regexes) || depth > opts.max_depth) {
+      continue;
+    }
     int ident = last_ident;
     bool show = ShouldShow(node, opts, depth);
     if (show) ident += 2;

-    std::vector<CodeNode*> show_cnodes;
-    if (!ShouldTrim(node, opts.trim_name_regexes) && depth <= opts.max_depth) {
-      show_cnodes = PrintScope(node->show_children, opts, depth + 1, ident);
-    }
+    std::vector<CodeNode*> show_cnodes =
+        PrintScope(node->show_children, opts, depth + 1, ident);
     if (show) {
       node->show_children.clear();
       if (opts.account_displayed_op_only) {

@@ -32,15 +32,29 @@ limitations under the License.
 #include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
 #include "tensorflow/core/profiler/internal/tfprof_timeline.h"
 #include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/profile.pb.h"
 #include "tensorflow/core/profiler/tfprof_log.pb.h"
 #include "tensorflow/core/profiler/tfprof_output.pb.h"

 namespace tensorflow {
 namespace tfprof {

+class PprofProfile {
+ public:
+  virtual ~PprofProfile() {}
+
+  virtual uint64 AddLocation(const CodeNode* callee,
+                             const CodeNode* caller) = 0;
+
+  virtual void AddSample(const CodeNode* leaf,
+                         std::vector<uint64>* call_ids) = 0;
+
+  virtual Status WritePprofProfile(const string& filename) = 0;
+};
+
 class TFCode : public TFMultiShow {
  public:
-  explicit TFCode() : code_root_(nullptr), trace_root_(nullptr) {}
+  TFCode() {}
   ~TFCode() override {}

   void AddNode(TFGraphNode* node) override;
@@ -48,8 +62,6 @@ class TFCode : public TFMultiShow {
   void Build() override;

  private:
-  CodeNode* BuildCodeNodes(TFMultiGraphNode* root);
-
   const ShowMultiNode* ShowInternal(const Options& opts,
                                     Timeline* timeline) override;

@@ -63,16 +75,15 @@ class TFCode : public TFMultiShow {
   std::vector<CodeNode*> Account(const std::vector<CodeNode*>& roots,
                                  const Options& opts);

-  void Format(const std::vector<CodeNode*> roots, string* display_str,
-              MultiGraphNodeProto* proto);
+  void Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
+              const Options& opts, string* display_str,
+              MultiGraphNodeProto* proto, std::vector<uint64>* call_ids);

   string FormatNode(CodeNode* node, const Options& opts, int64 indent);

   std::unique_ptr<CodeNode> root_;
-  CodeNode* code_root_;
-  std::unique_ptr<TFMultiGraphNode> trace_root_;
-  std::unique_ptr<TFMultiGraphNode> tfprof_trace_root_;
-  std::set<std::unique_ptr<CodeNode>> code_nodes_;
+  std::unique_ptr<TFMultiGraphNode> graph_root_;
+  std::unique_ptr<PprofProfile> pprof_profile_;
 };
 } // namespace tfprof
 } // namespace tensorflow
@@ -72,6 +72,11 @@ void TFGraph::Build() {
 const ShowNode* TFGraph::ShowInternal(const Options& opts, Timeline* timeline) {
   root_->ResetTotalStats();
   root_->show_children.clear();
+
+  if (opts.output_type == kOutput[3]) {
+    fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+    return root_;
+  }
   if (timeline && timeline->step() < 0) {
     // TODO(xpan): Maybe pick a default step for users.
     fprintf(stderr,
@@ -340,6 +340,27 @@ class TFGraphNode {
     return exec->second.allocator_bytes_in_use();
   }

+  int64 parameters() const {
+    if (!shape().empty()) {
+      int64 params = 1;
+      bool complete_shape = true;
+      for (int64 d : shape()) {
+        // Sometimes parameters could be <0 when a dim is unknown.
+        if (d < 0) {
+          complete_shape = false;
+          break;
+        }
+        params *= d;
+      }
+      if (complete_shape) {
+        return params;
+      } else {
+        fprintf(stderr, "Incomplete shape.\n");
+      }
+    }
+    return 0;
+  }
+
   int64 float_ops(int64 step) const {
     // If not run, return static analysis.
     if (execs_.empty()) {
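The new `parameters()` accessor simply multiplies the known output dimensions and falls back to 0 when any dimension is unknown (negative). A quick sketch of that arithmetic:

```python
# parameters() semantics: product of dims, or 0 if the shape is incomplete.
def parameters(shape):
    if not shape:
        return 0
    params = 1
    for d in shape:
        if d < 0:          # unknown dimension
            return 0
        params *= d
    return params

assert parameters([64, 128]) == 8192
assert parameters([-1, 128]) == 0    # e.g. an unknown batch dimension
```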
@@ -400,12 +421,14 @@ class TFMultiGraphNode {
  public:
   TFMultiGraphNode(const string& name)
       : name_(name),
+        step_(-1),
         run_count_(0),
         exec_micros_(0),
         accelerator_exec_micros_(0),
         cpu_exec_micros_(0),
         requested_bytes_(0),
-        float_ops_(0) {}
+        float_ops_(0),
+        parameters_(0) {}

   bool SnapshotNodes(int64 step, const std::vector<string>& type_regexes) {
     run_count_ = 0;
@@ -415,11 +438,13 @@ class TFMultiGraphNode {

     requested_bytes_ = 0;
     float_ops_ = 0;
+    parameters_ = 0;
     op_types_.clear();
     shapes_.clear();
     devices_.clear();
     snapshot_nodes_.clear();

+    step_ = step;
     std::vector<const TFGraphNode*> nodes = pick_nodes(type_regexes);

     if (nodes.empty()) {
@@ -436,6 +461,7 @@ class TFMultiGraphNode {

       requested_bytes_ += node->requested_bytes(step);
       float_ops_ += node->float_ops(step);
+      parameters_ += node->parameters();
       if (node->shape().size() > 0) {
         shapes_.push_back(node->shape());
       }
@@ -445,6 +471,8 @@ class TFMultiGraphNode {
     return true;
   }

+  int64 step() const { return step_; }
+
   void AddGraphNode(const TFGraphNode* node) {
     if (nodes_.find(node->name()) != nodes_.end()) {
       return;
@@ -456,16 +484,6 @@ class TFMultiGraphNode {
     return snapshot_nodes_;
   }

-  void AddChildren(const string& name) {
-    if (children_.find(name) != children_.end()) {
-      return;
-    }
-    children_[name].reset(new TFMultiGraphNode(name));
-  }
-  const std::map<string, std::unique_ptr<TFMultiGraphNode>>& children() const {
-    return children_;
-  }
-
   const string& name() const { return name_; }

   int64 run_count() const { return run_count_; }
@@ -477,6 +495,8 @@ class TFMultiGraphNode {

   int64 float_ops() const { return float_ops_; }

+  int64 parameters() const { return parameters_; }
+
   const std::set<string>& devices() const { return devices_; }

   const std::set<string>& op_types() const { return op_types_; }
@@ -511,6 +531,7 @@ class TFMultiGraphNode {
   }

   const string name_;
+  int64 step_;
   // Snapshot based on type_regexes
   std::set<string> op_types_;
   int64 run_count_;
@@ -520,13 +541,13 @@ class TFMultiGraphNode {

   int64 requested_bytes_;
   int64 float_ops_;
+  int64 parameters_;
   std::set<string> devices_;
   std::vector<std::vector<int64>> shapes_;
   std::map<string, const TFGraphNode*> snapshot_nodes_;

   // Overall data held by the TFMultiGraphNode.
   std::map<string, const TFGraphNode*> nodes_;
-  std::map<string, std::unique_ptr<TFMultiGraphNode>> children_;
 };

 bool IsPlacedOnAccelerator(const string& device);
@@ -45,25 +45,7 @@ void ShowNode::ReInit(int64 step) {
     (*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
         VecToShapeProto(inp.second));
   }
-  proto_.clear_parameters();
-  if (!node->shape().empty()) {
-    int64 params = 1;
-    bool complete_shape = true;
-    for (int64 d : node->shape()) {
-      // Sometimes parameters could be <0 when a dim is unknown.
-      if (d < 0) {
-        complete_shape = false;
-        break;
-      }
-      params *= d;
-    }
-    if (complete_shape) {
-      mutable_proto()->set_parameters(proto_.parameters() + params);
-    } else {
-      fprintf(stderr, "Incomplete shape.");
-    }
-  }
+  proto_.set_parameters(node->parameters());
 }

 GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }

@@ -114,6 +96,8 @@ void ShowNode::AddSelfToTotalStats() {
 }

 void ShowNode::ResetTotalStats() {
+  formatted_str.clear();
+
   mutable_proto()->set_total_definition_count(0);
   mutable_proto()->set_total_run_count(0);
   mutable_proto()->set_total_exec_micros(0);

@@ -153,26 +137,7 @@ bool ShowMultiNode::ReInit(int64 step,
   mutable_proto()->set_requested_bytes(node->requested_bytes());
   mutable_proto()->set_float_ops(node->float_ops());

-  mutable_proto()->clear_parameters();
-  if (!node->shapes().empty()) {
-    for (const std::vector<int64>& shape : node->shapes()) {
-      int64 params = 1;
-      bool complete_shape = true;
-      for (int64 d : shape) {
-        // Sometimes parameters could be <0 when a dim is unknown.
-        if (d < 0) {
-          complete_shape = false;
-          break;
-        }
-        params *= d;
-      }
-      if (complete_shape) {
-        mutable_proto()->set_parameters(proto().parameters() + params);
-      } else {
-        fprintf(stderr, "Incomplete shape.");
-      }
-    }
-  }
+  mutable_proto()->set_parameters(node->parameters());
   return has_matched_type;
 }

@@ -216,6 +181,7 @@ void ShowMultiNode::AddSelfToTotalStats() {
 }

 void ShowMultiNode::ResetTotalStats() {
+  formatted_str.clear();
   mutable_proto()->set_total_exec_micros(0);
   mutable_proto()->set_total_accelerator_exec_micros(0);
   mutable_proto()->set_total_cpu_exec_micros(0);
@@ -111,11 +111,31 @@ class ShowMultiNode {
 class CodeNode : public ShowMultiNode {
  public:
-  explicit CodeNode(TFMultiGraphNode* node) : ShowMultiNode(node) {}
+  explicit CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace)
+      : ShowMultiNode(node), trace(trace) {}
   ~CodeNode() override {}

+  CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace) {
+    auto it = children_.find(name);
+    if (it != children_.end()) {
+      return it->second.get();
+    }
+
+    graph_children_.push_back(
+        std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(name)));
+    auto child = &children_[name];
+    child->reset(new CodeNode(graph_children_.back().get(), trace));
+    children.push_back(child->get());
+    return child->get();
+  }
+
+  const CodeDef::Trace* trace;
   std::vector<CodeNode*> children;
   std::vector<CodeNode*> show_children;

+ private:
+  std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_;
+  std::map<string, std::unique_ptr<CodeNode>> children_;
 };

 class OpNode : public ShowMultiNode {
@@ -103,8 +103,13 @@ void TFOp::Build() {
 }

 const ShowMultiNode* TFOp::ShowInternal(const Options& opts,
                                         Timeline* timeline) {
   root_->ResetTotalStats();
+  if (opts.output_type == kOutput[3]) {
+    fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+    return root_.get();
+  }
+
   if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
     root_->formatted_str = FormatNode(root_.get(), root_.get(), opts);
   }
@@ -84,6 +84,13 @@ tensorflow::Status ParseOutput(const string& output_opt, string* output_type,
     required_options.insert(kFileRequiredOpts,
                             kFileRequiredOpts + sizeof(kFileRequiredOpts) /
                                                     sizeof(*kFileRequiredOpts));
+  } else if (*output_type == kOutput[3]) {
+    valid_options.insert(kPprofOpts,
+                         kPprofOpts + sizeof(kPprofOpts) / sizeof(*kPprofOpts));
+    required_options.insert(
+        kPprofRequiredOpts,
+        kPprofRequiredOpts +
+            sizeof(kPprofRequiredOpts) / sizeof(*kPprofRequiredOpts));
   }

   for (const string& kv_str : kv_split) {
@@ -62,7 +62,8 @@ static const char* const kCmds[] = {
     "scope", "graph", "code", "op", "advise", "set", "help",
 };

-static const char* const kOutput[] = {"timeline", "stdout", "file", "none"};
+static const char* const kOutput[] = {"timeline", "stdout", "file", "pprof",
+                                      "none"};

 static const char* const kTimelineOpts[] = {
     "outfile",
@@ -78,6 +79,14 @@ static const char* const kFileRequiredOpts[] = {
     "outfile",
 };

+static const char* const kPprofOpts[] = {
+    "outfile",
+};
+
+static const char* const kPprofRequiredOpts[] = {
+    "outfile",
+};
+
 struct Options {
  public:
   static tensorflow::Status FromProtoStr(const string& opts_proto_str,
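With `"pprof"` inserted, the output types are now `timeline`, `stdout`, `file`, `pprof`, `none`, and `pprof` (like `timeline` and `file`) requires an `outfile` key. The `-output` value is a `<type>:<key>=<value>` string such as `pprof:outfile=/tmp/profile.pb.gz`. A toy illustration of that format; the real parsing is `ParseOutput` above and handles more cases:

```python
# Illustrative parse of tfprof's -output option string (not the real parser).
def parse_output(output_opt):
    output_type, _, kv_str = output_opt.partition(':')
    options = dict(kv.split('=', 1) for kv in kv_str.split(',') if kv)
    return output_type, options

assert parse_output('pprof:outfile=/tmp/profile.pb.gz') == (
    'pprof', {'outfile': '/tmp/profile.pb.gz'})
assert parse_output('timeline:outfile=/tmp/timeline.json') == (
    'timeline', {'outfile': '/tmp/timeline.json'})
```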
@@ -78,8 +78,13 @@ void TFScope::Build() {
 }

 const ShowNode* TFScope::ShowInternal(const Options& opts, Timeline* timeline) {
-  std::vector<ScopeNode*> roots = Account(root_->children, opts);
   root_->ResetTotalStats();
+  if (opts.output_type == kOutput[3]) {
+    fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+    return root_;
+  }
+
+  std::vector<ScopeNode*> roots = Account(root_->children, opts);
   root_->show_children.clear();
   for (ScopeNode* n : roots) {
     root_->AggregateTotalStats(n);
@@ -26,25 +26,27 @@ namespace tensorflow {
 namespace tfprof {

 const GraphNodeProto& TFShow::Show(const Options& opts) {
-  if (opts.output_type == kOutput[3]) {
-    return ShowInternal(opts, nullptr)->proto();
-  } else if (opts.output_type == kOutput[0]) {
+  if (opts.output_type == kOutput[0]) {
     Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
     return ShowInternal(opts, &timeline)->proto();
-  } else if (opts.output_type == kOutput[2]) {
-    const ShowNode* root = ShowInternal(opts, nullptr);
-    Status s =
-        WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
-                          root->formatted_str);
-    if (!s.ok()) {
-      fprintf(stderr, "%s\n", s.ToString().c_str());
-    }
-    return root->proto();
   } else {
-    const ShowNode* root = ShowInternal(opts, nullptr);
-    printf("%s", root->formatted_str.c_str());
-    fflush(stdout);
-    return root->proto();
+    const ShowNode* ret = ShowInternal(opts, nullptr);
+    if (opts.output_type == kOutput[1]) {
+      printf("%s", ret->formatted_str.c_str());
+      fflush(stdout);
+    } else if (opts.output_type == kOutput[2]) {
+      Status s = WriteStringToFile(Env::Default(),
+                                   opts.output_options.at(kFileOpts[0]),
+                                   ret->formatted_str);
+      if (!s.ok()) {
+        fprintf(stderr, "%s\n", s.ToString().c_str());
+      }
+    } else if (opts.output_type == kOutput[3] ||
+               opts.output_type == kOutput[4]) {
+    } else {
+      fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
+    }
+    return ret->proto();
   }
 }
@@ -28,25 +28,27 @@ namespace tensorflow {
 namespace tfprof {

 const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
-  if (opts.output_type == kOutput[3]) {
-    return ShowInternal(opts, nullptr)->proto();
-  } else if (opts.output_type == kOutput[0]) {
+  if (opts.output_type == kOutput[0]) {
     Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
     return ShowInternal(opts, &timeline)->proto();
-  } else if (opts.output_type == kOutput[2]) {
-    const ShowMultiNode* root = ShowInternal(opts, nullptr);
-    Status s =
-        WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
-                          root->formatted_str);
-    if (!s.ok()) {
-      fprintf(stderr, "%s\n", s.ToString().c_str());
-    }
-    return root->proto();
   } else {
-    const ShowMultiNode* root = ShowInternal(opts, nullptr);
-    printf("%s", root->formatted_str.c_str());
-    fflush(stdout);
-    return root->proto();
+    const ShowMultiNode* ret = ShowInternal(opts, nullptr);
+    if (opts.output_type == kOutput[1]) {
+      printf("%s", ret->formatted_str.c_str());
+      fflush(stdout);
+    } else if (opts.output_type == kOutput[2]) {
+      Status s = WriteStringToFile(Env::Default(),
+                                   opts.output_options.at(kFileOpts[0]),
+                                   ret->formatted_str);
+      if (!s.ok()) {
+        fprintf(stderr, "%s\n", s.ToString().c_str());
+      }
+    } else if (opts.output_type == kOutput[3] ||
+               opts.output_type == kOutput[4]) {
+    } else {
+      fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
+    }
+    return ret->proto();
   }
 }
tensorflow/core/profiler/profile.proto (new file, 71 lines)
@@ -0,0 +1,71 @@
+// This proto intends to match format expected by pprof tool.
+syntax = "proto3";
+
+package tensorflow.tfprof.pprof;
+
+message Profile {
+  repeated ValueType sample_type = 1;
+  repeated Sample sample = 2;
+  repeated Mapping mapping = 3;
+  repeated Location location = 4;
+  repeated Function function = 5;
+  repeated string string_table = 6;
+  int64 drop_frames = 7;
+  int64 keep_frames = 8;
+  int64 time_nanos = 9;
+  int64 duration_nanos = 10;
+  ValueType period_type = 11;
+  int64 period = 12;
+  repeated int64 comment = 13;
+  int64 default_sample_type = 14;
+}
+
+message ValueType {
+  int64 type = 1;
+  int64 unit = 2;
+}
+
+message Sample {
+  repeated uint64 location_id = 1;
+  repeated int64 value = 2;
+  repeated Label label = 3;
+}
+
+message Label {
+  int64 key = 1;
+  int64 str = 2;
+  int64 num = 3;
+}
+
+message Mapping {
+  uint64 id = 1;
+  uint64 memory_start = 2;
+  uint64 memory_limit = 3;
+  uint64 file_offset = 4;
+  int64 filename = 5;
+  int64 build_id = 6;
+  bool has_functions = 7;
+  bool has_filenames = 8;
+  bool has_line_numbers = 9;
+  bool has_inline_frames = 10;
+}
+
+message Location {
+  uint64 id = 1;
+  uint64 mapping_id = 2;
+  uint64 address = 3;
+  repeated Line line = 4;
+}
+
+message Line {
+  uint64 function_id = 1;
+  int64 line = 2;
+}
+
+message Function {
+  uint64 id = 1;
+  int64 name = 2;
+  int64 system_name = 3;
+  int64 filename = 4;
+  int64 start_line = 5;
+}
@@ -10,6 +10,7 @@ message CodeDef {
     int32 lineno = 2;
     string function = 3;
     string line = 4;
+    int32 func_start_line = 5;
   }
 }
@@ -17,7 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import gzip
+import io
 import os
+
+from tensorflow.core.profiler import profile_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
@@ -200,7 +204,6 @@ class PrintModelAnalysisTest(test.TestCase):

   def testTimeline(self):
     ops.reset_default_graph()
-    opts = builder.trainable_variables_parameter()
     outfile = os.path.join(test.get_temp_dir(), 'timeline')
     opts = (builder(builder.trainable_variables_parameter())
             .with_max_depth(100000)
@@ -312,6 +315,61 @@ class PrintModelAnalysisTest(test.TestCase):
     checker = advice_pb.checkers['ExpensiveOperationChecker']
     self.assertGreater(len(checker.reports), 0)

+  def pprof_test_helper(self, attribute, should_fail=False):
+    ops.reset_default_graph()
+    outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
+    opts = (builder(builder.time_and_memory())
+            .select([attribute])
+            .with_max_depth(100000)
+            .with_node_names(trim_name_regexes=['ops.py.*'])
+            .with_pprof_output(outfile).build())
+
+    with session.Session() as sess:
+      x = lib.BuildFullModel()
+
+      sess.run(variables.global_variables_initializer())
+      run_meta = config_pb2.RunMetadata()
+      _ = sess.run(
+          x,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE),
+          run_metadata=run_meta)
+
+      _ = model_analyzer.profile(
+          sess.graph, run_meta, cmd='code', options=opts)
+
+      if should_fail:
+        self.assertFalse(gfile.Exists(outfile))
+        return
+
+      profile_pb = profile_pb2.Profile()
+      with gfile.Open(outfile, 'rb') as f:
+        with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
+          profile_pb.ParseFromString(gzipf.read())
+
+      self.assertGreater(len(profile_pb.sample), 10)
+      self.assertGreater(len(profile_pb.location), 10)
+      self.assertGreater(len(profile_pb.function), 10)
+      self.assertGreater(len(profile_pb.string_table), 30)
+
+      has_rnn = False
+      has_loop = False
+      for s in profile_pb.string_table:
+        if s.find('rnn') > 0:
+          has_rnn = True
+        if s.find('while') > 0:
+          has_loop = True
+        self.assertFalse(s.startswith('ops.py'))
+      self.assertTrue(has_rnn)
+      self.assertTrue(has_loop)
+
+  def testPprof(self):
+    for attr in ['micros', 'bytes', 'accelerator_micros', 'cpu_micros',
+                 'params', 'float_ops']:
+      self.pprof_test_helper(attr)
+    for attr in ['op_types', 'device', 'input_shapes']:
+      self.pprof_test_helper(attr, True)
+

 if __name__ == '__main__':
   test.main()
@@ -353,6 +353,20 @@ class ProfileOptionBuilder(object):
     self._options['output'] = 'timeline:outfile=%s' % timeline_file
     return self

+  def with_pprof_output(self, pprof_file):
+    """Generate a pprof profile gzip file.
+
+    To use the pprof file:
+      pprof -png --nodecount=20 --sample_index=1 <pprof_file>
+
+    Args:
+      pprof_file: filename for output, usually suffixed with .pb.gz.
+    Returns:
+      self.
+    """
+    self._options['output'] = 'pprof:outfile=%s' % pprof_file
+    return self
+
   def order_by(self, attribute):
     # pylint: disable=line-too-long
     """Order the displayed profiler nodes based on a attribute.
@@ -98,12 +98,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
       add_entry = True

     if add_trace:
-      for tb in op.traceback:
+      for tb in op.traceback_with_start_lines:
        trace = entry.code_def.traces.add()
        trace.file = tb[0] if tb[0] else 'none'
        trace.lineno = tb[1] if tb[1] else -1
        trace.function = tb[2] if tb[2] else 'none'
        trace.line = tb[3] if tb[3] else 'none'
+       trace.func_start_line = tb[4] if tb[4] else -1
       add_entry = True

     if add_entry:
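The switch from `op.traceback` to `op.traceback_with_start_lines` is what supplies `func_start_line`: each frame becomes a 5-tuple `(file, lineno, function, line, func_start_line)` that maps one-to-one onto `CodeDef.Trace`. A sketch of reading those frames; `graph` is a placeholder for any TF 1.x `Graph`:

```python
# Sketch: what the logger reads per traceback frame (TF 1.x graph ops).
for op in graph.get_operations():
    for tb in op.traceback_with_start_lines:
        file_name, lineno, function, line, func_start_line = tb
        # These map 1:1 onto CodeDef.Trace:
        #   trace.file, trace.lineno, trace.function, trace.line,
        #   trace.func_start_line
        print(file_name, lineno, function, func_start_line)
```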
@@ -74,6 +74,10 @@ tf_class {
     name: "with_node_names"
     argspec: "args=[\'self\', \'start_name_regexes\', \'show_name_regexes\', \'hide_name_regexes\', \'trim_name_regexes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "with_pprof_output"
+    argspec: "args=[\'self\', \'pprof_file\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "with_stdout_output"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"