mirror of
https://github.com/zebrajr/tensorflow.git
synced 2025-12-06 12:20:11 +01:00
Increase the maximum HLO op chain length for profiling from 8192 to 16384.
This prevents maximum-length chains of trivial ops (e.g. add.fp32) from running faster than copying the data, which results in a 'too fast to measure' error. PiperOrigin-RevId: 826079017
This commit is contained in:
parent
71e640f242
commit
3943b53326
|
|
@ -397,7 +397,9 @@ absl::StatusOr<HloInstructionProfile> HloOpProfiler::MeasureClockCyclesPerOp(
|
|||
|
||||
// Longer chains are too slow to compile.
|
||||
constexpr int kMinOpChainLength = 16;
|
||||
constexpr int kMaxOpChainLength = 8192;
|
||||
// If you get "too fast to measure" errors on faster GPUs, try increasing
|
||||
// kMaxOpChainLength.
|
||||
constexpr int kMaxOpChainLength = 16 * 1024;
|
||||
|
||||
absl::Duration duration = absl::ZeroDuration();
|
||||
int chain_length = kMinOpChainLength;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user