Revert "[reland2][dynamo] Revert "Revert "[reland][dynamo] use optimizers correctly in benchmar… (#90956)"
This reverts commit 8bc38ae4e2.
Reverted https://github.com/pytorch/pytorch/pull/90956 on behalf of https://github.com/desertfire due to Causing TIMM model failures
parent 8cd1808dbf
commit 6bc6fb21db
@@ -7,7 +7,7 @@ The runner integrates with models from TorchBenchmark, HuggingFace and TIMM suites
 
 The infrastructure allows us to specify a loss function. For torchbench models, we use a .sum().backward() call in place of the native loss function. For TIMM models, we use a CrossEntropy loss. And HF models contain a loss function inside the model itself, so we don't need any special loss computation handling.
 
-Training benchmarks approximate training by running the model forward, computing loss, running backward, and then the optimizer (SGD). Note: the optimizer is currently not compiled by Torchdynamo.
+Training benchmarks approximate training by running the model forward, computing loss and then running backward. We entirely skip the optimizer step today.
 
 Inference benchmarks and Training benchmarks measure correctness by comparing dynamo and eager model outputs given fixed inputs and seeds.
 
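For orientation, the training path both versions of this README paragraph describe is just forward, loss, backward, with the optimizer step either skipped (the behavior this revert restores) or an SGD step (the reverted behavior). A minimal sketch of one such training iteration, using hypothetical names rather than the harness's actual helpers:

import torch

def training_iteration(model, example_inputs, optimizer=None):
    # Forward pass on the benchmark inputs.
    outputs = model(*example_inputs)
    # Torchbench-style loss: reduce the outputs with .sum(); TIMM models
    # would use CrossEntropyLoss, and HF models compute loss internally.
    loss = outputs.sum() if isinstance(outputs, torch.Tensor) else outputs[0].sum()
    # Backward pass.
    loss.backward()
    # Post-revert the optimizer is None and this branch is skipped; the
    # reverted change would have run an SGD step here.
    if optimizer is not None:
        optimizer.step()
        optimizer.zero_grad()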
@@ -128,12 +128,6 @@ CI_SKIP_INDUCTOR_TRAINING = [
 ]
 
 
-CI_SKIP_OPTIMIZER = {
-    # TIMM
-    "convmixer_768_32",  # accuracy
-}
-
-
 def model_specified_by_path(path_and_class_str):
     return ":" in path_and_class_str
 
@@ -878,7 +872,6 @@ class BenchmarkRunner:
         self.use_amp = False
         self.grad_scaler = DummyGradScaler()
         self.autocast = NullContext
-        self.optimizer = None
         self._args = None
 
     def setup_amp(self):
@@ -907,11 +900,16 @@ class BenchmarkRunner:
         # self.grad_scaler = torch.cuda.amp.GradScaler(init_scale=2.0)
         self.autocast = torch.cuda.amp.autocast
 
-    def init_optimizer(self, name, device, params):
-        if device == "cuda" and self.args.training and name not in CI_SKIP_OPTIMIZER:
-            self.optimizer = torch.optim.SGD(params, lr=0.01)
-        else:
-            self.optimizer = None
+    def init_optimizer(self, device, params):
+        self.optimizer = None
+        # TODO - Currently, optimizers are used incorrectly. Fix optimizers with
+        # https://github.com/pytorch/pytorch/pull/87492
+        # param_list = list(params)
+        # if device == "cuda" and len(param_list) != 0:
+        #     # capturable is only supported on cuda at the moment
+        #     self.optimizer = torch.optim.Adam(param_list, capturable=True)
+        # else:
+        #     self.optimizer = None
 
     @property
     def args(self):
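The commented-out block restored above records where #87492 is headed: instead of always skipping the optimizer, build a capturable Adam when there are CUDA parameters so the step can eventually be captured and compiled. A hedged sketch of that construction (hypothetical helper name, not the code this revert lands):

import torch

def make_benchmark_optimizer(device, params):
    # Materialize the generator so parameter-less models can be detected.
    param_list = list(params)
    if device == "cuda" and len(param_list) != 0:
        # capturable=True keeps the optimizer state on-device, which is what
        # lets the step be captured; it is CUDA-only at the moment.
        return torch.optim.Adam(param_list, capturable=True)
    # CPU or parameter-less models: no optimizer, matching the reverted default.
    return None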
@@ -1094,12 +1092,12 @@ class BenchmarkRunner:
         # Collect the fp64 reference outputs to be used later for accuracy checking.
         fp64_outputs = None
         try:
-            model_fp64, inputs_fp64 = cast_to_fp64(
-                deepcopy_and_maybe_ddp(model),
-                clone_inputs(example_inputs),
-            )
-            self.init_optimizer(name, current_device, model_fp64.parameters())
-            fp64_outputs = self.run_n_iterations(model_fp64, inputs_fp64)
+            fp64_outputs = self.run_n_iterations(
+                *cast_to_fp64(
+                    deepcopy_and_maybe_ddp(model),
+                    clone_inputs(example_inputs),
+                )
+            )
         except Exception:
             log.warning(
                 f"fp64 golden ref were not generated for {name}. Setting accuracy check to cosine"
@@ -1120,18 +1118,14 @@ class BenchmarkRunner:
         with self.pick_grad(name, self.args.training):
             # Get results of native pytorch
             reset_rng_state()
-            model_copy = deepcopy_and_maybe_ddp(model)
-            self.init_optimizer(name, current_device, model_copy.parameters())
-            correct_result = self.run_n_iterations(
-                model_copy, clone_inputs(example_inputs)
-            )
+            correct_result = self.run_n_iterations(
+                deepcopy_and_maybe_ddp(model), clone_inputs(example_inputs)
+            )
 
             # Rerun native pytorch
             reset_rng_state()
-            model_copy = deepcopy_and_maybe_ddp(model)
-            self.init_optimizer(name, current_device, model_copy.parameters())
-            correct_rerun_result = self.run_n_iterations(
-                model_copy, clone_inputs(example_inputs)
-            )
+            correct_rerun_result = self.run_n_iterations(
+                deepcopy_and_maybe_ddp(model), clone_inputs(example_inputs)
+            )
             if not same(
                 correct_result,
@@ -1147,11 +1141,11 @@ class BenchmarkRunner:
             reset_rng_state()
             torch._dynamo.reset()
             try:
-                model_copy = deepcopy_and_maybe_ddp(model)
-                self.init_optimizer(name, current_device, model_copy.parameters())
                 optimized_model_iter_fn = optimize_ctx(self.run_n_iterations)
 
-                new_result = optimized_model_iter_fn(model_copy, example_inputs)
+                new_result = optimized_model_iter_fn(
+                    deepcopy_and_maybe_ddp(model), example_inputs
+                )
             except Exception as e:
                 accuracy_status = "fail_to_run"
                 print(
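Taken together with the two preceding hunks, the accuracy check keeps one shape on both sides of the revert: run the model in eager twice from a fresh copy and a reset RNG state to establish a deterministic baseline, then run the dynamo-wrapped iteration function and compare with same(). A stripped-down sketch of that pattern, assuming a caller-supplied run_n_iterations and same and using torch._dynamo.optimize in place of the harness's optimize_ctx:

import copy

import torch
import torch._dynamo as dynamo

def check_accuracy_sketch(model, example_inputs, run_n_iterations, same):
    # Eager baseline: run twice on fresh copies with identical seeds.
    torch.manual_seed(1337)  # stand-in for reset_rng_state()
    correct = run_n_iterations(copy.deepcopy(model), example_inputs)

    torch.manual_seed(1337)
    correct_rerun = run_n_iterations(copy.deepcopy(model), example_inputs)
    if not same(correct, correct_rerun):
        return "eager_variation"  # eager itself is nondeterministic; bail out

    # Dynamo run: wrap the whole iteration function, then compare to eager.
    torch.manual_seed(1337)
    dynamo.reset()
    optimized_iter_fn = dynamo.optimize("inductor")(run_n_iterations)
    new_result = optimized_iter_fn(copy.deepcopy(model), example_inputs)
    return "pass" if same(correct, new_result) else "fail_accuracy"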
@@ -1199,7 +1193,6 @@ class BenchmarkRunner:
 
         # Cast the model to float16/float32 as necessary
         model, example_inputs = self.maybe_cast(model, example_inputs)
-        self.init_optimizer(name, current_device, model.parameters())
         with self.pick_grad(name, self.args.training):
             ok, total = Stats.reset_counters()
             experiment_kwargs = {}
@@ -433,6 +433,8 @@ class HuggingfaceRunner(BenchmarkRunner):
         else:
             model.eval()
 
+        self.init_optimizer(device, model.parameters())
+
         self.validate_model(model, example_inputs)
         return device, model_name, model, example_inputs, batch_size
 
@@ -261,6 +261,8 @@ class TimmRunnner(BenchmarkRunner):
         else:
             model.eval()
 
+        self.init_optimizer(device, model.parameters())
+
         self.validate_model(model, example_inputs)
 
         return device, model_name, model, example_inputs, batch_size
@@ -295,6 +295,8 @@ class TorchBenchmarkRunner(BenchmarkRunner):
         gc.collect()
         batch_size = benchmark.batch_size
 
+        self.init_optimizer(device, model.parameters())
+
         # Torchbench has quite different setup for yolov3, so directly passing
         # the right example_inputs
         if model_name == "yolov3":