mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"
This reverts commit 4f4ecc583e.
Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt because it seems to have broken internal signals. @albanD, may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
This commit is contained in:
parent
86c6f71ddb
commit
3443627e07
4
.flake8
4
.flake8
|
|
@ -16,9 +16,7 @@ ignore =
|
||||||
# these ignores are from flake8-comprehensions; please fix!
|
# these ignores are from flake8-comprehensions; please fix!
|
||||||
C407,
|
C407,
|
||||||
# these ignores are from flake8-logging-format; please fix!
|
# these ignores are from flake8-logging-format; please fix!
|
||||||
G100,G101,G200,
|
G100,G101,G200
|
||||||
# G201 replaced by LOG400 in ruff
|
|
||||||
G201,
|
|
||||||
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||||
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
||||||
# SIM104 is already covered by pyupgrade ruff
|
# SIM104 is already covered by pyupgrade ruff
|
||||||
|
|
|
||||||
6
.github/scripts/runner_determinator.py
vendored
6
.github/scripts/runner_determinator.py
vendored
|
|
@ -623,9 +623,9 @@ def main() -> None:
|
||||||
is_canary,
|
is_canary,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception:
|
except Exception as e:
|
||||||
log.exception(
|
log.error(
|
||||||
"Failed to get issue. Defaulting to Meta runners and no experiments."
|
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
|
||||||
)
|
)
|
||||||
|
|
||||||
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
|
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
|
||||||
|
|
|
||||||
|
|
@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
|
||||||
f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
|
f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
log.exception("Failed to save memory snapshot")
|
log.error("Failed to save memory snapshot, %s", e)
|
||||||
|
|
||||||
torch.cuda.memory._record_memory_history(enabled=None)
|
torch.cuda.memory._record_memory_history(enabled=None)
|
||||||
|
|
||||||
|
|
@ -2742,7 +2742,7 @@ class BenchmarkRunner:
|
||||||
try:
|
try:
|
||||||
shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
|
shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
|
||||||
except OSError:
|
except OSError:
|
||||||
log.exception("Could not find repro script for model %s", name)
|
log.error("Could not find repro script for model %s", name)
|
||||||
else:
|
else:
|
||||||
log.info(
|
log.info(
|
||||||
"Repro script for model %s with minified graph saved to %s",
|
"Repro script for model %s with minified graph saved to %s",
|
||||||
|
|
|
||||||
|
|
@ -197,7 +197,6 @@ select = [
|
||||||
"TC",
|
"TC",
|
||||||
"TRY002", # ban vanilla raise (todo fix NOQAs)
|
"TRY002", # ban vanilla raise (todo fix NOQAs)
|
||||||
"TRY203",
|
"TRY203",
|
||||||
"TRY400", # use logging.exception
|
|
||||||
"TRY401", # verbose-log-message
|
"TRY401", # verbose-log-message
|
||||||
"UP",
|
"UP",
|
||||||
"YTT",
|
"YTT",
|
||||||
|
|
|
||||||
|
|
@ -47,15 +47,11 @@ def requirements_installed() -> bool:
|
||||||
|
|
||||||
return True
|
return True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Requirements not installed, run the following command to install:",
|
"Requirements not installed, run the following command to install:"
|
||||||
exc_info=False,
|
|
||||||
)
|
)
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
" > %s -m pip install -r %s/requirements.txt",
|
" > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
|
||||||
sys.executable,
|
|
||||||
ROOT_PATH,
|
|
||||||
exc_info=False,
|
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ def wrap_compiler_debug(
|
||||||
example_inputs,
|
example_inputs,
|
||||||
compiler_name,
|
compiler_name,
|
||||||
)
|
)
|
||||||
log.exception("CompilerError")
|
log.error("CompilerError")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# We may run regular PyTorch compute that may trigger Dynamo, do NOT
|
# We may run regular PyTorch compute that may trigger Dynamo, do NOT
|
||||||
|
|
|
||||||
|
|
@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
|
||||||
if verbose:
|
if verbose:
|
||||||
log.exception("jit error")
|
log.exception("jit error")
|
||||||
else:
|
else:
|
||||||
log.error("Both torch.jit.trace and torch.jit.script failed") # noqa: TRY400
|
log.error("Both torch.jit.trace and torch.jit.script failed")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -359,7 +359,7 @@ class Guard:
|
||||||
except Exception:
|
except Exception:
|
||||||
log.exception("Error while creating guard:\n%s", str(self).rstrip())
|
log.exception("Error while creating guard:\n%s", str(self).rstrip())
|
||||||
if self.stack:
|
if self.stack:
|
||||||
log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip()) # noqa: TRY400
|
log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def is_specialized_nn_module(self):
|
def is_specialized_nn_module(self):
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
|
||||||
major, minor = torch.cuda.get_device_capability(0)
|
major, minor = torch.cuda.get_device_capability(0)
|
||||||
return str(major * 10 + minor)
|
return str(major * 10 + minor)
|
||||||
return str(cuda_arch)
|
return str(cuda_arch)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
log.exception("Error getting cuda arch")
|
log.error("Error getting cuda arch: %s", e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
|
||||||
if cuda_version is None:
|
if cuda_version is None:
|
||||||
cuda_version = torch.version.cuda
|
cuda_version = torch.version.cuda
|
||||||
return cuda_version
|
return cuda_version
|
||||||
except Exception:
|
except Exception as e:
|
||||||
log.exception("Error getting cuda version")
|
log.error("Error getting cuda version: %s", e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
log.error( # noqa: TRY400
|
log.error(
|
||||||
"Invalid value of %s for %s. Expected one of %s. Using default.",
|
"Invalid value of %s for %s. Expected one of %s. Using default.",
|
||||||
value,
|
value,
|
||||||
name,
|
name,
|
||||||
|
|
|
||||||
|
|
@ -796,13 +796,13 @@ def create_node_mapping(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Since this is just logging code, it should never interfere with regular
|
# Since this is just logging code, it should never interfere with regular
|
||||||
# program execution, so we use this try-except to guard against any error
|
# program execution, so we use this try-except to guard against any error
|
||||||
log.error("Unexpected error in create_node_mapping: %s", e) # noqa: TRY400
|
log.error("Unexpected error in create_node_mapping: %s", e)
|
||||||
log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json) # noqa: TRY400
|
log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)
|
||||||
log.error( # noqa: TRY400
|
log.error(
|
||||||
"triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
|
"triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
|
||||||
)
|
)
|
||||||
log.error("pre_grad_graph_id: %s", pre_grad_graph_id) # noqa: TRY400
|
log.error("pre_grad_graph_id: %s", pre_grad_graph_id)
|
||||||
log.error(traceback.format_exc()) # noqa: TRY400
|
log.error(traceback.format_exc())
|
||||||
return empty_return
|
return empty_return
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
|
||||||
)
|
)
|
||||||
self.compiled_fn_runner = getattr(code_cache, "runner", None)
|
self.compiled_fn_runner = getattr(code_cache, "runner", None)
|
||||||
except OSError:
|
except OSError:
|
||||||
log.exception("Failed to load artifact: %s", artifact_path)
|
log.error("Failed to load artifact: %s", artifact_path)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return artifact_path
|
return artifact_path
|
||||||
|
|
|
||||||
|
|
@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||||
try:
|
try:
|
||||||
timing = cls.benchmark_choice(choice, autotune_args)
|
timing = cls.benchmark_choice(choice, autotune_args)
|
||||||
except CUDACompileError as e:
|
except CUDACompileError as e:
|
||||||
log.error( # noqa: TRY400
|
log.error(
|
||||||
"CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
|
"CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
|
||||||
e,
|
str(e),
|
||||||
)
|
)
|
||||||
timing = float("inf")
|
timing = float("inf")
|
||||||
except NotImplementedError as e:
|
except NotImplementedError as e:
|
||||||
|
|
@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||||
else:
|
else:
|
||||||
if "illegal memory access" in msg:
|
if "illegal memory access" in msg:
|
||||||
msg += "\n\nEither error in template or triton bug.\n"
|
msg += "\n\nEither error in template or triton bug.\n"
|
||||||
log.error( # noqa: TRY400
|
log.error(
|
||||||
"Runtime error during autotuning: \n%s. \nIgnoring this choice.",
|
"Runtime error during autotuning: \n%s. \nIgnoring this choice.",
|
||||||
msg,
|
msg,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
|
||||||
try:
|
try:
|
||||||
col /= torch.norm(col, dim=1, keepdim=True)
|
col /= torch.norm(col, dim=1, keepdim=True)
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
logger.exception(
|
logger.error(
|
||||||
"The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
|
"The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
|
||||||
"as `orthogonalization_epsilon` in PowerSGD state."
|
"as `orthogonalization_epsilon` in PowerSGD state."
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
|
||||||
f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}" # noqa: G004
|
f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}" # noqa: G004
|
||||||
)
|
)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
logger.exception("Checkpoint background process encountered an exception")
|
logger.error(
|
||||||
|
f"Checkpoint background process encountered an exception: {e}" # noqa: G004
|
||||||
|
)
|
||||||
parent_conn.send(e)
|
parent_conn.send(e)
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ def _dcp_method_logger(
|
||||||
msg_dict["event"] = "exception"
|
msg_dict["event"] = "exception"
|
||||||
msg_dict["error"] = f"{error}"
|
msg_dict["error"] = f"{error}"
|
||||||
msg_dict["time"] = time.time_ns()
|
msg_dict["time"] = time.time_ns()
|
||||||
_dcp_logger.error(msg_dict) # noqa: TRY400
|
_dcp_logger.error(msg_dict)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# end event
|
# end event
|
||||||
|
|
|
||||||
|
|
@ -141,7 +141,7 @@ class TailLog:
|
||||||
try:
|
try:
|
||||||
f.result()
|
f.result()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"error in log tailor for %s%s. %s: %s",
|
"error in log tailor for %s%s. %s: %s",
|
||||||
self._name,
|
self._name,
|
||||||
local_rank,
|
local_rank,
|
||||||
|
|
|
||||||
|
|
@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
|
||||||
# do the communication
|
# do the communication
|
||||||
_wait_batch_p2p(_batch_p2p(ops))
|
_wait_batch_p2p(_batch_p2p(ops))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"[Rank %s] pipeline schedule %s caught the following exception \
|
"[Rank %s] pipeline schedule %s caught the following exception \
|
||||||
at time_step %s when running action %s",
|
at time_step %s when running action %s",
|
||||||
self.rank,
|
self.rank,
|
||||||
|
|
@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
|
||||||
time_step,
|
time_step,
|
||||||
action,
|
action,
|
||||||
)
|
)
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"%s",
|
"%s",
|
||||||
_format_pipeline_order(
|
_format_pipeline_order(
|
||||||
self.pipeline_order, error_step_number=time_step
|
self.pipeline_order, error_step_number=time_step
|
||||||
|
|
@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"{action=} is unknown or unsupported")
|
raise ValueError(f"{action=} is unknown or unsupported")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
|
"_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
|
||||||
time_step,
|
time_step,
|
||||||
action,
|
action,
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
|
||||||
try:
|
try:
|
||||||
store.wait([returned])
|
store.wait([returned])
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Group membership token %s timed out waiting for %s to be released.",
|
"Group membership token %s timed out waiting for %s to be released.",
|
||||||
my_token,
|
my_token,
|
||||||
returned,
|
returned,
|
||||||
|
|
|
||||||
|
|
@ -297,7 +297,7 @@ def _barrier(worker_names):
|
||||||
try:
|
try:
|
||||||
_all_gather(None, set(worker_names))
|
_all_gather(None, set(worker_names))
|
||||||
except RuntimeError as ex:
|
except RuntimeError as ex:
|
||||||
logger.error("Failed to complete barrier, got error %s", ex) # noqa: TRY400
|
logger.error("Failed to complete barrier, got error %s", ex)
|
||||||
|
|
||||||
|
|
||||||
@_require_initialized
|
@_require_initialized
|
||||||
|
|
@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
|
||||||
try:
|
try:
|
||||||
_all_gather(None, timeout=timeout)
|
_all_gather(None, timeout=timeout)
|
||||||
except RuntimeError as ex:
|
except RuntimeError as ex:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
|
"Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
|
||||||
)
|
)
|
||||||
raise ex
|
raise ex
|
||||||
|
|
|
||||||
|
|
@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
|
||||||
error_type = t.__module__ + "." + t.__qualname__
|
error_type = t.__module__ + "." + t.__qualname__
|
||||||
case_name = get_class_if_classified_error(e)
|
case_name = get_class_if_classified_error(e)
|
||||||
if case_name is not None:
|
if case_name is not None:
|
||||||
log.error(exportdb_error_message(case_name)) # noqa: TRY400
|
log.error(exportdb_error_message(case_name))
|
||||||
log_export_usage(
|
log_export_usage(
|
||||||
event="export.error.classified",
|
event="export.error.classified",
|
||||||
type=error_type,
|
type=error_type,
|
||||||
|
|
|
||||||
|
|
@ -312,7 +312,7 @@ def record_shapeenv_event(
|
||||||
if not shape_env.should_record_events or shape_env.is_recording:
|
if not shape_env.should_record_events or shape_env.is_recording:
|
||||||
# If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
|
# If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
|
||||||
raise
|
raise
|
||||||
log.error( # noqa: G201, TRY400
|
log.error( # noqa: G201
|
||||||
"failed while running %s(*%s, **%s)",
|
"failed while running %s(*%s, **%s)",
|
||||||
name,
|
name,
|
||||||
args[1:],
|
args[1:],
|
||||||
|
|
@ -349,7 +349,7 @@ def replay_shape_env_events(events):
|
||||||
# change after each event is replayed.
|
# change after each event is replayed.
|
||||||
event.run(shape_env)
|
event.run(shape_env)
|
||||||
except Exception:
|
except Exception:
|
||||||
log.error("failed when running event: %s", event) # noqa: TRY400
|
log.error("failed when running event: %s", event)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return shape_env
|
return shape_env
|
||||||
|
|
|
||||||
|
|
@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
|
||||||
)
|
)
|
||||||
sys.exit(TEST_SKIPS["generic"].exit_code)
|
sys.exit(TEST_SKIPS["generic"].exit_code)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Caught exception: \n%s exiting " "process %s with exit code: %s",
|
"Caught exception: \n%s exiting " "process %s with exit code: %s",
|
||||||
traceback.format_exc(),
|
traceback.format_exc(),
|
||||||
self.rank,
|
self.rank,
|
||||||
|
|
@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
|
||||||
pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
|
pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
|
||||||
pipes.append((i, pipe))
|
pipes.append((i, pipe))
|
||||||
except ConnectionError as e:
|
except ConnectionError as e:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Encountered error while trying to get traceback for process %s: %s",
|
"Encountered error while trying to get traceback for process %s: %s",
|
||||||
i,
|
i,
|
||||||
e,
|
e,
|
||||||
|
|
@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
|
||||||
"Could not retrieve traceback for timed out process: %s", rank
|
"Could not retrieve traceback for timed out process: %s", rank
|
||||||
)
|
)
|
||||||
except ConnectionError as e:
|
except ConnectionError as e:
|
||||||
logger.error( # noqa: TRY400
|
logger.error(
|
||||||
"Encountered error while trying to get traceback for process %s: %s",
|
"Encountered error while trying to get traceback for process %s: %s",
|
||||||
rank,
|
rank,
|
||||||
e,
|
e,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user