diff --git a/.flake8 b/.flake8 index 1e61b459df9..bca578ce563 100644 --- a/.flake8 +++ b/.flake8 @@ -18,7 +18,7 @@ ignore = # these ignores are from flake8-comprehensions; please fix! C407, # these ignores are from flake8-logging-format; please fix! - G100,G101,G200 + G100,G101,G200,G201,G202 # these ignores are from flake8-simplify. please fix or ignore with commented reason SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12, # flake8-simplify code styles diff --git a/pyproject.toml b/pyproject.toml index 71157c4f3cf..279bd6fa058 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ ignore = [ "F821", "F841", # these ignores are from flake8-logging-format; please fix! - "G101", + "G101", "G201", "G202", # these ignores are from RUFF perf; please fix! "PERF203", "PERF4", # these ignores are from PYI; please fix! diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index 0ef173155e2..1b068402019 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -1315,8 +1315,9 @@ def get_guard_fail_reason( GuardFail(reason_str or "unknown reason", orig_code_map[code]) ) except Exception as e: - log.exception( + log.error( "Failure in guard_fail_fn callback - raising here will cause a NULL Error on guard eval", + exc_info=True, ) return reason_str diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py index 47275ea0418..ba876a0fbb8 100644 --- a/torch/_dynamo/utils.py +++ b/torch/_dynamo/utils.py @@ -400,7 +400,7 @@ def write_record_to_file(filename, exec_record): with open(filename, "wb") as f: exec_record.dump(f) except Exception: - log.exception("Unable to write execution record %s", filename) + log.error("Unable to write execution record %s", filename, exc_info=True) def count_calls(g: fx.Graph): diff --git a/torch/distributed/elastic/multiprocessing/api.py b/torch/distributed/elastic/multiprocessing/api.py index c7c870bdb07..32426be0801 100644 --- a/torch/distributed/elastic/multiprocessing/api.py +++ b/torch/distributed/elastic/multiprocessing/api.py @@ -477,13 +477,14 @@ class MultiprocessContext(PContext): failed_proc = self._pc.processes[failed_local_rank] error_filepath = self.error_files[failed_local_rank] - log.exception( + log.error( "failed (exitcode: %s)" " local_rank: %s (pid: %s)" " of fn: %s (start_method: %s)", failed_proc.exitcode, failed_local_rank, e.pid, fn_name, self.start_method, + exc_info=True, ) self.close() diff --git a/torch/distributed/elastic/timer/api.py b/torch/distributed/elastic/timer/api.py index 566a3d4acbc..6dd30889198 100644 --- a/torch/distributed/elastic/timer/api.py +++ b/torch/distributed/elastic/timer/api.py @@ -169,10 +169,11 @@ class TimerServer(abc.ABC): """ try: return self._reap_worker(worker_id) - except Exception: - log.exception( + except Exception as e: + log.error( "Uncaught exception thrown from _reap_worker(), " "check that the implementation correctly catches exceptions", + exc_info=e, ) return True @@ -180,8 +181,8 @@ class TimerServer(abc.ABC): while not self._stop_signaled: try: self._run_watchdog() - except Exception: - log.exception("Error running watchdog") + except Exception as e: + log.error("Error running watchdog", exc_info=e) def _run_watchdog(self): batch_size = max(1, self._request_queue.size()) diff --git a/torch/distributed/elastic/timer/file_based_local_timer.py b/torch/distributed/elastic/timer/file_based_local_timer.py index 26ebce33dcb..597000c6d20 100644 --- a/torch/distributed/elastic/timer/file_based_local_timer.py +++ b/torch/distributed/elastic/timer/file_based_local_timer.py @@ -225,8 +225,8 @@ class FileTimerServer: self._run_watchdog(fd) if run_once: break - except Exception: - log.exception("Error running watchdog") + except Exception as e: + log.error("Error running watchdog", exc_info=e) def _run_watchdog(self, fd: io.TextIOWrapper) -> None: timer_requests = self._get_requests(fd, self._max_interval) @@ -328,6 +328,6 @@ class FileTimerServer: except ProcessLookupError: log.info("Process with pid=%s does not exist. Skipping", worker_pid) return True - except Exception: - log.exception("Error terminating pid=%s", worker_pid) + except Exception as e: + log.error("Error terminating pid=%s", worker_pid, exc_info=e) return False diff --git a/torch/distributed/elastic/timer/local_timer.py b/torch/distributed/elastic/timer/local_timer.py index 05f467c807a..240163f1bf6 100644 --- a/torch/distributed/elastic/timer/local_timer.py +++ b/torch/distributed/elastic/timer/local_timer.py @@ -120,6 +120,6 @@ class LocalTimerServer(TimerServer): except ProcessLookupError: log.info("Process with pid=%s does not exist. Skipping", worker_id) return True - except Exception: - log.exception("Error terminating pid=%s", worker_id) + except Exception as e: + log.error("Error terminating pid=%s", worker_id, exc_info=e) return False