Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"

This reverts commit 4f4ecc583e. Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt because it seems to have broken internal signals. @albanD, may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
2025-12-06 12:20:52 +01:00 · 2025-05-16 08:29:26 +00:00 · 2025-05-16 08:29:26 +00:00 · 3443627e07
commit 3443627e07
parent 86c6f71ddb
23 changed files with 46 additions and 51 deletions
--- a/.flake8
+++ b/.flake8
@ -16,9 +16,7 @@ ignore =
    # these ignores are from flake8-comprehensions; please fix!
    C407,
    # these ignores are from flake8-logging-format; please fix!
-    G100,G101,G200,
+    G100,G101,G200
    # G201 replaced by LOG400 in ruff
    G201,
    # these ignores are from flake8-simplify. please fix or ignore with commented reason
    SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
    # SIM104 is already covered by pyupgrade ruff
--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@ -623,9 +623,9 @@ def main() -> None:
            is_canary,
        )
-    except Exception:
+    except Exception as e:
-        log.exception(
+        log.error(
-            "Failed to get issue. Defaulting to Meta runners and no experiments."
+            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
        )
    set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
                        f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
                    )
                )
-            except Exception:
+            except Exception as e:
-                log.exception("Failed to save memory snapshot")
+                log.error("Failed to save memory snapshot, %s", e)
            torch.cuda.memory._record_memory_history(enabled=None)
@ -2742,7 +2742,7 @@ class BenchmarkRunner:
        try:
            shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
        except OSError:
-            log.exception("Could not find repro script for model %s", name)
+            log.error("Could not find repro script for model %s", name)
        else:
            log.info(
                "Repro script for model %s with minified graph saved to %s",
--- a/pyproject.toml
+++ b/pyproject.toml
@ -197,7 +197,6 @@ select = [
    "TC",
    "TRY002", # ban vanilla raise (todo fix NOQAs)
    "TRY203",
    "TRY400", # use logging.exception
    "TRY401", # verbose-log-message
    "UP",
    "YTT",
--- a/tools/packaging/split_wheel.py
+++ b/tools/packaging/split_wheel.py
@ -47,15 +47,11 @@ def requirements_installed() -> bool:
        return True
    except ImportError:
-        logger.error(  # noqa: TRY400
+        logger.error(
-            "Requirements not installed, run the following command to install:",
+            "Requirements not installed, run the following command to install:"
            exc_info=False,
        )
-        logger.error(  # noqa: TRY400
+        logger.error(
-            "    > %s -m pip install -r %s/requirements.txt",
+            "    > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
            sys.executable,
            ROOT_PATH,
            exc_info=False,
        )
        return False
--- a/torch/_dynamo/repro/after_aot.py
+++ b/torch/_dynamo/repro/after_aot.py
@ -138,7 +138,7 @@ def wrap_compiler_debug(
                        example_inputs,
                        compiler_name,
                    )
-                log.exception("CompilerError")
+                log.error("CompilerError")
            raise
        # We may run regular PyTorch compute that may trigger Dynamo, do NOT
--- a/torch/_dynamo/utils.py
+++ b/torch/_dynamo/utils.py
@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
            if verbose:
                log.exception("jit error")
            else:
-                log.error("Both torch.jit.trace and torch.jit.script failed")  # noqa: TRY400
+                log.error("Both torch.jit.trace and torch.jit.script failed")
    return None
--- a/torch/_guards.py
+++ b/torch/_guards.py
@ -359,7 +359,7 @@ class Guard:
        except Exception:
            log.exception("Error while creating guard:\n%s", str(self).rstrip())
            if self.stack:
-                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())  # noqa: TRY400
+                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
            raise
    def is_specialized_nn_module(self):
--- a/torch/_inductor/codegen/cuda/cuda_env.py
+++ b/torch/_inductor/codegen/cuda/cuda_env.py
@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
            major, minor = torch.cuda.get_device_capability(0)
            return str(major * 10 + minor)
        return str(cuda_arch)
-    except Exception:
+    except Exception as e:
-        log.exception("Error getting cuda arch")
+        log.error("Error getting cuda arch: %s", e)
        return None
@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
        if cuda_version is None:
            cuda_version = torch.version.cuda
        return cuda_version
-    except Exception:
+    except Exception as e:
-        log.exception("Error getting cuda version")
+        log.error("Error getting cuda version: %s", e)
        return None
--- a/torch/_inductor/compile_fx.py
+++ b/torch/_inductor/compile_fx.py
@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
        import logging
        log = logging.getLogger(__name__)
-        log.error(  # noqa: TRY400
+        log.error(
            "Invalid value of %s for %s. Expected one of %s. Using default.",
            value,
            name,
--- a/torch/_inductor/debug.py
+++ b/torch/_inductor/debug.py
@ -796,13 +796,13 @@ def create_node_mapping(
    except Exception as e:
        # Since this is just logging code, it should never interfere with regular
        # program execution, so we use this try-except to guard against any error
-        log.error("Unexpected error in create_node_mapping: %s", e)  # noqa: TRY400
+        log.error("Unexpected error in create_node_mapping: %s", e)
-        log.error("post_to_pre_grad_nodes_json:  %s", post_to_pre_grad_nodes_json)  # noqa: TRY400
+        log.error("post_to_pre_grad_nodes_json:  %s", post_to_pre_grad_nodes_json)
-        log.error(  # noqa: TRY400
+        log.error(
            "triton_kernel_to_post_grad_json:  %s", triton_kernel_to_post_grad_json
        )
-        log.error("pre_grad_graph_id:  %s", pre_grad_graph_id)  # noqa: TRY400
+        log.error("pre_grad_graph_id:  %s", pre_grad_graph_id)
-        log.error(traceback.format_exc())  # noqa: TRY400
+        log.error(traceback.format_exc())
        return empty_return
--- a/torch/_inductor/output_code.py
+++ b/torch/_inductor/output_code.py
@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
                )
                self.compiled_fn_runner = getattr(code_cache, "runner", None)
        except OSError:
-            log.exception("Failed to load artifact: %s", artifact_path)
+            log.error("Failed to load artifact: %s", artifact_path)
            raise
        return artifact_path
--- a/torch/_inductor/select_algorithm.py
+++ b/torch/_inductor/select_algorithm.py
@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
            try:
                timing = cls.benchmark_choice(choice, autotune_args)
            except CUDACompileError as e:
-                log.error(  # noqa: TRY400
+                log.error(
                    "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
-                    e,
+                    str(e),
                )
                timing = float("inf")
            except NotImplementedError as e:
@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
                else:
                    if "illegal memory access" in msg:
                        msg += "\n\nEither error in template or triton bug.\n"
-                log.error(  # noqa: TRY400
+                log.error(
                    "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
                    msg,
                )
--- a/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
+++ b/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
            try:
                col /= torch.norm(col, dim=1, keepdim=True)
            except ZeroDivisionError:
-                logger.exception(
+                logger.error(
                    "The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
                    "as `orthogonalization_epsilon` in PowerSGD state."
                )
--- a/torch/distributed/checkpoint/_async_process_executor.py
+++ b/torch/distributed/checkpoint/_async_process_executor.py
@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
                    f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}"  # noqa: G004
                )
        except BaseException as e:
-            logger.exception("Checkpoint background process encountered an exception")
+            logger.error(
                f"Checkpoint background process encountered an exception: {e}"  # noqa: G004
            )
            parent_conn.send(e)
            raise
        finally:
--- a/torch/distributed/checkpoint/logger.py
+++ b/torch/distributed/checkpoint/logger.py
@ -90,7 +90,7 @@ def _dcp_method_logger(
                    msg_dict["event"] = "exception"
                    msg_dict["error"] = f"{error}"
                    msg_dict["time"] = time.time_ns()
-                    _dcp_logger.error(msg_dict)  # noqa: TRY400
+                    _dcp_logger.error(msg_dict)
                raise
            # end event
--- a/torch/distributed/elastic/multiprocessing/tail_log.py
+++ b/torch/distributed/elastic/multiprocessing/tail_log.py
@ -141,7 +141,7 @@ class TailLog:
            try:
                f.result()
            except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "error in log tailor for %s%s. %s: %s",
                    self._name,
                    local_rank,
--- a/torch/distributed/pipelining/schedules.py
+++ b/torch/distributed/pipelining/schedules.py
@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                # do the communication
                _wait_batch_p2p(_batch_p2p(ops))
            except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "[Rank %s] pipeline schedule %s caught the following exception \
                     at time_step %s when running action %s",
                    self.rank,
@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                    time_step,
                    action,
                )
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "%s",
                    _format_pipeline_order(
                        self.pipeline_order, error_step_number=time_step
@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
                else:
                    raise ValueError(f"{action=} is unknown or unsupported")
            except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "_PipelineScheduleRuntime caught exception at step %s when running action %s.  Full Schedule:",
                    time_step,
                    action,
--- a/torch/distributed/rpc/_utils.py
+++ b/torch/distributed/rpc/_utils.py
@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
            try:
                store.wait([returned])
            except RuntimeError:
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "Group membership token %s timed out waiting for %s to be released.",
                    my_token,
                    returned,
--- a/torch/distributed/rpc/api.py
+++ b/torch/distributed/rpc/api.py
@ -297,7 +297,7 @@ def _barrier(worker_names):
    try:
        _all_gather(None, set(worker_names))
    except RuntimeError as ex:
-        logger.error("Failed to complete barrier, got error %s", ex)  # noqa: TRY400
+        logger.error("Failed to complete barrier, got error %s", ex)
@_require_initialized
@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
    try:
        _all_gather(None, timeout=timeout)
    except RuntimeError as ex:
-        logger.error(  # noqa: TRY400
+        logger.error(
            "Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
        )
        raise ex
--- a/torch/export/_trace.py
+++ b/torch/export/_trace.py
@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
            error_type = t.__module__ + "." + t.__qualname__
            case_name = get_class_if_classified_error(e)
            if case_name is not None:
-                log.error(exportdb_error_message(case_name))  # noqa: TRY400
+                log.error(exportdb_error_message(case_name))
                log_export_usage(
                    event="export.error.classified",
                    type=error_type,
--- a/torch/fx/experimental/recording.py
+++ b/torch/fx/experimental/recording.py
@ -312,7 +312,7 @@ def record_shapeenv_event(
                if not shape_env.should_record_events or shape_env.is_recording:
                    # If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
                    raise
-                log.error(  # noqa: G201, TRY400
+                log.error(  # noqa: G201
                    "failed while running %s(*%s, **%s)",
                    name,
                    args[1:],
@ -349,7 +349,7 @@ def replay_shape_env_events(events):
            # change after each event is replayed.
            event.run(shape_env)
        except Exception:
-            log.error("failed when running event: %s", event)  # noqa: TRY400
+            log.error("failed when running event: %s", event)
            raise
    return shape_env
--- a/torch/testing/_internal/common_distributed.py
+++ b/torch/testing/_internal/common_distributed.py
@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
            )
            sys.exit(TEST_SKIPS["generic"].exit_code)
        except Exception:
-            logger.error(  # noqa: TRY400
+            logger.error(
                "Caught exception: \n%s exiting " "process %s with exit code: %s",
                traceback.format_exc(),
                self.rank,
@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
                    pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
                    pipes.append((i, pipe))
                except ConnectionError as e:
-                    logger.error(  # noqa: TRY400
+                    logger.error(
                        "Encountered error while trying to get traceback for process %s: %s",
                        i,
                        e,
@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
                        "Could not retrieve traceback for timed out process: %s", rank
                    )
            except ConnectionError as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                    "Encountered error while trying to get traceback for process %s: %s",
                    rank,
                    e,