Lint rule to forbid direct use of logging.info/etc APIs (#90907)

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/90907
Approved by: https://github.com/jansel
This commit is contained in:
Edward Z. Yang 2022-12-16 08:56:17 +08:00 committed by PyTorch MergeBot
parent 82a191313e
commit eef019c14a
11 changed files with 79 additions and 35 deletions

View File

@@ -615,6 +615,37 @@ command = [
'@{{PATHSFILE}}'
]
[[linter]]
code = 'ROOT_LOGGING'
include_patterns = [
'**/*.py',
]
# These are not library code, but scripts in their own right, and so
# therefore are permitted to use logging
exclude_patterns = [
'tools/**',
'test/**',
'benchmarks/**',
'torch/distributed/run.py',
'functorch/benchmarks/**',
# Grandfathered in
'caffe2/**',
]
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=logging\.(debug|info|warn|warning|error|critical|log|exception)\(',
'--replace-pattern=s/logging\.(debug|info|warn|warning|error|critical|log|exception)\(/log.\1(/',
'--linter-name=ROOT_LOGGING',
'--error-name=use of root logger',
"""--error-description=\
Do not use root logger (logging.info, etc) directly; instead \
define 'log = logging.getLogger(__name__)' and call, e.g., log.info().
""",
'--',
'@{{PATHSFILE}}'
]
[[linter]]
code = 'CMAKE'
include_patterns = [

View File

@@ -481,11 +481,11 @@ def convert_frame(compiler_fn: CompilerFn, hooks: Hooks):
counters["frames"]["ok"] += 1
return result
except (NotImplementedError, Unsupported):
logging.info("converting frame raised unsupported, leaving it unconverted")
log.info("converting frame raised unsupported, leaving it unconverted")
except Exception:
if not config.suppress_errors:
raise
logging.info("converting frame raised error, suppressing error")
log.info("converting frame raised error, suppressing error")
return None
_convert_frame._torchdynamo_orig_callable = compiler_fn # type: ignore[attr-defined]

View File

@@ -1264,7 +1264,7 @@ def aot_wrapper_dedupe(flat_fn, flat_args: List[Tensor], aot_config: AOTConfig,
flat_fn
)(*flat_args)
except RuntimeError as e:
logging.warning(
log.warning(
"Failed to collect metadata on function, produced code may be suboptimal. "
"Known situations this can occur are inference mode only compilation involving "
"resize_ or prims (!schema.hasAnyAliasInfo() INTERNAL ASSERT FAILED); "

View File

@@ -22,6 +22,8 @@ from .partitioners import (
import torch.utils._pytree as pytree
log = logging.getLogger(__name__)
# These canonicalizations are needed here (and not decompositions), as the ops
# we're trying to canonicalize to CompositeImplicitAutograd.
@@ -333,7 +335,7 @@ def _save_fx_default(current_name, folder_name, dump_example_input, gm, example_
def graph_saver_helper(gm_to_save, args, type_name):
global graph_index
if len(gm_to_save.graph.nodes) == 0:
logging.log(
log.log(
logging.WARNING,
f"No nodes in graph {current_name}_{type_name}_{graph_index}.",
)

View File

@@ -6,12 +6,14 @@ from abc import ABC
from contextlib import contextmanager
from typing import Callable, Generic, List, NamedTuple, Optional, Set, TypeVar
log = logging.getLogger(__name__)
# TODO(voz): Stolen pattern, not sure why this is the case,
# but mypy complains.
try:
import sympy # type: ignore[import]
except ImportError:
logging.warning("No sympy found")
log.warning("No sympy found")
"""
torch._guards is the definitional source of truth for general purpose guard structures.

View File

@@ -13,6 +13,8 @@ from typing import Any, Dict, List, Optional, Set
__all__ = ['TimerRequest', 'TimerClient', 'RequestQueue', 'TimerServer', 'configure', 'expires']
log = logging.getLogger(__name__)
class TimerRequest:
"""
Data object representing a countdown timer acquisition and release
@@ -168,7 +170,7 @@ class TimerServer(abc.ABC):
try:
return self._reap_worker(worker_id)
except Exception as e:
logging.error(
log.error(
"Uncaught exception thrown from _reap_worker(), "
"check that the implementation correctly catches exceptions",
exc_info=e,
@@ -180,7 +182,7 @@ class TimerServer(abc.ABC):
try:
self._run_watchdog()
except Exception as e:
logging.error("Error running watchdog", exc_info=e)
log.error("Error running watchdog", exc_info=e)
def _run_watchdog(self):
batch_size = max(1, self._request_queue.size())
@@ -189,15 +191,15 @@ class TimerServer(abc.ABC):
now = time.time()
reaped_worker_ids = set()
for worker_id, expired_timers in self.get_expired_timers(now).items():
logging.info(
log.info(
f"Reaping worker_id=[{worker_id}]."
f" Expired timers: {self._get_scopes(expired_timers)}"
)
if self._reap_worker_no_throw(worker_id):
logging.info(f"Successfully reaped worker=[{worker_id}]")
log.info(f"Successfully reaped worker=[{worker_id}]")
reaped_worker_ids.add(worker_id)
else:
logging.error(
log.error(
f"Error reaping worker=[{worker_id}]. Will retry on next watchdog."
)
self.clear_timers(reaped_worker_ids)
@@ -206,7 +208,7 @@ class TimerServer(abc.ABC):
return [r.scope_id for r in timer_requests]
def start(self) -> None:
logging.info(
log.info(
f"Starting {type(self).__name__}..."
f" max_interval={self._max_interval},"
f" daemon={self._daemon}"
@@ -214,18 +216,18 @@ class TimerServer(abc.ABC):
self._watchdog_thread = threading.Thread(
target=self._watchdog_loop, daemon=self._daemon
)
logging.info("Starting watchdog thread...")
log.info("Starting watchdog thread...")
self._watchdog_thread.start()
def stop(self) -> None:
logging.info(f"Stopping {type(self).__name__}")
log.info(f"Stopping {type(self).__name__}")
self._stop_signaled = True
if self._watchdog_thread:
logging.info("Stopping watchdog thread...")
log.info("Stopping watchdog thread...")
self._watchdog_thread.join(self._max_interval)
self._watchdog_thread = None
else:
logging.info("No watchdog thread running, doing nothing")
log.info("No watchdog thread running, doing nothing")
_timer_client = None
@@ -237,7 +239,7 @@ def configure(timer_client: TimerClient):
"""
global _timer_client
_timer_client = timer_client
logging.info(f"Timer client configured to: {type(_timer_client).__name__}")
log.info(f"Timer client configured to: {type(_timer_client).__name__}")
@contextmanager

View File

@@ -19,6 +19,8 @@ from torch.distributed.elastic.timer.api import TimerClient, TimerRequest
__all__ = ["FileTimerClient", "FileTimerRequest", "FileTimerServer"]
log = logging.getLogger(__name__)
class FileTimerRequest(TimerRequest):
"""
Data object representing a countdown timer acquisition and release
@@ -175,25 +177,25 @@ class FileTimerServer:
def start(self) -> None:
logging.info(
log.info(
f"Starting {type(self).__name__}..."
f" max_interval={self._max_interval},"
f" daemon={self._daemon}"
)
self._watchdog_thread = threading.Thread(target=self._watchdog_loop, daemon=self._daemon)
logging.info("Starting watchdog thread...")
log.info("Starting watchdog thread...")
self._watchdog_thread.start()
self._log_event("watchdog started", None)
def stop(self) -> None:
logging.info(f"Stopping {type(self).__name__}")
log.info(f"Stopping {type(self).__name__}")
self._stop_signaled = True
if self._watchdog_thread:
logging.info("Stopping watchdog thread...")
log.info("Stopping watchdog thread...")
self._watchdog_thread.join(self._max_interval)
self._watchdog_thread = None
else:
logging.info("No watchdog thread running, doing nothing")
log.info("No watchdog thread running, doing nothing")
if os.path.exists(self._file_path):
os.remove(self._file_path)
self._log_event("watchdog stopped", None)
@@ -201,11 +203,11 @@ class FileTimerServer:
def run_once(self) -> None:
self._run_once = True
if self._watchdog_thread:
logging.info("Stopping watchdog thread...")
log.info("Stopping watchdog thread...")
self._watchdog_thread.join()
self._watchdog_thread = None
else:
logging.info("No watchdog thread running, doing nothing")
log.info("No watchdog thread running, doing nothing")
if os.path.exists(self._file_path):
os.remove(self._file_path)
@@ -223,7 +225,7 @@ class FileTimerServer:
if run_once:
break
except Exception as e:
logging.error("Error running watchdog", exc_info=e)
log.error("Error running watchdog", exc_info=e)
def _run_watchdog(self, fd: io.TextIOWrapper) -> None:
timer_requests = self._get_requests(fd, self._max_interval)
@@ -231,7 +233,7 @@ class FileTimerServer:
now = time.time()
reaped_worker_pids = set()
for worker_pid, expired_timers in self.get_expired_timers(now).items():
logging.info(f"Reaping worker_pid=[{worker_pid}]." f" Expired timers: {self._get_scopes(expired_timers)}")
log.info(f"Reaping worker_pid=[{worker_pid}]." f" Expired timers: {self._get_scopes(expired_timers)}")
reaped_worker_pids.add(worker_pid)
# In case we have multiple expired timers, we find the first timer
# with a valid signal (>0) in the expiration time order.
@@ -245,13 +247,13 @@ class FileTimerServer:
expired_timer = timer
break
if signal <= 0:
logging.info(f"No signal specified with worker=[{worker_pid}]. Do not reap it.")
log.info(f"No signal specified with worker=[{worker_pid}]. Do not reap it.")
continue
if self._reap_worker(worker_pid, signal):
logging.info(f"Successfully reaped worker=[{worker_pid}] with signal={signal}")
log.info(f"Successfully reaped worker=[{worker_pid}] with signal={signal}")
self._log_event("kill worker process", expired_timer)
else:
logging.error(f"Error reaping worker=[{worker_pid}]. Will retry on next watchdog.")
log.error(f"Error reaping worker=[{worker_pid}]. Will retry on next watchdog.")
self.clear_timers(reaped_worker_pids)
def _get_scopes(self, timer_requests: List[FileTimerRequest]) -> List[str]:
@@ -323,8 +325,8 @@ class FileTimerServer:
os.kill(worker_pid, signal)
return True
except ProcessLookupError:
logging.info(f"Process with pid={worker_pid} does not exist. Skipping")
log.info(f"Process with pid={worker_pid} does not exist. Skipping")
return True
except Exception as e:
logging.error(f"Error terminating pid={worker_pid}", exc_info=e)
log.error(f"Error terminating pid={worker_pid}", exc_info=e)
return False

View File

@@ -15,6 +15,8 @@ from .api import RequestQueue, TimerClient, TimerRequest, TimerServer
__all__ = ['LocalTimerClient', 'MultiprocessingRequestQueue', 'LocalTimerServer']
log = logging.getLogger(__name__)
class LocalTimerClient(TimerClient):
"""
Client side of ``LocalTimerServer``. This client is meant to be used
@@ -116,8 +118,8 @@ class LocalTimerServer(TimerServer):
os.kill(worker_id, signal.SIGKILL)
return True
except ProcessLookupError:
logging.info(f"Process with pid={worker_id} does not exist. Skipping")
log.info(f"Process with pid={worker_id} does not exist. Skipping")
return True
except Exception as e:
logging.error(f"Error terminating pid={worker_id}", exc_info=e)
log.error(f"Error terminating pid={worker_id}", exc_info=e)
return False

View File

@@ -9,6 +9,7 @@ from torch.cuda.amp.grad_scaler import _MultiDeviceReplicator, GradScaler, OptSt
from torch.distributed.distributed_c10d import ProcessGroup
from torch.optim.sgd import SGD
log = logging.getLogger(__name__)
def _refresh_per_optimizer_state():
return {"stage": OptState.READY, "found_inf_per_device": {}}
@@ -158,7 +159,7 @@ class ShardedGradScaler(GradScaler):
for grad in grads:
for tensor in grad:
if tensor.device != expected_device:
logging.error(
log.error(
"tensor device is %s and expected device is %s"
% (tensor.device, expected_device)
)

View File

@@ -32,6 +32,7 @@ from torch.profiler import _utils
TensorAndID = Tuple["TensorKey", int]
log = logging.getLogger(__name__)
class Category(enum.Enum):
INPUT = enum.auto()
@@ -348,7 +349,7 @@ class SizeMap:
# the core PyTorch codebase.
if prior_size != new_size:
delta = f"{prior_size} vs. {new_size}"
logging.warn(f"Mismatch between allocation and free: {delta}")
log.warn(f"Mismatch between allocation and free: {delta}")
self._values.update(allocations)

View File

@@ -13,6 +13,7 @@ from caffe2.python import core, workspace
from typing import Set, Dict, Tuple, List
log = logging.getLogger(__name__)
def _make_unique_name(seen: Set[str], name: str, min_version: int = 0):
"""
@@ -754,7 +755,7 @@ def _try_get_shapes(nets):
shapes, _ = workspace.InferShapesAndTypes(nets)
return shapes
except Exception as e:
logging.warning("Failed to compute shapes: %s", e)
log.warning("Failed to compute shapes: %s", e)
return {}