Lint rule to forbid direct use of logging.info/etc APIs (#90907)
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/90907
Approved by: https://github.com/jansel
parent 82a191313e
commit eef019c14a
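The change enforces the standard Python logging idiom: module-level calls such as logging.info() go through the root logger, so a library that uses them cannot be filtered or reconfigured per module. Defining a module logger with logging.getLogger(__name__) attributes every record to the module's dotted name instead. A minimal sketch of the two styles (illustrative only, not code from this commit):

    import logging

    # Root-logger style, which the new ROOT_LOGGING lint forbids: the record
    # is attributed to the root logger, so downstream applications cannot
    # adjust verbosity for this module alone.
    logging.warning("something happened")

    # Module-logger style, which the lint suggests instead: the record carries
    # this module's __name__ and inherits configuration from parent loggers.
    log = logging.getLogger(__name__)
    log.warning("something happened")

    # e.g. an application embedding this module could silence only it:
    logging.getLogger(__name__).setLevel(logging.ERROR)

Because logger names follow the package hierarchy, a logger named torch.foo.bar also picks up handlers and levels configured on a parent torch logger.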
@@ -615,6 +615,37 @@ command = [
     '@{{PATHSFILE}}'
 ]
 
+[[linter]]
+code = 'ROOT_LOGGING'
+include_patterns = [
+    '**/*.py',
+]
+# These are not library code, but scripts in their own right, and so
+# therefore are permitted to use logging
+exclude_patterns = [
+    'tools/**',
+    'test/**',
+    'benchmarks/**',
+    'torch/distributed/run.py',
+    'functorch/benchmarks/**',
+    # Grandfathered in
+    'caffe2/**',
+]
+command = [
+    'python3',
+    'tools/linter/adapters/grep_linter.py',
+    '--pattern=logging\.(debug|info|warn|warning|error|critical|log|exception)\(',
+    '--replace-pattern=s/logging\.(debug|info|warn|warning|error|critical|log|exception)\(/log.\1(/',
+    '--linter-name=ROOT_LOGGING',
+    '--error-name=use of root logger',
+    """--error-description=\
+        Do not use root logger (logging.info, etc) directly; instead \
+        define 'log = logging.getLogger(__name__)' and call, e.g., log.info().
+    """,
+    '--',
+    '@{{PATHSFILE}}'
+]
+
 [[linter]]
 code = 'CMAKE'
 include_patterns = [
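The --pattern argument is the regular expression that flags any call on the root logger, and --replace-pattern is a sed-style substitution that proposes the fix: swap the logging. prefix for log. while keeping the captured method name. A rough Python illustration of the same rewrite, assuming nothing about how grep_linter.py is implemented internally:

    import re

    # Same alternation as the lint's --pattern: any root-logger call site.
    PATTERN = re.compile(
        r"logging\.(debug|info|warn|warning|error|critical|log|exception)\("
    )

    def suggest_fix(line: str) -> str:
        # Mirrors the sed-style --replace-pattern: keep the method name
        # (capture group 1) and swap the "logging." prefix for "log.".
        return PATTERN.sub(r"log.\1(", line)

    print(suggest_fix('logging.info("converting frame raised error")'))
    # -> log.info("converting frame raised error")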
@@ -481,11 +481,11 @@ def convert_frame(compiler_fn: CompilerFn, hooks: Hooks):
             counters["frames"]["ok"] += 1
             return result
         except (NotImplementedError, Unsupported):
-            logging.info("converting frame raised unsupported, leaving it unconverted")
+            log.info("converting frame raised unsupported, leaving it unconverted")
         except Exception:
             if not config.suppress_errors:
                 raise
-            logging.info("converting frame raised error, suppressing error")
+            log.info("converting frame raised error, suppressing error")
         return None
 
     _convert_frame._torchdynamo_orig_callable = compiler_fn  # type: ignore[attr-defined]
@@ -1264,7 +1264,7 @@ def aot_wrapper_dedupe(flat_fn, flat_args: List[Tensor], aot_config: AOTConfig,
             flat_fn
         )(*flat_args)
     except RuntimeError as e:
-        logging.warning(
+        log.warning(
             "Failed to collect metadata on function, produced code may be suboptimal. "
             "Known situations this can occur are inference mode only compilation involving "
             "resize_ or prims (!schema.hasAnyAliasInfo() INTERNAL ASSERT FAILED); "
@@ -22,6 +22,8 @@ from .partitioners import (
 import torch.utils._pytree as pytree
 
 
+log = logging.getLogger(__name__)
+
 
 # These canonicalizations are needed here (and not decompositions), as the ops
 # we're trying to canonicalize to CompositeImplicitAutograd.
@@ -333,7 +335,7 @@ def _save_fx_default(current_name, folder_name, dump_example_input, gm, example_
     def graph_saver_helper(gm_to_save, args, type_name):
         global graph_index
         if len(gm_to_save.graph.nodes) == 0:
-            logging.log(
+            log.log(
                 logging.WARNING,
                 f"No nodes in graph {current_name}_{type_name}_{graph_index}.",
             )
@@ -6,12 +6,14 @@ from abc import ABC
 from contextlib import contextmanager
 from typing import Callable, Generic, List, NamedTuple, Optional, Set, TypeVar
 
+log = logging.getLogger(__name__)
+
 # TODO(voz): Stolen pattern, not sure why this is the case,
 # but mypy complains.
 try:
     import sympy  # type: ignore[import]
 except ImportError:
-    logging.warning("No sympy found")
+    log.warning("No sympy found")
 
 """
 torch._guards is the definitional source of truth for general purpose guard structures.
@@ -13,6 +13,8 @@ from typing import Any, Dict, List, Optional, Set
 
 __all__ = ['TimerRequest', 'TimerClient', 'RequestQueue', 'TimerServer', 'configure', 'expires']
 
+log = logging.getLogger(__name__)
+
 class TimerRequest:
     """
     Data object representing a countdown timer acquisition and release
@@ -168,7 +170,7 @@ class TimerServer(abc.ABC):
         try:
             return self._reap_worker(worker_id)
         except Exception as e:
-            logging.error(
+            log.error(
                 "Uncaught exception thrown from _reap_worker(), "
                 "check that the implementation correctly catches exceptions",
                 exc_info=e,
@@ -180,7 +182,7 @@ class TimerServer(abc.ABC):
             try:
                 self._run_watchdog()
             except Exception as e:
-                logging.error("Error running watchdog", exc_info=e)
+                log.error("Error running watchdog", exc_info=e)
 
     def _run_watchdog(self):
         batch_size = max(1, self._request_queue.size())
@@ -189,15 +191,15 @@ class TimerServer(abc.ABC):
         now = time.time()
         reaped_worker_ids = set()
         for worker_id, expired_timers in self.get_expired_timers(now).items():
-            logging.info(
+            log.info(
                 f"Reaping worker_id=[{worker_id}]."
                 f" Expired timers: {self._get_scopes(expired_timers)}"
             )
             if self._reap_worker_no_throw(worker_id):
-                logging.info(f"Successfully reaped worker=[{worker_id}]")
+                log.info(f"Successfully reaped worker=[{worker_id}]")
                 reaped_worker_ids.add(worker_id)
             else:
-                logging.error(
+                log.error(
                     f"Error reaping worker=[{worker_id}]. Will retry on next watchdog."
                 )
         self.clear_timers(reaped_worker_ids)
@@ -206,7 +208,7 @@ class TimerServer(abc.ABC):
         return [r.scope_id for r in timer_requests]
 
     def start(self) -> None:
-        logging.info(
+        log.info(
             f"Starting {type(self).__name__}..."
             f" max_interval={self._max_interval},"
             f" daemon={self._daemon}"
@@ -214,18 +216,18 @@ class TimerServer(abc.ABC):
         self._watchdog_thread = threading.Thread(
             target=self._watchdog_loop, daemon=self._daemon
         )
-        logging.info("Starting watchdog thread...")
+        log.info("Starting watchdog thread...")
         self._watchdog_thread.start()
 
     def stop(self) -> None:
-        logging.info(f"Stopping {type(self).__name__}")
+        log.info(f"Stopping {type(self).__name__}")
         self._stop_signaled = True
         if self._watchdog_thread:
-            logging.info("Stopping watchdog thread...")
+            log.info("Stopping watchdog thread...")
             self._watchdog_thread.join(self._max_interval)
             self._watchdog_thread = None
         else:
-            logging.info("No watchdog thread running, doing nothing")
+            log.info("No watchdog thread running, doing nothing")
 
 
 _timer_client = None
@@ -237,7 +239,7 @@ def configure(timer_client: TimerClient):
     """
     global _timer_client
    _timer_client = timer_client
-    logging.info(f"Timer client configured to: {type(_timer_client).__name__}")
+    log.info(f"Timer client configured to: {type(_timer_client).__name__}")
 
 
 @contextmanager
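Several of the watchdog paths above hand the caught exception to the logger via exc_info=e, which makes the logging module append the full traceback to the record instead of silently dropping it when the error is swallowed. A small sketch of the pattern, written as a standalone stand-in for the _reap_worker_no_throw method above (illustrative, not the actual implementation):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    def _reap_worker_no_throw() -> bool:
        # Catch everything, keep the traceback in the log via exc_info,
        # and report failure to the caller rather than raising.
        try:
            raise RuntimeError("worker did not respond")  # stand-in for _reap_worker()
        except Exception as e:
            log.error("Uncaught exception thrown from _reap_worker()", exc_info=e)
            return False

    _reap_worker_no_throw()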
@@ -19,6 +19,8 @@ from torch.distributed.elastic.timer.api import TimerClient, TimerRequest
 
 __all__ = ["FileTimerClient", "FileTimerRequest", "FileTimerServer"]
 
+log = logging.getLogger(__name__)
+
 class FileTimerRequest(TimerRequest):
     """
     Data object representing a countdown timer acquisition and release
@@ -175,25 +177,25 @@ class FileTimerServer:
 
 
     def start(self) -> None:
-        logging.info(
+        log.info(
             f"Starting {type(self).__name__}..."
             f" max_interval={self._max_interval},"
             f" daemon={self._daemon}"
         )
         self._watchdog_thread = threading.Thread(target=self._watchdog_loop, daemon=self._daemon)
-        logging.info("Starting watchdog thread...")
+        log.info("Starting watchdog thread...")
         self._watchdog_thread.start()
         self._log_event("watchdog started", None)
 
     def stop(self) -> None:
-        logging.info(f"Stopping {type(self).__name__}")
+        log.info(f"Stopping {type(self).__name__}")
         self._stop_signaled = True
         if self._watchdog_thread:
-            logging.info("Stopping watchdog thread...")
+            log.info("Stopping watchdog thread...")
             self._watchdog_thread.join(self._max_interval)
             self._watchdog_thread = None
         else:
-            logging.info("No watchdog thread running, doing nothing")
+            log.info("No watchdog thread running, doing nothing")
         if os.path.exists(self._file_path):
             os.remove(self._file_path)
         self._log_event("watchdog stopped", None)
@@ -201,11 +203,11 @@ class FileTimerServer:
     def run_once(self) -> None:
         self._run_once = True
         if self._watchdog_thread:
-            logging.info("Stopping watchdog thread...")
+            log.info("Stopping watchdog thread...")
             self._watchdog_thread.join()
             self._watchdog_thread = None
         else:
-            logging.info("No watchdog thread running, doing nothing")
+            log.info("No watchdog thread running, doing nothing")
         if os.path.exists(self._file_path):
             os.remove(self._file_path)
 
@@ -223,7 +225,7 @@ class FileTimerServer:
                 if run_once:
                     break
             except Exception as e:
-                logging.error("Error running watchdog", exc_info=e)
+                log.error("Error running watchdog", exc_info=e)
 
     def _run_watchdog(self, fd: io.TextIOWrapper) -> None:
         timer_requests = self._get_requests(fd, self._max_interval)
@@ -231,7 +233,7 @@ class FileTimerServer:
         now = time.time()
         reaped_worker_pids = set()
         for worker_pid, expired_timers in self.get_expired_timers(now).items():
-            logging.info(f"Reaping worker_pid=[{worker_pid}]." f" Expired timers: {self._get_scopes(expired_timers)}")
+            log.info(f"Reaping worker_pid=[{worker_pid}]." f" Expired timers: {self._get_scopes(expired_timers)}")
             reaped_worker_pids.add(worker_pid)
             # In case we have multiple expired timers, we find the first timer
             # with a valid signal (>0) in the expiration time order.
@@ -245,13 +247,13 @@ class FileTimerServer:
                     expired_timer = timer
                     break
             if signal <= 0:
-                logging.info(f"No signal specified with worker=[{worker_pid}]. Do not reap it.")
+                log.info(f"No signal specified with worker=[{worker_pid}]. Do not reap it.")
                 continue
             if self._reap_worker(worker_pid, signal):
-                logging.info(f"Successfully reaped worker=[{worker_pid}] with signal={signal}")
+                log.info(f"Successfully reaped worker=[{worker_pid}] with signal={signal}")
                 self._log_event("kill worker process", expired_timer)
             else:
-                logging.error(f"Error reaping worker=[{worker_pid}]. Will retry on next watchdog.")
+                log.error(f"Error reaping worker=[{worker_pid}]. Will retry on next watchdog.")
         self.clear_timers(reaped_worker_pids)
 
     def _get_scopes(self, timer_requests: List[FileTimerRequest]) -> List[str]:
@@ -323,8 +325,8 @@ class FileTimerServer:
             os.kill(worker_pid, signal)
             return True
         except ProcessLookupError:
-            logging.info(f"Process with pid={worker_pid} does not exist. Skipping")
+            log.info(f"Process with pid={worker_pid} does not exist. Skipping")
             return True
         except Exception as e:
-            logging.error(f"Error terminating pid={worker_pid}", exc_info=e)
+            log.error(f"Error terminating pid={worker_pid}", exc_info=e)
             return False
@@ -15,6 +15,8 @@ from .api import RequestQueue, TimerClient, TimerRequest, TimerServer
 
 __all__ = ['LocalTimerClient', 'MultiprocessingRequestQueue', 'LocalTimerServer']
 
+log = logging.getLogger(__name__)
+
 class LocalTimerClient(TimerClient):
     """
     Client side of ``LocalTimerServer``. This client is meant to be used
@@ -116,8 +118,8 @@ class LocalTimerServer(TimerServer):
             os.kill(worker_id, signal.SIGKILL)
             return True
         except ProcessLookupError:
-            logging.info(f"Process with pid={worker_id} does not exist. Skipping")
+            log.info(f"Process with pid={worker_id} does not exist. Skipping")
             return True
         except Exception as e:
-            logging.error(f"Error terminating pid={worker_id}", exc_info=e)
+            log.error(f"Error terminating pid={worker_id}", exc_info=e)
             return False
@@ -9,6 +9,7 @@ from torch.cuda.amp.grad_scaler import _MultiDeviceReplicator, GradScaler, OptSt
 from torch.distributed.distributed_c10d import ProcessGroup
 from torch.optim.sgd import SGD
 
+log = logging.getLogger(__name__)
 
 def _refresh_per_optimizer_state():
     return {"stage": OptState.READY, "found_inf_per_device": {}}
@@ -158,7 +159,7 @@ class ShardedGradScaler(GradScaler):
         for grad in grads:
             for tensor in grad:
                 if tensor.device != expected_device:
-                    logging.error(
+                    log.error(
                         "tensor device is %s and expected device is %s"
                         % (tensor.device, expected_device)
                     )
@@ -32,6 +32,7 @@ from torch.profiler import _utils
 
 TensorAndID = Tuple["TensorKey", int]
 
+log = logging.getLogger(__name__)
 
 class Category(enum.Enum):
     INPUT = enum.auto()
@@ -348,7 +349,7 @@ class SizeMap:
             # the core PyTorch codebase.
             if prior_size != new_size:
                 delta = f"{prior_size} vs. {new_size}"
-                logging.warn(f"Mismatch between allocation and free: {delta}")
+                log.warn(f"Mismatch between allocation and free: {delta}")
 
         self._values.update(allocations)
 
@@ -13,6 +13,7 @@ from caffe2.python import core, workspace
 
 from typing import Set, Dict, Tuple, List
 
+log = logging.getLogger(__name__)
 
 def _make_unique_name(seen: Set[str], name: str, min_version: int = 0):
     """
@@ -754,7 +755,7 @@ def _try_get_shapes(nets):
         shapes, _ = workspace.InferShapesAndTypes(nets)
         return shapes
     except Exception as e:
-        logging.warning("Failed to compute shapes: %s", e)
+        log.warning("Failed to compute shapes: %s", e)
         return {}
 
 
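Two argument styles are visible in the converted calls: the last hunk passes a %s placeholder and the exception as separate arguments, so the logging module only formats the string if a handler actually emits the record, while other call sites interpolate f-strings eagerly before the level check runs. A short sketch of the deferred form (illustrative only, not part of this commit):

    import logging

    logging.basicConfig(level=logging.WARNING)
    log = logging.getLogger(__name__)

    # "%s" and its argument are combined lazily, inside the logging machinery,
    # only when the record passes the level and handler checks.
    log.warning("Failed to compute shapes: %s", ValueError("no shape info"))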