Fix flake8 B028 warnings (#166224)
This PR fixes flake8 B028 warnings by specifying `stacklevel=2` in `warnings.warn` calls. With `stacklevel=2`, a warning is attributed to the caller of the PyTorch API rather than to the line inside PyTorch that emits it, so users get more contextual information about where the warning originates.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/166224 Approved by: https://github.com/ezyang
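As background, here is a minimal sketch of what `stacklevel=2` changes in practice; the `library_api`/`user_code` names are made up for illustration and are not part of the patch:

import warnings


def library_api():
    # stacklevel=1 (the default) attributes the warning to this line inside the library.
    warnings.warn("library_api is deprecated")
    # stacklevel=2 attributes the warning to the caller of library_api(),
    # which is usually the user code that actually needs to change.
    warnings.warn("library_api is deprecated", stacklevel=2)


def user_code():
    library_api()  # with stacklevel=2, the reported file/line is this call site


user_code()

Flake8-bugbear's B028 check flags `warnings.warn` calls that omit an explicit `stacklevel`, which is why each call touched in the diff below gains a trailing `stacklevel=2,` argument.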
parent f863550192
commit a60d9e1f6d
@@ -2653,7 +2653,8 @@ def compile(
     if torch.compiler.is_exporting():
         warnings.warn(
             "You are calling torch.compile inside torch.export region. "
-            "To capture an useful graph, we will implicitly switch to torch.compile(backend=eager)"
+            "To capture an useful graph, we will implicitly switch to torch.compile(backend=eager)",
+            stacklevel=2,
         )
         from torch._higher_order_ops.utils import setup_compilation_env
@@ -55,6 +55,7 @@ def warn_deprecated():
         "torch._custom_op is deprecated and will be removed in PyTorch 2.6, please "
         "use the equivalent torch.library API instead.",
         DeprecationWarning,
+        stacklevel=2,
     )
@@ -704,7 +704,8 @@ class TS2FXGraphConverter:
         # In a sense, the converter now becomes an stateful interpreter
         warnings.warn(
             "Converting aten::append.t, which is a inplace mutation of the list. "
-            "This makes the converter non-functional: the result depends on the order of the append nodes being converter!"
+            "This makes the converter non-functional: the result depends on the order of the append nodes being converter!",
+            stacklevel=2,
         )

         args = tuple(self.get_fx_value_by_ir_value(inp) for inp in node.inputs())

@@ -1471,7 +1472,8 @@ DEBUG: (TORCH_LOGS="+export" <cmd>), additionally
         for k, tensor in self.ts_model.state_dict().items():  # type: ignore[union-attr]
             if k not in ep.state_dict:
                 warnings.warn(
-                    f"Manually populate {k} into state_dict ExportedProgram, but it is never used by the ExportedProgram."
+                    f"Manually populate {k} into state_dict ExportedProgram, but it is never used by the ExportedProgram.",
+                    stacklevel=2,
                 )
                 ep.state_dict[k] = tensor
@@ -51,7 +51,8 @@ def _generate_inputs_for_submodules(
         model(*args, **kwargs)
     except Exception as e:
         warnings.warn(
-            f"Failed to generate submodule inputs because of the following error:\n{e}"
+            f"Failed to generate submodule inputs because of the following error:\n{e}",
+            stacklevel=2,
         )
     finally:
         for h in handles:
@@ -321,5 +321,6 @@ def _detect_attribute_assignment(mod: torch.nn.Module):
         warnings.warn(
             f"The tensor {noun} {', '.join(assigned_tensor_attributes)} {verb} assigned during export. "
             "Such attributes must be registered as buffers using the `register_buffer` API "
-            "(https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.register_buffer)."
+            "(https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.register_buffer).",
+            stacklevel=2,
         )
@@ -137,7 +137,8 @@ def call_func_at_runtime_with_args(
         warnings.warn(
             "Your compiler for AOTAutograd is returning a function that doesn't take boxed arguments. "
             "Please wrap it with functorch.compile.make_boxed_func or handle the boxed arguments yourself. "
-            "See https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670 for rationale."
+            "See https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670 for rationale.",
+            stacklevel=2,
         )
         out = normalize_as_list(f(*args))
         return out
@@ -518,7 +518,8 @@ def do_auto_functionalize(
     if "self" in unwrapped_kwargs or "self_" in unwrapped_kwargs:
         warnings.warn(
             "Using `self` or `self_` as an argument in the definition of custom ops may lead to ambiguous parsing. "
-            "Please consider using a different name for this argument to avoid potential issues."
+            "Please consider using a different name for this argument to avoid potential issues.",
+            stacklevel=2,
         )
     with ctx.redispatch_to_next():
         unwrapped_outs = auto_functionalized(

@@ -691,7 +692,8 @@ def do_auto_functionalize_v2(
     if "self" in unwrapped_kwargs or "self_" in unwrapped_kwargs:
         warnings.warn(
             "Using `self` or `self_` as an argument in the definition of custom ops may lead to ambiguous parsing. "
-            "Please consider using a different name for this argument to avoid potential issues."
+            "Please consider using a different name for this argument to avoid potential issues.",
+            stacklevel=2,
         )
     all_basis_unwrapped = ctx.unwrap_tensors(all_bases)
@@ -196,7 +196,8 @@ class BaseHOP(HigherOrderOperator, abc.ABC):
                 "Aliasing is not supported for HOP subgraph.\n"
                 f"{subgraph.print_readable(print_output=False)}\n"
                 f"Alias info: inp-inp alias: {inp_inp_alias}, inp-out alias: {inp_out_alias}, out-out alias{out_out_alias}"
-                f"This may lead to silent incorrectness."
+                f"This may lead to silent incorrectness.",
+                stacklevel=2,
             )

             schema_gen = HopSchemaGenerator(self)
@@ -177,6 +177,7 @@ def cond(
             "Pred is a Python constant. When used with torch.cond, it specializes on one of the branches."
             " If you want torch.cond to preserve two branches, please make the predicate a boolean tensor or a SymBool.",
             UserWarning,
+            stacklevel=2,
         )
         # This is the eager case. We can just run the true or false branch.
         if pred:
@@ -859,6 +859,7 @@ def ignore(drop=False, **kwargs):
         warnings.warn(
             "ignore(drop_on_export=True) has been deprecated. TorchScript will now drop the function "
             "call on compilation. Use torch.jit.unused now. {}",
+            stacklevel=2,
             category=FutureWarning,
         )

@@ -867,6 +868,7 @@ def ignore(drop=False, **kwargs):
         warnings.warn(
             "ignore(True) has been deprecated. TorchScript will now drop the function "
             "call on compilation. Use torch.jit.unused now. {}",
+            stacklevel=2,
             category=FutureWarning,
         )

@@ -992,7 +994,8 @@ def _check_overload_body(func):
         # Parsing the function definition can raise an OSError if source is unavailable.
         # Since this is just an initial check, just raise a warning if this is the case.
         warnings.warn(
-            f"Unable to retrieve source for @torch.jit._overload function: {func}."
+            f"Unable to retrieve source for @torch.jit._overload function: {func}.",
+            stacklevel=2,
         )
         return

@@ -1385,7 +1388,8 @@ def check_empty_containers(obj) -> None:
             "calling torch.jit.isinstance in eager mode. For "
             "example, List[int] would become list and "
             "therefore falsely return True for List[float] or"
-            " List[str]."
+            " List[str].",
+            stacklevel=2,
         )
@@ -2137,7 +2137,8 @@ def alert_not_deterministic(caller: str):
            f"{caller} does not have a deterministic implementation, but you set "
            f"'torch.use_deterministic_algorithms(True, warn_only=True)'. "
            f"You can file an issue at https://github.com/pytorch/pytorch/issues "
-           f"to help us prioritize adding deterministic support for this operation."
+           f"to help us prioritize adding deterministic support for this operation.",
+           stacklevel=2,
        )
    else:
        torch._check(
@@ -180,7 +180,7 @@ def _resize_output_check(out: TensorLikeType, shape: ShapeType):
         "be resized unless they have zero elements. "
         "You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0)."
     )
-    warnings.warn(msg)
+    warnings.warn(msg, stacklevel=2)
     return True
@@ -3729,7 +3729,8 @@ def istft(
     if end > expected_output_signal_len:
         warnings.warn(
             "The length of signal is shorter than the length parameter. Result is being "
-            + "padded with zeros in the tail. Please check your center and hop_length settings"
+            + "padded with zeros in the tail. Please check your center and hop_length settings",
+            stacklevel=2,
         )
         y = aten.constant_pad_nd(y, (0, end - expected_output_signal_len), 0)
     return y
@@ -405,7 +405,8 @@ class FunctionalTensorMode(TorchDispatchMode):
                 warnings.warn(
                     f"At pre-dispatch tracing, we assume that any custom op marked with "
                     f"CompositeImplicitAutograd and have functional schema are safe to not decompose. "
-                    f"Found {func} to be one such op."
+                    f"Found {func} to be one such op.",
+                    stacklevel=2,
                 )
                 return False
         return True
@@ -350,7 +350,8 @@ class Tensor(torch._C.TensorBase):
            # hypothesis is that no one cares for meta tensors.
            if skip_data:
                warnings.warn(
-                   "Serializing tensors on the meta device under skip_data context manager is a no-op"
+                   "Serializing tensors on the meta device under skip_data context manager is a no-op",
+                   stacklevel=2,
                )
            arg_meta = (
                self.dtype,

@@ -1033,7 +1034,7 @@ class Tensor(torch._C.TensorBase):
    def resize(self, *sizes):
        if has_torch_function_unary(self):
            return handle_torch_function(Tensor.resize, (self,), self, *sizes)
-       warnings.warn("non-inplace resize is deprecated")
+       warnings.warn("non-inplace resize is deprecated", stacklevel=2)
        from torch.autograd._functions import Resize

        return Resize.apply(self, sizes)

@@ -1041,7 +1042,7 @@ class Tensor(torch._C.TensorBase):
    def resize_as(self, tensor):
        if has_torch_function_variadic(self, tensor):
            return handle_torch_function(Tensor.resize_as, (self, tensor), self, tensor)
-       warnings.warn("non-inplace resize_as is deprecated")
+       warnings.warn("non-inplace resize_as is deprecated", stacklevel=2)
        from torch.autograd._functions import Resize

        return Resize.apply(self, tensor.size())
@@ -118,7 +118,7 @@ def _get_async_or_non_blocking(function_name, non_blocking, kwargs):
        message = "{}() got an unexpected keyword argument '{}'"
        argument = list(kwargs.keys()).pop()
        raise TypeError(message.format(function_name, argument))
-   warnings.warn("'async' is deprecated; use 'non_blocking'")
+   warnings.warn("'async' is deprecated; use 'non_blocking'", stacklevel=2)
    return kwargs["async"]
@@ -555,7 +555,8 @@ class Unpickler:
                    f"Detected pickle protocol {self.proto} in the checkpoint, which was "
                    "not the default pickle protocol used by `torch.load` (2). The weights_only "
                    "Unpickler might not support all instructions implemented by this protocol, "
-                   "please file an issue for adding support if you encounter this."
+                   "please file an issue for adding support if you encounter this.",
+                   stacklevel=2,
                )
            elif key[0] == STOP[0]:
                rc = self.stack.pop()
@@ -267,7 +267,8 @@ class autocast:
            and torch.cuda.amp.common.amp_definitely_not_available()
        ):
            warnings.warn(
-               "User provided device_type of 'cuda', but CUDA is not available. Disabling"
+               "User provided device_type of 'cuda', but CUDA is not available. Disabling",
+               stacklevel=2,
            )
            enabled = False
        if cache_enabled is not None:

@@ -281,42 +282,42 @@ class autocast:
                error_message += (
                    ", ".join(str(dtype) for dtype in supported_dtype) + " currently."
                )
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "mtia":
            supported_dtype = [torch.bfloat16, torch.float16]
            if self.fast_dtype not in supported_dtype:
                error_message = "In MTIA autocast, but the target dtype is not supported. Disabling autocast.\n"
                error_message += "MTIA Autocast only supports dtypes of torch.bfloat16 and torch.float16 currently."
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "maia":
            supported_dtype = [torch.bfloat16, torch.float16]
            if self.fast_dtype not in supported_dtype:
                error_message = "In MAIA autocast, but the target dtype is not supported. Disabling autocast.\n"
                error_message += "MAIA Autocast only supports dtypes of torch.bfloat16 and torch.float16 currently."
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "xpu":
            supported_dtype = [torch.bfloat16, torch.float16]
            if self.fast_dtype not in supported_dtype:
                error_message = "In XPU autocast, but the target dtype is not supported. Disabling autocast.\n"
                error_message += "XPU Autocast only supports dtypes of torch.bfloat16 and torch.float16 currently."
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "ipu":
            supported_dtypes = [torch.bfloat16, torch.float16]
            if self.fast_dtype not in supported_dtypes:
                error_message = "In IPU autocast, but the target dtype is not supported. Disabling autocast.\n"
                error_message += "IPU Autocast only supports dtypes of torch.bfloat16 and torch.float16 currently."
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "hpu":
            supported_dtype = [torch.bfloat16, torch.float16]
            if self.fast_dtype not in supported_dtype:
                error_message = "In HPU autocast, but the target dtype is not supported. Disabling autocast.\n"
                error_message += "HPU Autocast only supports dtypes of torch.bfloat16 and torch.float16 currently."
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == self.custom_backend_name:
            supported_dtype = self.custom_device_mod.get_amp_supported_dtype()

@@ -326,7 +327,7 @@ class autocast:
                error_message += (
                    ", ".join(str(dtype) for dtype in supported_dtype) + " currently."
                )
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "cuda":
            if (

@@ -344,7 +345,7 @@ class autocast:
                    "In MPS autocast, but the target dtype is not supported. Disabling autocast.\n"
                    "MPS Autocast only supports dtype of torch.bfloat16 and torch.float16 currently."
                )
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
            elif self.fast_dtype == torch.bfloat16:
                if not torch.backends.mps.is_macos_or_newer(14, 0):

@@ -352,7 +353,7 @@ class autocast:
                    "In MPS autocast, but the target dtype torch.bfloat16 is not supported "
                    "on macOS versions below 14. Disabling autocast."
                )
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        elif self.device == "xla":
            supported_dtype = [torch.float16, torch.bfloat16]

@@ -361,7 +362,7 @@ class autocast:
                error_message += (
                    "XLA Autocast only supports dtype of torch.bfloat16 currently."
                )
-               warnings.warn(error_message)
+               warnings.warn(error_message, stacklevel=2)
                enabled = False
        self._enabled = enabled
@@ -422,6 +422,7 @@ class GradScaler:
                    "optimizer. In the near future GradScaler registers `grad_scale: Tensor` and "
                    "`found_inf: Tensor` to the passed optimizer and let the optimizer use them directly.",
                    FutureWarning,
+                   stacklevel=2,
                )
                kwargs_.update({"grad_scaler": self})
            else:
@@ -469,14 +469,16 @@ class LSTM(torch.nn.Module):
            warnings.warn(
                "dropout option for quantizable LSTM is ignored. "
                "If you are training, please, use nn.LSTM version "
-               "followed by `prepare` step."
+               "followed by `prepare` step.",
+               stacklevel=2,
            )
            if num_layers == 1:
                warnings.warn(
                    "dropout option adds dropout after all but last "
                    "recurrent layer, so non-zero dropout expects "
                    f"num_layers greater than 1, but got dropout={dropout} "
-                   f"and num_layers={num_layers}"
+                   f"and num_layers={num_layers}",
+                   stacklevel=2,
                )

        layers = [
@ -68,7 +68,8 @@ class Conv1d(nnq.Conv1d):
|
|||
reduce_range=True,
|
||||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended", # noqa: B950
|
||||
stacklevel=2,
|
||||
)
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
kernel_size = _single(kernel_size)
|
||||
|
|
@ -155,7 +156,8 @@ class Conv2d(nnq.Conv2d):
|
|||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module "
|
||||
"has poor numerical accuracy and its use is not recommended"
|
||||
"has poor numerical accuracy and its use is not recommended",
|
||||
stacklevel=2,
|
||||
)
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
kernel_size = _pair(kernel_size)
|
||||
|
|
@ -239,7 +241,8 @@ class Conv3d(nnq.Conv3d):
|
|||
dtype=None,
|
||||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended", # noqa: B950
|
||||
stacklevel=2,
|
||||
)
|
||||
assert padding_mode != "reflect", "Conv3d does not support reflection padding"
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
|
|
@ -330,7 +333,8 @@ class ConvTranspose1d(nnq.ConvTranspose1d):
|
|||
dtype=None,
|
||||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended", # noqa: B950
|
||||
stacklevel=2,
|
||||
)
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
super().__init__(
|
||||
|
|
@ -412,7 +416,8 @@ class ConvTranspose2d(nnq.ConvTranspose2d):
|
|||
dtype=None,
|
||||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended", # noqa: B950
|
||||
stacklevel=2,
|
||||
)
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
super().__init__(
|
||||
|
|
@ -494,7 +499,8 @@ class ConvTranspose3d(nnq.ConvTranspose3d):
|
|||
dtype=None,
|
||||
):
|
||||
warnings.warn(
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended" # noqa: B950
|
||||
f"The current implementation of the {self._get_name()} module has poor numerical accuracy and its use is not recommended", # noqa: B950
|
||||
stacklevel=2,
|
||||
)
|
||||
factory_kwargs = {"device": device, "dtype": dtype}
|
||||
super().__init__(
|
||||
|
|
|
|||
|
|
@ -136,7 +136,8 @@ class RNNBase(torch.nn.Module):
|
|||
"dropout option adds dropout after all but last "
|
||||
"recurrent layer, so non-zero dropout expects "
|
||||
f"num_layers greater than 1, but got dropout={dropout} and "
|
||||
f"num_layers={num_layers}"
|
||||
f"num_layers={num_layers}",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if mode == "LSTM":
|
||||
|
|
|
|||
|
|
@ -724,7 +724,8 @@ def upsample(input, size=None, scale_factor=None, mode="nearest", align_corners=
|
|||
affects the outputs.
|
||||
"""
|
||||
warnings.warn(
|
||||
"nn.quantized.functional.upsample is deprecated. Use nn.quantized.functional.interpolate instead."
|
||||
"nn.quantized.functional.upsample is deprecated. Use nn.quantized.functional.interpolate instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return interpolate(input, size, scale_factor, mode, align_corners)
|
||||
|
||||
|
|
@ -749,7 +750,8 @@ def upsample_bilinear(input, size=None, scale_factor=None):
|
|||
"""
|
||||
# DeprecationWarning is ignored by default
|
||||
warnings.warn(
|
||||
"nn.quantized.functional.upsample_bilinear is deprecated. Use nn.quantized.functional.interpolate instead."
|
||||
"nn.quantized.functional.upsample_bilinear is deprecated. Use nn.quantized.functional.interpolate instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return interpolate(input, size, scale_factor, mode="bilinear", align_corners=True)
|
||||
|
||||
|
|
@ -774,6 +776,7 @@ def upsample_nearest(input, size=None, scale_factor=None):
|
|||
"""
|
||||
# DeprecationWarning is ignored by default
|
||||
warnings.warn(
|
||||
"nn.quantized.functional.upsample_nearest is deprecated. Use nn.quantized.functional.interpolate instead."
|
||||
"nn.quantized.functional.upsample_nearest is deprecated. Use nn.quantized.functional.interpolate instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return interpolate(input, size, scale_factor, mode="nearest")
|
||||
|
|
|
|||
|
|
@ -322,7 +322,8 @@ class PReLU(torch.nn.Module):
|
|||
observer(float_wt)
|
||||
if observer.dtype != torch.quint8:
|
||||
warn(
|
||||
f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}"
|
||||
f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}",
|
||||
stacklevel=2,
|
||||
)
|
||||
wt_scale, wt_zp = observer.calculate_qparams()
|
||||
qweight = torch.quantize_per_tensor(
|
||||
|
|
@ -339,7 +340,8 @@ class PReLU(torch.nn.Module):
|
|||
observer(float_wt)
|
||||
if observer.dtype != torch.quint8:
|
||||
warn(
|
||||
f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}"
|
||||
f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}",
|
||||
stacklevel=2,
|
||||
)
|
||||
wt_scale, wt_zp = observer.calculate_qparams()
|
||||
qweight = torch.quantize_per_tensor(
|
||||
|
|
|
|||
|
|
@ -213,7 +213,8 @@ class ActivationSparsifier:
|
|||
|
||||
if name in self.data_groups: # unregister layer if already present
|
||||
warnings.warn(
|
||||
"layer already attached to the sparsifier, deregistering the layer and registering with new config"
|
||||
"layer already attached to the sparsifier, deregistering the layer and registering with new config",
|
||||
stacklevel=2,
|
||||
)
|
||||
self.unregister_layer(name=name)
|
||||
|
||||
|
|
|
|||
|
|
@ -158,6 +158,7 @@ class BaseDataScheduler:
|
|||
"initialization. Please, make sure to call `data_sparsifier.step()` before "
|
||||
"`scheduler.step()`.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# Just check if there were two first scheduler.step() calls before sparsifier.step()
|
||||
|
|
@ -167,6 +168,7 @@ class BaseDataScheduler:
|
|||
"You have to make sure you run the data_sparsifier.step() BEFORE any "
|
||||
"calls to the scheduler.step().",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
self._step_count += 1
|
||||
|
||||
|
|
|
|||
|
|
@ -105,7 +105,8 @@ class BaseDataSparsifier(base_sparsifier.BaseSparsifier):
|
|||
if name in self.state:
|
||||
# If the named data already exists - replace
|
||||
warnings.warn(
|
||||
"Replacing existing data of the same name. - Did you mean a different name?"
|
||||
"Replacing existing data of the same name. - Did you mean a different name?",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# reuse old config
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ class StepSLScheduler(BaseDataScheduler):
|
|||
"To get the last learning rate computed by the scheduler, "
|
||||
"please use `get_last_lr()`.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
data_groups = self.data_sparsifier.data_groups
|
||||
if (self.last_epoch == 0) or (self.last_epoch % self.step_size != 0):
|
||||
|
|
|
|||
|
|
@ -92,7 +92,8 @@ class BaseScheduler:
|
|||
if not self._get_sl_called_within_step:
|
||||
warnings.warn(
|
||||
"To get the last sparsity level computed by the scheduler, "
|
||||
"please use `get_last_sl()`."
|
||||
"please use `get_last_sl()`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
@ -124,6 +125,7 @@ class BaseScheduler:
|
|||
"initialization. Please, make sure to call `sparsifier.step()` before "
|
||||
"`scheduler.step()`.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# Just check if there were two first scheduler.step() calls before sparsifier.step()
|
||||
|
|
@ -133,6 +135,7 @@ class BaseScheduler:
|
|||
"You have to make sure you run the sparsifier.step() BEFORE any "
|
||||
"calls to the scheduler.step().",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
self._step_count += 1
|
||||
|
||||
|
|
|
|||
|
|
@ -90,7 +90,8 @@ class CubicSL(BaseScheduler):
|
|||
if not self._get_sl_called_within_step:
|
||||
warnings.warn(
|
||||
"To get the last sparsity level computed by the scheduler, "
|
||||
"please use `get_last_sl()`."
|
||||
"please use `get_last_sl()`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return [
|
||||
self.sparsity_compute_fn(
|
||||
|
|
|
|||
|
|
@ -56,7 +56,8 @@ class LambdaSL(BaseScheduler):
|
|||
if not self._get_sl_called_within_step:
|
||||
warnings.warn(
|
||||
"To get the last sparsity level computed by the scheduler, "
|
||||
"please use `get_last_sl()`."
|
||||
"please use `get_last_sl()`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return [
|
||||
base_sl * lmbda(self.last_epoch)
|
||||
|
|
|
|||
|
|
@ -121,7 +121,8 @@ class _InputEqualizationObserver(nn.Module):
|
|||
):
|
||||
warnings.warn(
|
||||
"Must call calculate_equalization_scale before calling calculate_scaled_minmax. "
|
||||
+ "Will not scale the next quantization observer."
|
||||
+ "Will not scale the next quantization observer.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return None, None
|
||||
|
||||
|
|
@ -226,7 +227,8 @@ def calculate_equalization_scale(
|
|||
):
|
||||
warnings.warn(
|
||||
"Must run observer before calling calculate_equalization_scale. "
|
||||
+ "Returning default equalization scale torch.tensor(1)."
|
||||
+ "Returning default equalization scale torch.tensor(1).",
|
||||
stacklevel=2,
|
||||
)
|
||||
return torch.tensor(1)
|
||||
|
||||
|
|
|
|||
|
|
@ -597,7 +597,8 @@ def _maybe_recursive_remove_dequantize(arg: Any, node: Node, graph: Graph) -> No
|
|||
_maybe_recursive_remove_dequantize(arg_element, node, graph)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Unsupported node type in recursive remove dequantize: {type(arg)}"
|
||||
f"Unsupported node type in recursive remove dequantize: {type(arg)}",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1197,7 +1198,8 @@ def convert(
|
|||
_maybe_recursive_remove_dequantize(output, return_node, model.graph)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Unsupported node type for output_quantized_idxs: {type(output)}"
|
||||
f"Unsupported node type for output_quantized_idxs: {type(output)}",
|
||||
stacklevel=2,
|
||||
)
|
||||
elif node.op == "call_module":
|
||||
mod = _get_module(node, modules)
|
||||
|
|
|
|||
|
|
@ -1055,7 +1055,9 @@ def _maybe_insert_input_equalization_observers_for_node(
|
|||
return
|
||||
|
||||
if is_branch:
|
||||
warnings.warn(f"Cannot equalize {node} because it is part of a branch.")
|
||||
warnings.warn(
|
||||
f"Cannot equalize {node} because it is part of a branch.", stacklevel=2
|
||||
)
|
||||
return
|
||||
|
||||
new_args = []
|
||||
|
|
|
|||
|
|
@ -890,7 +890,8 @@ def _qconfig_satisfies_dtype_config_constraints(
|
|||
if backend_quant_min is not None and backend_quant_max is not None:
|
||||
if app_quant_min is None or app_quant_max is None:
|
||||
warnings.warn(
|
||||
f"QConfig {debug_string} must specify 'quant_min' and 'quant_max', ignoring {qconfig}"
|
||||
f"QConfig {debug_string} must specify 'quant_min' and 'quant_max', ignoring {qconfig}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
elif app_quant_min < backend_quant_min or app_quant_max > backend_quant_max:
|
||||
|
|
@ -898,20 +899,23 @@ def _qconfig_satisfies_dtype_config_constraints(
|
|||
f"QConfig {debug_string} quantization range must fall within the backend's:\n"
|
||||
f"QConfig range = ({app_quant_min}, {app_quant_max}), "
|
||||
f"BackendConfig range = ({backend_quant_min}, {backend_quant_max}), "
|
||||
f"ignoring {qconfig}"
|
||||
f"ignoring {qconfig}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
# check scale min
|
||||
if backend_scale_min is not None:
|
||||
if app_scale_min is None:
|
||||
warnings.warn(
|
||||
f"QConfig {debug_string} must specify 'eps', ignoring {qconfig}"
|
||||
f"QConfig {debug_string} must specify 'eps', ignoring {qconfig}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
if app_scale_min < backend_scale_min:
|
||||
warnings.warn(
|
||||
f"QConfig {debug_string} eps ({app_scale_min}) must be greater than or equal to "
|
||||
f"the backend's min scale value ({backend_scale_min}), ignoring {qconfig}"
|
||||
f"the backend's min scale value ({backend_scale_min}), ignoring {qconfig}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
# check fixed scale and zero point
|
||||
|
|
@ -935,7 +939,8 @@ def _qconfig_satisfies_dtype_config_constraints(
|
|||
) and not isinstance(activation_post_process, FixedQParamsFakeQuantize):
|
||||
warnings.warn(
|
||||
f"QConfig must specify a FixedQParamsObserver or a FixedQParamsFakeQuantize "
|
||||
f"for fixed qparams ops, ignoring {qconfig}.\n{suggestion_str}"
|
||||
f"for fixed qparams ops, ignoring {qconfig}.\n{suggestion_str}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
if (
|
||||
|
|
@ -945,7 +950,8 @@ def _qconfig_satisfies_dtype_config_constraints(
|
|||
warnings.warn(
|
||||
f"QConfig fixed scale ({observer.scale}) and zero point ({observer.zero_point}) "
|
||||
f"do not match the backend's ({backend_scale_exact_match} and {backend_zero_point_exact_match}), "
|
||||
f"ignoring {qconfig}.\n{suggestion_str}"
|
||||
f"ignoring {qconfig}.\n{suggestion_str}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -245,7 +245,8 @@ class UniformQuantizationObserverBase(ObserverBase):
|
|||
if reduce_range:
|
||||
warnings.warn(
|
||||
"Please use quant_min and quant_max to specify the range for observers. \
|
||||
reduce_range will be deprecated in a future release of PyTorch."
|
||||
reduce_range will be deprecated in a future release of PyTorch.",
|
||||
stacklevel=2,
|
||||
)
|
||||
self.reduce_range = reduce_range
|
||||
self.register_buffer("eps", torch.tensor([eps], **factory_kwargs))
|
||||
|
|
@ -829,7 +830,8 @@ class PerChannelMinMaxObserver(UniformQuantizationObserverBase):
|
|||
self.max_val.resize_(val.shape)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Observer load_from_state_dict got unexpected name {name}"
|
||||
f"Observer load_from_state_dict got unexpected name {name}",
|
||||
stacklevel=2,
|
||||
)
|
||||
# For torchscript module we need to update the attributes here since we do not
|
||||
# call the `_load_from_state_dict` function defined module.py
|
||||
|
|
@ -840,7 +842,8 @@ class PerChannelMinMaxObserver(UniformQuantizationObserverBase):
|
|||
self.max_val.copy_(val)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Observer load_from_state_dict got unexpected name {name}"
|
||||
f"Observer load_from_state_dict got unexpected name {name}",
|
||||
stacklevel=2,
|
||||
)
|
||||
elif strict:
|
||||
missing_keys.append(key)
|
||||
|
|
@ -1289,7 +1292,9 @@ class HistogramObserver(UniformQuantizationObserverBase):
|
|||
# want to make our quantization range infinite
|
||||
# and in practice those values will be clamped
|
||||
if x_min == -torch.inf or x_max == torch.inf:
|
||||
warnings.warn("torch.inf detected in input tensor, ignoring input")
|
||||
warnings.warn(
|
||||
"torch.inf detected in input tensor, ignoring input", stacklevel=2
|
||||
)
|
||||
x = x[x.abs() != torch.inf]
|
||||
if x.numel() == 0:
|
||||
return x_orig
|
||||
|
|
@ -1345,7 +1350,8 @@ class HistogramObserver(UniformQuantizationObserverBase):
|
|||
if is_uninitialized:
|
||||
warnings.warn(
|
||||
"must run observer before calling calculate_qparams.\
|
||||
Returning default scale and zero point "
|
||||
Returning default scale and zero point ",
|
||||
stacklevel=2,
|
||||
)
|
||||
return torch.tensor([1.0], device=self.min_val.device.type), torch.tensor(
|
||||
[0], device=self.min_val.device.type
|
||||
|
|
@ -1509,7 +1515,8 @@ class PlaceholderObserver(ObserverBase):
|
|||
warnings.warn(
|
||||
"Please use `is_dynamic` instead of `compute_dtype`. \
|
||||
`compute_dtype` will be deprecated in a future release \
|
||||
of PyTorch."
|
||||
of PyTorch.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
|
|
|
|||
|
|
@ -292,7 +292,8 @@ def get_default_qconfig(backend="x86", version=0):
|
|||
if not torch.cpu._is_vnni_supported():
|
||||
warnings.warn(
|
||||
"Default qconfig of oneDNN backend with reduce_range of false may have accuracy issues "
|
||||
"on CPU without Vector Neural Network Instruction support."
|
||||
"on CPU without Vector Neural Network Instruction support.",
|
||||
stacklevel=2,
|
||||
)
|
||||
qconfig = QConfig(
|
||||
activation=HistogramObserver.with_args(reduce_range=False),
|
||||
|
|
|
|||
|
|
@ -392,7 +392,8 @@ def prepare(
|
|||
warnings.warn(
|
||||
"None of the submodule got qconfig applied. Make sure you "
|
||||
"passed correct configuration through `qconfig_dict` or "
|
||||
"by assigning the `.qconfig` attribute directly on submodules"
|
||||
"by assigning the `.qconfig` attribute directly on submodules",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
_add_observer_(
|
||||
|
|
|
|||
|
|
@ -372,6 +372,7 @@ def _config_checker(method: Callable) -> Callable:
|
|||
if quantizer._need_skip_config(quantization_config):
|
||||
warnings.warn(
|
||||
f"Skip the quantization config for {name}.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return quantizer
|
||||
return method(quantizer, name, quantization_config)
|
||||
|
|
@ -464,7 +465,10 @@ class X86InductorQuantizer(Quantizer):
|
|||
current_mode.qat_state is not None
|
||||
and current_mode.qat_state != quantization_config.is_qat
|
||||
):
|
||||
warnings.warn("Mixed QAT and Non-QAT quantization config is not supported.")
|
||||
warnings.warn(
|
||||
"Mixed QAT and Non-QAT quantization config is not supported.",
|
||||
stacklevel=2,
|
||||
)
|
||||
need_skip = True
|
||||
if current_mode.dynamic_state is not None:
|
||||
input_activation_spec = quantization_config.input_activation
|
||||
|
|
@ -473,14 +477,15 @@ class X86InductorQuantizer(Quantizer):
|
|||
and current_mode.dynamic_state != input_activation_spec.is_dynamic
|
||||
):
|
||||
warnings.warn(
|
||||
"Mixed dynamic and static quantization config is not supported."
|
||||
"Mixed dynamic and static quantization config is not supported.",
|
||||
stacklevel=2,
|
||||
)
|
||||
need_skip = True
|
||||
return need_skip
|
||||
|
||||
def set_global(self, quantization_config: QuantizationConfig):
|
||||
if self._need_skip_config(quantization_config):
|
||||
warnings.warn("Skip the global quantization config.")
|
||||
warnings.warn("Skip the global quantization config.", stacklevel=2)
|
||||
return self
|
||||
self.global_config = quantization_config
|
||||
return self
|
||||
|
|
@ -489,7 +494,8 @@ class X86InductorQuantizer(Quantizer):
|
|||
if not isinstance(self.global_config, QuantizationConfig):
|
||||
warnings.warn(
|
||||
"The global_config for X86InductorQuantizer is currently invalid. \
|
||||
Please ensure that you use set_global to establish the global quantization configuration."
|
||||
Please ensure that you use set_global to establish the global quantization configuration.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return self.global_config
|
||||
|
||||
|
|
@ -508,7 +514,8 @@ class X86InductorQuantizer(Quantizer):
|
|||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"function: Unable to customize quantization config for {function_type} by X86InductorQuantizer."
|
||||
f"function: Unable to customize quantization config for {function_type} by X86InductorQuantizer.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return self
|
||||
|
||||
|
|
@ -525,7 +532,8 @@ class X86InductorQuantizer(Quantizer):
|
|||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Module: Unable to customize quantization config for {module_type} by X86InductorQuantizer."
|
||||
f"Module: Unable to customize quantization config for {module_type} by X86InductorQuantizer.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return self
|
||||
|
||||
|
|
@ -551,7 +559,8 @@ class X86InductorQuantizer(Quantizer):
|
|||
self.operator_type_qconfig[operator_type] = quantization_config
|
||||
else:
|
||||
warnings.warn(
|
||||
f"operator: Unable to quantize {operator} by X86InductorQuantizer."
|
||||
f"operator: Unable to quantize {operator} by X86InductorQuantizer.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return self
|
||||
|
||||
|
|
@ -1317,7 +1326,8 @@ class X86InductorQuantizer(Quantizer):
|
|||
if not is_all_inputs_connected_to_quantized_op(input_nodes_to_check):
|
||||
if quantization_config is not None:
|
||||
warnings.warn(
|
||||
f"The input of maxpool2d is not quantized, skip annotate maxpool2d with config {quantization_config}."
|
||||
f"The input of maxpool2d is not quantized, skip annotate maxpool2d with config {quantization_config}.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -427,7 +427,8 @@ def check_min_max_valid(min_val: torch.Tensor, max_val: torch.Tensor) -> bool:
|
|||
if min_val.numel() == 0 or max_val.numel() == 0:
|
||||
warnings.warn(
|
||||
"must run observer before calling calculate_qparams. "
|
||||
+ "Returning default values."
|
||||
+ "Returning default values.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
|
||||
|
|
@ -435,7 +436,8 @@ def check_min_max_valid(min_val: torch.Tensor, max_val: torch.Tensor) -> bool:
|
|||
if min_val == float("inf") and max_val == float("-inf"):
|
||||
warnings.warn(
|
||||
"must run observer before calling calculate_qparams. "
|
||||
+ "Returning default values."
|
||||
+ "Returning default values.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return False
|
||||
|
|
@ -806,7 +808,8 @@ def _assert_and_get_unique_device(module: torch.nn.Module) -> Any:
|
|||
"""
|
||||
if {torch.device("cpu"), torch.device("meta")} == devices:
|
||||
warnings.warn(
|
||||
"Both 'meta' and 'cpu' are present in the list of devices. Module can have one device. We Select 'cpu'."
|
||||
"Both 'meta' and 'cpu' are present in the list of devices. Module can have one device. We Select 'cpu'.",
|
||||
stacklevel=2,
|
||||
)
|
||||
devices = {torch.device("cpu")}
|
||||
""
|
||||
|
|
|
|||
|
|
@ -944,7 +944,8 @@ def _check_inputs(tupled_inputs) -> bool:
|
|||
f"Input #{idx} requires gradient and "
|
||||
"is not a double precision floating point or complex. "
|
||||
"This check will likely fail if all the inputs are "
|
||||
"not of double precision floating point or complex. "
|
||||
"not of double precision floating point or complex. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
if inp.is_sparse:
|
||||
content = inp._values()
|
||||
|
|
@ -1325,7 +1326,8 @@ def _test_undefined_backward_mode(func, outputs, inputs) -> bool:
|
|||
"Backwards compatibility: New undefined gradient support checking "
|
||||
"feature is enabled by default, but it may break existing callers "
|
||||
"of this function. If this is true for you, you can call this "
|
||||
'function with "check_undefined_grad=False" to disable the feature'
|
||||
'function with "check_undefined_grad=False" to disable the feature',
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def check_undefined_grad_support(output_to_check):
|
||||
|
|
|
|||
|
|
@ -265,22 +265,24 @@ class profile:
|
|||
if _get_privateuse1_backend_name() != "privateuseone":
|
||||
VALID_DEVICE_OPTIONS.append(_get_privateuse1_backend_name())
|
||||
if self.use_device not in VALID_DEVICE_OPTIONS:
|
||||
warn(f"The {self.use_device} is not a valid device option.")
|
||||
warn(
|
||||
f"The {self.use_device} is not a valid device option.", stacklevel=2
|
||||
)
|
||||
self.use_device = None
|
||||
|
||||
if self.use_device == "cuda" and not torch.cuda.is_available():
|
||||
warn("CUDA is not available, disabling CUDA profiling")
|
||||
warn("CUDA is not available, disabling CUDA profiling", stacklevel=2)
|
||||
self.use_cuda = False
|
||||
self.use_device = None
|
||||
|
||||
if self.use_device == "xpu" and not torch.xpu.is_available():
|
||||
warn("XPU is not available, disabling XPU profiling")
|
||||
warn("XPU is not available, disabling XPU profiling", stacklevel=2)
|
||||
self.use_device = None
|
||||
|
||||
if self.use_device == "hpu" and not (
|
||||
hasattr(torch, "hpu") and torch.hpu.is_available()
|
||||
):
|
||||
warn("HPU is not available, disabling HPU profiling")
|
||||
warn("HPU is not available, disabling HPU profiling", stacklevel=2)
|
||||
self.use_device = None
|
||||
|
||||
self.kineto_activities = set()
|
||||
|
|
@ -1224,7 +1226,8 @@ class KinetoStepTracker:
|
|||
if delta > 1:
|
||||
warn(
|
||||
"Profiler step count has increased more than 1 - "
|
||||
f"current_step = {cls._current_step} step dict = {cls._step_dict}"
|
||||
f"current_step = {cls._current_step} step dict = {cls._step_dict}",
|
||||
stacklevel=2,
|
||||
)
|
||||
for _ in range(delta):
|
||||
_kineto_step()
|
||||
|
|
|
|||
|
|
@ -118,7 +118,8 @@ def is_acceptable(tensor):
|
|||
if not is_available():
|
||||
warnings.warn(
|
||||
"PyTorch was compiled without cuDNN/MIOpen support. To use cuDNN/MIOpen, rebuild "
|
||||
"PyTorch making sure the library is visible to the build system."
|
||||
"PyTorch making sure the library is visible to the build system.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
if not _init():
|
||||
|
|
@ -127,7 +128,8 @@ def is_acceptable(tensor):
|
|||
libpath={"darwin": "DYLD_LIBRARY_PATH", "win32": "PATH"}.get(
|
||||
sys.platform, "LD_LIBRARY_PATH"
|
||||
)
|
||||
)
|
||||
),
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -293,7 +293,8 @@ def _check_capability():
|
|||
min_arch % 10,
|
||||
max_arch // 10,
|
||||
max_arch % 10,
|
||||
)
|
||||
),
|
||||
stacklevel=2,
|
||||
)
|
||||
matched_arches = ""
|
||||
for arch, arch_info in CUDA_ARCHES_SUPPORTED.items():
|
||||
|
|
@ -303,7 +304,9 @@ def _check_capability():
|
|||
):
|
||||
matched_arches += f" {arch}"
|
||||
if matched_arches != "":
|
||||
warnings.warn(matched_cuda_warn.format(matched_arches))
|
||||
warnings.warn(
|
||||
matched_cuda_warn.format(matched_arches), stacklevel=2
|
||||
)
|
||||
|
||||
|
||||
def _check_cubins():
|
||||
|
|
@ -328,7 +331,8 @@ If you want to use the {} GPU with PyTorch, please check the instructions at htt
|
|||
warnings.warn(
|
||||
incompatible_device_warn.format(
|
||||
device_name, capability, " ".join(arch_list), device_name
|
||||
)
|
||||
),
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -818,7 +822,9 @@ def _raw_device_count_amdsmi() -> int:
|
|||
try:
|
||||
amdsmi.amdsmi_init()
|
||||
except amdsmi.AmdSmiException as e:
|
||||
warnings.warn(f"Can't initialize amdsmi - Error code: {e.err_code}")
|
||||
warnings.warn(
|
||||
f"Can't initialize amdsmi - Error code: {e.err_code}", stacklevel=2
|
||||
)
|
||||
return -1
|
||||
socket_handles = amdsmi.amdsmi_get_processor_handles()
|
||||
return len(socket_handles)
|
||||
|
|
@ -831,12 +837,12 @@ def _raw_device_count_nvml() -> int:
|
|||
nvml_h = CDLL("libnvidia-ml.so.1")
|
||||
rc = nvml_h.nvmlInit()
|
||||
if rc != 0:
|
||||
warnings.warn("Can't initialize NVML")
|
||||
warnings.warn("Can't initialize NVML", stacklevel=2)
|
||||
return -1
|
||||
dev_count = c_int(-1)
|
||||
rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
|
||||
if rc != 0:
|
||||
warnings.warn("Can't get nvml device count")
|
||||
warnings.warn("Can't get nvml device count", stacklevel=2)
|
||||
return -1
|
||||
del nvml_h
|
||||
return dev_count.value
|
||||
|
|
@ -850,27 +856,27 @@ def _raw_device_uuid_amdsmi() -> Optional[list[str]]:
|
|||
try:
|
||||
amdsmi.amdsmi_init()
|
||||
except amdsmi.AmdSmiException:
|
||||
warnings.warn("Can't initialize amdsmi")
|
||||
warnings.warn("Can't initialize amdsmi", stacklevel=2)
|
||||
return None
|
||||
try:
|
||||
socket_handles = amdsmi.amdsmi_get_processor_handles()
|
||||
dev_count = len(socket_handles)
|
||||
except amdsmi.AmdSmiException:
|
||||
warnings.warn("Can't get amdsmi device count")
|
||||
warnings.warn("Can't get amdsmi device count", stacklevel=2)
|
||||
return None
|
||||
uuids: list[str] = []
|
||||
for idx in range(dev_count):
|
||||
try:
|
||||
handler = amdsmi.amdsmi_get_processor_handles()[idx]
|
||||
except amdsmi.AmdSmiException:
|
||||
warnings.warn("Cannot get amd device handler")
|
||||
warnings.warn("Cannot get amd device handler", stacklevel=2)
|
||||
return None
|
||||
try:
|
||||
uuid = amdsmi.amdsmi_get_gpu_asic_info(handler)["asic_serial"][
|
||||
2:
|
||||
] # Removes 0x prefix from serial
|
||||
except amdsmi.AmdSmiException:
|
||||
warnings.warn("Cannot get uuid for amd device")
|
||||
warnings.warn("Cannot get uuid for amd device", stacklevel=2)
|
||||
return None
|
||||
uuids.append(
|
||||
str(uuid).lower()
|
||||
|
|
@ -885,25 +891,25 @@ def _raw_device_uuid_nvml() -> Optional[list[str]]:
|
|||
nvml_h = CDLL("libnvidia-ml.so.1")
|
||||
rc = nvml_h.nvmlInit()
|
||||
if rc != 0:
|
||||
warnings.warn("Can't initialize NVML")
|
||||
warnings.warn("Can't initialize NVML", stacklevel=2)
|
||||
return None
|
||||
dev_count = c_int(-1)
|
||||
rc = nvml_h.nvmlDeviceGetCount_v2(byref(dev_count))
|
||||
if rc != 0:
|
||||
warnings.warn("Can't get nvml device count")
|
||||
warnings.warn("Can't get nvml device count", stacklevel=2)
|
||||
return None
|
||||
uuids: list[str] = []
|
||||
for idx in range(dev_count.value):
|
||||
dev_id = c_void_p()
|
||||
rc = nvml_h.nvmlDeviceGetHandleByIndex_v2(idx, byref(dev_id))
|
||||
if rc != 0:
|
||||
warnings.warn("Can't get device handle")
|
||||
warnings.warn("Can't get device handle", stacklevel=2)
|
||||
return None
|
||||
buf_len = 96
|
||||
buf = create_string_buffer(buf_len)
|
||||
rc = nvml_h.nvmlDeviceGetUUID(dev_id, buf, buf_len)
|
||||
if rc != 0:
|
||||
warnings.warn("Can't get device UUID")
|
||||
warnings.warn("Can't get device UUID", stacklevel=2)
|
||||
return None
|
||||
uuids.append(buf.raw.decode("ascii").strip("\0"))
|
||||
del nvml_h
|
||||
|
|
|
|||
|
|
@ -492,6 +492,7 @@ def reset_max_memory_allocated(device: "Device" = None) -> None:
|
|||
"torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, "
|
||||
"which resets /all/ peak memory stats.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return reset_peak_memory_stats(device=device)
|
||||
|
||||
|
|
@ -518,6 +519,7 @@ def reset_max_memory_cached(device: "Device" = None) -> None:
|
|||
"torch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, "
|
||||
"which resets /all/ peak memory stats.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return reset_peak_memory_stats(device=device)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ SUM = 0 # ncclRedOp_t
|
|||
|
||||
def is_available(tensors):
|
||||
if not hasattr(torch._C, "_nccl_all_reduce"):
|
||||
warnings.warn("PyTorch is not compiled with NCCL support")
|
||||
warnings.warn("PyTorch is not compiled with NCCL support", stacklevel=2)
|
||||
return False
|
||||
|
||||
devices = set()
|
||||
|
|
|
|||
|
|
@ -626,7 +626,8 @@ def _process_single_offline_gemm(untuned_gemm_line: str, gpu_id: int) -> None:
|
|||
else:
|
||||
warnings.warn(
|
||||
"Offline tuning is not supported for this GEMM. Use online tuning instead. "
|
||||
+ f"Skipped tuning for: {untuned_gemm[1]}"
|
||||
+ f"Skipped tuning for: {untuned_gemm[1]}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@ -644,7 +645,8 @@ def _process_single_offline_gemm(untuned_gemm_line: str, gpu_id: int) -> None:
|
|||
if m == 1 or n == 1 or k == 1:
|
||||
warnings.warn(
|
||||
"Offline tuning is not support for this GEMM. Use online tuning instead. "
|
||||
+ f"Skipped tuning for: {untuned_gemm[1]}"
|
||||
+ f"Skipped tuning for: {untuned_gemm[1]}",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@ -747,7 +749,7 @@ def _process_single_offline_gemm(untuned_gemm_line: str, gpu_id: int) -> None:
|
|||
matA = matA.t()
|
||||
torch.nn.functional.linear(X, matA, bias)
|
||||
else:
|
||||
warnings.warn(f"error: unknown op {op_sig}")
|
||||
warnings.warn(f"error: unknown op {op_sig}", stacklevel=2)
|
||||
|
||||
|
||||
def _check_tuning_assertions() -> None:
|
||||
|
|
@ -756,7 +758,7 @@ def _check_tuning_assertions() -> None:
|
|||
"""
|
||||
|
||||
if is_enabled() is False:
|
||||
warnings.warn("TunableOp was disabled. Trying to enable now.")
|
||||
warnings.warn("TunableOp was disabled. Trying to enable now.", stacklevel=2)
|
||||
enable(True)
|
||||
assert is_enabled() is True
|
||||
assert tuning_is_enabled() is True
|
||||
|
|
|
|||
|
|
@ -23,7 +23,8 @@ try:
|
|||
from torch.compiler import is_dynamo_compiling as is_torchdynamo_compiling
|
||||
except Exception:
|
||||
warnings.warn(
|
||||
"Unable to import torchdynamo util `is_torchdynamo_compiling`, so won't support torchdynamo correctly"
|
||||
"Unable to import torchdynamo util `is_torchdynamo_compiling`, so won't support torchdynamo correctly",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def is_torchdynamo_compiling(): # type: ignore[misc]
|
||||
|
|
|
|||
|
|
@ -470,7 +470,8 @@ class ShardedTensor(ShardedTensorBase):
|
|||
src = shard.tensor.flatten()
|
||||
if src.nelement() == 0:
|
||||
warnings.warn(
|
||||
"Gathering a tensor with zero elements on rank " + str(rank)
|
||||
"Gathering a tensor with zero elements on rank " + str(rank),
|
||||
stacklevel=2,
|
||||
)
|
||||
continue
|
||||
shard_offset = shard_placement[shard.metadata][1]
|
||||
|
|
@ -671,7 +672,8 @@ class ShardedTensor(ShardedTensorBase):
|
|||
if device_to.index != current_idx:
|
||||
warnings.warn(
|
||||
"ShardedTensor.to only move tensor to its current device"
|
||||
"If you want to put to different device, use `reshard` instead."
|
||||
"If you want to put to different device, use `reshard` instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
device_to = torch.device(current_idx)
|
||||
|
||||
|
|
|
|||
|
|
@ -182,7 +182,8 @@ class ModTracker:
|
|||
warnings.formatwarning = custom_formatwarning
|
||||
warnings.warn(
|
||||
"The module hierarchy tracking maybe be messed up."
|
||||
" Please file a bug to PyTorch, if it is the case."
|
||||
" Please file a bug to PyTorch, if it is the case.",
|
||||
stacklevel=2,
|
||||
)
|
||||
if name not in self.parents:
|
||||
self._active_module_cnt[name] = 1
|
||||
|
|
|
|||
|
|
@ -257,7 +257,8 @@ class Join:
|
|||
f"{self._rank} has at least {WARN_THRESHOLD} "
|
||||
f"fewer inputs than other currently-active ranks. "
|
||||
"This level of skew could lead to performance "
|
||||
"degradation during training."
|
||||
"degradation during training.",
|
||||
stacklevel=2,
|
||||
)
|
||||
# Shadow the all-reduce in non-joined processes
|
||||
num_nonjoined_procs = self._get_num_nonjoined_procs()
|
||||
|
|
|
|||
|
|
@ -101,7 +101,8 @@ class PeriodicModelAverager(ModelAverager):
|
|||
"When period is 1, no need to use model averaging because the communication cost "
|
||||
"of all-reducing parameters will be no less than the cost of all-reducing gradients "
|
||||
"by DistributedDataParallel in the backward pass. Therefore, only "
|
||||
"DistributedDataParallel should be used for this case."
|
||||
"DistributedDataParallel should be used for this case.",
|
||||
stacklevel=2,
|
||||
)
|
||||
self.period = period
|
||||
|
||||
|
|
|
|||
|
|
@ -114,7 +114,8 @@ class HierarchicalModelAverager(averagers.ModelAverager):
|
|||
"no need to use model averaging because the communication cost "
|
||||
"of all-reducing parameters will be no less than the cost of all-reducing gradients "
|
||||
"by DistributedDataParallel in the backward pass. Therefore, only "
|
||||
"DistributedDataParallel should be used for this case."
|
||||
"DistributedDataParallel should be used for this case.",
|
||||
stacklevel=2,
|
||||
)
|
||||
overall_group_size = dist.get_world_size(group=self.process_group)
|
||||
if list(period_group_size_dict.values())[-1] != overall_group_size:
|
||||
|
|
|
|||
|
|
@ -660,7 +660,8 @@ class _FileSystemWriter(StorageWriter):
|
|||
warnings.warn(
|
||||
f"Detected an existing checkpoint in {self.path}, overwriting since {self.overwrite=}."
|
||||
" Past version 2.5 of PyTorch, `overwrite` will default to False. Set this variable to True to"
|
||||
" maintain this functionality or False to raise when an existing checkpoint is found."
|
||||
" maintain this functionality or False to raise when an existing checkpoint is found.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
raise RuntimeError(f"Checkpoint already exists and {self.overwrite=}.")
|
||||
|
|
|
|||
|
|
@ -290,6 +290,7 @@ def _verify_options(
|
|||
"will be removed in 2.5. This feature can be achieved by manually "
|
||||
"filtering out the state_dict returned from get_state_dict.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if optim_only and not optims:
|
||||
raise RuntimeError(
|
||||
|
|
@ -1234,6 +1235,7 @@ def _unflatten_model_state_dict(
|
|||
"feature, please preprocessing the model_state_dict to achieve the "
|
||||
"same functionality.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
cast_state_dict = cast(dict[nn.Module, dict[str, ValueType]], state_dict)
|
||||
new_state_dict: dict[str, ValueType] = {}
|
||||
|
|
|
|||
|
|
@ -158,7 +158,8 @@ def load(
|
|||
no_dist = no_dist or (not dist.is_available()) or (not dist.is_initialized())
|
||||
if no_dist:
|
||||
warnings.warn(
|
||||
"torch.distributed is disabled, unavailable or uninitialized, assuming the intent is to load in a single process."
|
||||
"torch.distributed is disabled, unavailable or uninitialized, assuming the intent is to load in a single process.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
with _profile():
|
||||
|
|
@ -365,7 +366,8 @@ def _load_state_dict_from_keys(
|
|||
no_dist = not (dist.is_available() and dist.is_initialized())
|
||||
if no_dist:
|
||||
warnings.warn(
|
||||
"torch.distributed is unavailable or uninitialized, assuming the intent is to load in a single process."
|
||||
"torch.distributed is unavailable or uninitialized, assuming the intent is to load in a single process.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
storage_reader = cast(
|
||||
|
|
|
|||
|
|
@ -182,7 +182,8 @@ def save(
|
|||
no_dist = no_dist or (not dist.is_available()) or (not dist.is_initialized())
|
||||
if no_dist:
|
||||
warnings.warn(
|
||||
"torch.distributed is disabled, unavailable or uninitialized, assuming the intent is to save in a single process."
|
||||
"torch.distributed is disabled, unavailable or uninitialized, assuming the intent is to save in a single process.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
with _profile():
|
||||
|
|
@ -414,7 +415,8 @@ def _save_state_dict(
|
|||
warnings.warn(
|
||||
"The function definition for SavePlanner.set_up_planner has been updated"
|
||||
" to include the storage_meta argument. Please update your implementation"
|
||||
" to include this parameter."
|
||||
" to include this parameter.",
|
||||
stacklevel=2,
|
||||
)
|
||||
planner.set_up_planner(state_dict, distW.is_coordinator) # type: ignore[call-arg, arg-type]
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -461,7 +461,8 @@ def _api_bc_check(func):
|
|||
if len(args) == 2:
|
||||
warnings.warn(
|
||||
f"The argument order of {func.__name__} has been changed. "
|
||||
"Please check the document to avoid future breakages."
|
||||
"Please check the document to avoid future breakages.",
|
||||
stacklevel=2,
|
||||
)
|
||||
sig = inspect.signature(func)
|
||||
kwonlyargs = [
|
||||
|
|
|
|||
|
|
@ -85,7 +85,8 @@ else:
|
|||
# We keep this function for backward compatibility.
|
||||
warnings.warn(
|
||||
"This get_root_mesh API will be deprecated soon."
|
||||
"Please use `get_root_mesh` inside DeviceMesh instead."
|
||||
"Please use `get_root_mesh` inside DeviceMesh instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
if not device_mesh:
|
||||
return device_mesh
|
||||
|
|
@ -108,7 +109,8 @@ else:
|
|||
) -> list["DeviceMesh"]:
|
||||
warnings.warn(
|
||||
"This _get_all_submeshes API will be deprecated soon."
|
||||
"Please use `_get_all_submeshes` inside DeviceMesh instead."
|
||||
"Please use `_get_all_submeshes` inside DeviceMesh instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return device_mesh._get_all_submeshes(mesh_dim_name)
|
||||
|
||||
|
|
@ -329,7 +331,8 @@ else:
|
|||
"It is recommended to set the current device for the process BEFORE the DeviceMesh initialization so that "
|
||||
"the underlying communicator (i.e. NCCL) can be initialized properly. "
|
||||
"Given that the current process has no default device selected, DeviceMesh will use a heuristic to set the "
|
||||
"device_id via `global_rank % num_devices_per_host`, assuming homogeneous hardware cluster. "
|
||||
"device_id via `global_rank % num_devices_per_host`, assuming homogeneous hardware cluster. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
# heuristic to set the current cuda/cuda-like device base on num of gpu devices available in each host
|
||||
# NOTE: This device selection would only work for homogeneous hardware.
|
||||
|
|
@ -766,7 +769,8 @@ else:
|
|||
warnings.warn(
|
||||
"You are attempting to slice a submesh from another submesh. While we support this operation, "
|
||||
"it is users' responsibility to ensure that the submesh is consistently sliced across all ranks. "
|
||||
"If not, this may result in some ranks receiving the submesh while others encounter errors."
|
||||
"If not, this may result in some ranks receiving the submesh while others encounter errors.",
|
||||
stacklevel=2,
|
||||
)
|
||||
slice_from_root = False
|
||||
|
||||
|
|
@ -803,7 +807,8 @@ else:
|
|||
elif name in flatten_name_to_root_layout:
|
||||
warnings.warn(
|
||||
"Slicing a flattened dim from root mesh will be deprecated in PT 2.11. "
|
||||
"Users need to bookkeep the flattened mesh directly. "
|
||||
"Users need to bookkeep the flattened mesh directly. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
layout_sliced.append(flatten_name_to_root_layout[name])
|
||||
|
||||
|
|
|
|||
|
|
@ -352,7 +352,8 @@ class Backend(str): # noqa: SLOT000
|
|||
warnings.warn(
|
||||
f"Device capability of {name} unspecified, assuming `cpu` and "
|
||||
"`cuda` or `xpu`. Please specify it via the `devices` argument of "
|
||||
"`register_backend`."
|
||||
"`register_backend`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
Backend.backend_capability[name.lower()] = (
|
||||
["cpu", "cuda", "xpu"] if torch.xpu.is_available() else ["cpu", "cuda"]
|
||||
|
|
@ -427,7 +428,8 @@ class BackendConfig:
|
|||
warnings.warn(
|
||||
f"Device capability of {backend} unknown, assuming `cpu` and "
|
||||
"`cuda`. You can specify it in `device:backend` format in "
|
||||
"`init_process_group` call."
|
||||
"`init_process_group` call.",
|
||||
stacklevel=2,
|
||||
)
|
||||
backend_val = Backend(backend)
|
||||
self.device_backend_map = {
|
||||
|
|
@ -751,7 +753,8 @@ def _get_default_timeout(backend: Backend) -> timedelta:
|
|||
# TODO moco benchmark on CPU initializes pgnccl backend today, triggered this assert in CI before it was
|
||||
# changed to be a warning. We should fix the moco model.
|
||||
warnings.warn(
|
||||
"Attempted to get default timeout for nccl backend, but NCCL support is not compiled"
|
||||
"Attempted to get default timeout for nccl backend, but NCCL support is not compiled",
|
||||
stacklevel=2,
|
||||
)
|
||||
return default_pg_timeout
|
||||
return default_pg_nccl_timeout
|
||||
|
|
@ -802,6 +805,7 @@ def _get_object_coll_device(group: Optional[ProcessGroup] = None) -> str:
|
|||
f"You are using a Backend {type(group)} as a ProcessGroup. "
|
||||
"This usage is deprecated since PyTorch 2.0. Please use a public API "
|
||||
"of PyTorch Distributed instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
# Provide backward compatibility to cases where `group` passed in is
|
||||
# actually a Backend (like `ProcessGroupGloo`) rather than a
|
||||
|
|
@ -868,7 +872,8 @@ def _get_pg_default_device(group: Optional[ProcessGroup] = None) -> torch.device
|
|||
"backward-compatiblity reason. If you need to find a device for object "
|
||||
"collectives, please use `_get_object_coll_device`. If you need to query "
|
||||
"the device types supported by group, please use "
|
||||
"`_device_capability(group)`. "
|
||||
"`_device_capability(group)`. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
group = group or _get_default_group()
|
||||
|
||||
|
|
@ -910,7 +915,8 @@ def _get_pg_default_device(group: Optional[ProcessGroup] = None) -> torch.device
|
|||
warnings.warn(
|
||||
"Multiple backends are registered with this ProcessGroup. We cannot "
|
||||
f"determine which one is the default. Returning {rv}. "
|
||||
"Please consider using other APIs."
|
||||
"Please consider using other APIs.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return rv
|
||||
|
||||
|
|
@ -1010,7 +1016,8 @@ def _warn_not_in_group(op_name) -> None:
|
|||
global_rank = -1 if GroupMember.WORLD is None else GroupMember.WORLD.rank()
|
||||
warnings.warn(
|
||||
f"Running {op_name} on global rank {global_rank} which does not "
|
||||
"belong to the given group."
|
||||
"belong to the given group.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1557,7 +1564,9 @@ def _set_pg_timeout(timeout: timedelta, group: Optional[ProcessGroup] = None) ->
|
|||
elif is_gloo_available() and isinstance(backend, ProcessGroupGloo):
|
||||
backends.add(backend) # type: ignore[arg-type]
|
||||
if len(backends) == 0:
|
||||
warnings.warn("Set timeout is now only supported for either nccl or gloo.")
|
||||
warnings.warn(
|
||||
"Set timeout is now only supported for either nccl or gloo.", stacklevel=2
|
||||
)
|
||||
for backend in backends:
|
||||
backend._set_default_timeout(timeout)
|
||||
|
||||
|
|
@ -1758,7 +1767,8 @@ def init_process_group(
|
|||
warnings.warn(
|
||||
f"For MPI backend, world_size ({world_size}) and rank ({rank}) "
|
||||
"are ignored since they are assigned by the "
|
||||
"MPI runtime."
|
||||
"MPI runtime.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
default_pg, _ = _new_process_group_helper(
|
||||
|
|
@ -2038,7 +2048,8 @@ def _new_process_group_helper(
|
|||
if backend_options._timeout != timeout:
|
||||
warnings.warn(
|
||||
"backend_options._timeout was specified, "
|
||||
"but timeout kwarg has a default value that will always override it. "
|
||||
"but timeout kwarg has a default value that will always override it. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
# default backend_options for NCCL
|
||||
|
|
@ -2259,7 +2270,8 @@ def destroy_process_group(group: Optional[ProcessGroup] = None):
|
|||
if pg in _world.pg_coalesce_state.keys():
|
||||
warnings.warn(
|
||||
"Some coalesced collectives haven't been launched when "
|
||||
"ProcessGroup is destroyed. They will be cleaned."
|
||||
"ProcessGroup is destroyed. They will be cleaned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
del _world.pg_coalesce_state[pg]
|
||||
|
||||
|
|
@ -2349,7 +2361,8 @@ def _abort_process_group(group: Optional[ProcessGroup] = None):
|
|||
if pg in _world.pg_coalesce_state.keys():
|
||||
warnings.warn(
|
||||
"Some coalesced collectives haven't been launched when "
|
||||
"ProcessGroup is aborted. They will be cleaned."
|
||||
"ProcessGroup is aborted. They will be cleaned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
del _world.pg_coalesce_state[pg]
|
||||
|
||||
|
|
@ -4919,7 +4932,8 @@ def barrier(
|
|||
if group.rank() == 0:
|
||||
warnings.warn( # warn only once
|
||||
"barrier(): using the device under current context. "
|
||||
"You can specify `device_id` in `init_process_group` to mute this warning."
|
||||
"You can specify `device_id` in `init_process_group` to mute this warning.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
work = group.barrier(opts=opts)
|
||||
|
|
@ -5001,6 +5015,7 @@ def monitored_barrier(
|
|||
warnings.warn(
|
||||
"Please specify timeout arg as a timedelta. "
|
||||
f"Converting current value of {timeout} assuming it represents seconds",
|
||||
stacklevel=2,
|
||||
)
|
||||
timeout = timedelta(seconds=timeout)
|
||||
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ class WorkerSpec:
|
|||
warnings.warn(
|
||||
"WorkerSpec.fn will be deprecated,"
|
||||
" please use WorkerSpec.entrypoint instead",
|
||||
stacklevel=2,
|
||||
category=DeprecationWarning,
|
||||
)
|
||||
self.entrypoint = self.fn
|
||||
|
|
|
|||
|
|
@ -52,7 +52,9 @@ class ErrorHandler:
|
|||
try:
|
||||
faulthandler.enable(all_threads=True)
|
||||
except Exception as e:
|
||||
warnings.warn(f"Unable to enable fault handler. {type(e).__name__}: {e}")
|
||||
warnings.warn(
|
||||
f"Unable to enable fault handler. {type(e).__name__}: {e}", stacklevel=2
|
||||
)
|
||||
|
||||
def _write_error_file(self, file_path: str, error_msg: str) -> None:
|
||||
"""Write error message to the file."""
|
||||
|
|
@ -60,7 +62,9 @@ class ErrorHandler:
|
|||
with open(file_path, "w") as fp:
|
||||
fp.write(error_msg)
|
||||
except Exception as e:
|
||||
warnings.warn(f"Unable to write error to file. {type(e).__name__}: {e}")
|
||||
warnings.warn(
|
||||
f"Unable to write error to file. {type(e).__name__}: {e}", stacklevel=2
|
||||
)
|
||||
|
||||
def record_exception(self, e: BaseException) -> None:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -65,5 +65,6 @@ def _derive_module_name(depth: int = 1) -> Optional[str]:
|
|||
warnings.warn(
|
||||
f"Error deriving logger module name, using <None>. Exception: {e}",
|
||||
RuntimeWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -336,7 +336,8 @@ def _get_param_to_fqns(
|
|||
warnings.warn(
|
||||
"FlatParameter is being traversed more than once. "
|
||||
"This case should only happen when using "
|
||||
"DistributedModelParallel with FullyShardedDataParallel."
|
||||
"DistributedModelParallel with FullyShardedDataParallel.",
|
||||
stacklevel=2,
|
||||
)
|
||||
param_to_fqns[param] = global_fqns
|
||||
elif not dedup_shared_params:
|
||||
|
|
|
|||
|
|
@ -299,7 +299,8 @@ class _ExecOrderData:
|
|||
warnings.warn(
|
||||
"Forward order differs from that of the first iteration "
|
||||
f"on rank {self.rank}. Collectives are unchecked and may "
|
||||
f"give incorrect results or hang.\n{msg_prefix}{msg_suffix}"
|
||||
f"give incorrect results or hang.\n{msg_prefix}{msg_suffix}",
|
||||
stacklevel=2,
|
||||
)
|
||||
self.warn_status = _ExecOrderWarnStatus.WARNING
|
||||
self.current_order_index += 1
|
||||
|
|
|
|||
|
|
@ -1585,7 +1585,8 @@ class FlatParamHandle:
|
|||
warnings.warn(
|
||||
f"[Rank {self.rank}] Only some but not all ranks have a "
|
||||
"`None` `FlatParameter` gradient, so FSDP is using zeros to "
|
||||
"approximate those ranks' sharded gradients being `None`"
|
||||
"approximate those ranks' sharded gradients being `None`",
|
||||
stacklevel=2,
|
||||
)
|
||||
flat_param._saved_grad_shard = None # type: ignore[assignment]
|
||||
sharded_grad = torch.zeros(flat_param._sharded_size, device=self.device) # type: ignore[attr-defined]
|
||||
|
|
@ -2434,7 +2435,8 @@ class FlatParamHandle:
|
|||
f"[Rank {rank}] {'Parameter' if is_param else 'Gradient'} needs "
|
||||
f"writeback in {self._training_state}\n"
|
||||
f"expected shape={expected_shape} shape={src_shape} "
|
||||
f"expected device={dst_tensor.device} device={src_device}"
|
||||
f"expected device={dst_tensor.device} device={src_device}",
|
||||
stacklevel=2,
|
||||
)
|
||||
if src_tensor is not None and src_tensor.shape != expected_shape:
|
||||
# NOTE: Gradient shape mismatch is not possible in practice since
|
||||
|
|
|
|||
|
|
@ -431,7 +431,8 @@ def _init_core_state(
|
|||
warnings.warn(
|
||||
"FSDP is switching to use `NO_SHARD` instead of "
|
||||
f"{sharding_strategy or ShardingStrategy.FULL_SHARD} since "
|
||||
"the world size is 1."
|
||||
"the world size is 1.",
|
||||
stacklevel=2,
|
||||
)
|
||||
sharding_strategy = ShardingStrategy.NO_SHARD
|
||||
elif sharding_strategy == ShardingStrategy.NO_SHARD:
|
||||
|
|
@ -704,7 +705,8 @@ def _get_ignored_modules(
|
|||
warnings.warn(
|
||||
"Trying to ignore the top-level module passed into the FSDP "
|
||||
"constructor itself will result in all parameters being "
|
||||
f"ignored and is not well-supported: {module}"
|
||||
f"ignored and is not well-supported: {module}",
|
||||
stacklevel=2,
|
||||
)
|
||||
# Include nested FSDP modules' ignored modules
|
||||
for submodule in root_module.modules():
|
||||
|
|
@ -847,7 +849,8 @@ def _get_device_from_device_id(
|
|||
f"FSDP will use the current device {device_handle.current_device()}. "
|
||||
f"If this is incorrect, please explicitly call `torch.{device.type}.set_device()` "
|
||||
"before FSDP initialization or pass in the explicit device "
|
||||
"index as the `device_id` argument."
|
||||
"index as the `device_id` argument.",
|
||||
stacklevel=2,
|
||||
)
|
||||
device = torch.device(device_handle.current_device())
|
||||
return device
|
||||
|
|
@ -929,7 +932,8 @@ def _materialize_meta_module(
|
|||
warnings.warn(
|
||||
"Unable to call `reset_parameters()` for module on meta "
|
||||
f"device with error {str(e)}. Please ensure that your module of"
|
||||
f"type {type(module)} implements a `reset_parameters()` method." # type: ignore[possibly-undefined]
|
||||
f"type {type(module)} implements a `reset_parameters()` method.",
|
||||
stacklevel=2, # type: ignore[possibly-undefined]
|
||||
)
|
||||
raise e
|
||||
|
||||
|
|
@ -1049,7 +1053,8 @@ def _warn_cpu_init():
|
|||
"recommend passing in the `device_id` argument for FSDP to move "
|
||||
"`module` to GPU for the sharding initialization. `module` must also "
|
||||
"be on GPU device to work with the `sync_module_states=True` flag "
|
||||
"since that requires GPU communication."
|
||||
"since that requires GPU communication.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -506,7 +506,8 @@ def _flatten_optim_state_dict(
|
|||
flat_osd_state[key] = copy.deepcopy(state)
|
||||
else:
|
||||
warnings.warn(
|
||||
f"optim_state[{key}] is not on rank{fsdp_state.rank}."
|
||||
f"optim_state[{key}] is not on rank{fsdp_state.rank}.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
else:
|
||||
|
|
@ -2051,7 +2052,8 @@ def _optim_state_dict(
|
|||
"most cases, this is a user-defined state that is not "
|
||||
"associated with any particular parameter. Another possible "
|
||||
"case is this state is managed by TorchRec. Otherwise, there may "
|
||||
" be a mismatched assumption of optim_state_dict of this mode."
|
||||
" be a mismatched assumption of optim_state_dict of this mode.",
|
||||
stacklevel=2,
|
||||
)
|
||||
fsdp_osd_state[key] = value
|
||||
|
||||
|
|
|
|||
|
|
@ -337,7 +337,8 @@ def _full_post_state_dict_hook(
|
|||
"This may mean that this state_dict entry could point to invalid "
|
||||
"memory regions after returning from state_dict() call if this "
|
||||
"parameter is managed by FSDP. Please check clone "
|
||||
f"implementation of {fqn}. Error: {str(e)}"
|
||||
f"implementation of {fqn}. Error: {str(e)}",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
return _common_unshard_post_state_dict_hook(
|
||||
|
|
@ -708,7 +709,8 @@ def _post_state_dict_hook(
|
|||
context = _replace_with_full_state_dict_type(fsdp_state)
|
||||
warnings.warn(
|
||||
"When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict will "
|
||||
"be returned."
|
||||
"be returned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
context = contextlib.nullcontext()
|
||||
|
|
@ -770,7 +772,8 @@ def _pre_state_dict_hook(
|
|||
context = _replace_with_full_state_dict_type(fsdp_state)
|
||||
warnings.warn(
|
||||
"When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict will "
|
||||
"be returned."
|
||||
"be returned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
_set_use_dtensor(fsdp_state)
|
||||
|
|
@ -824,7 +827,8 @@ def _pre_load_state_dict_hook(
|
|||
context = _replace_with_full_state_dict_type(fsdp_state)
|
||||
warnings.warn(
|
||||
"When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict will"
|
||||
"be returned."
|
||||
"be returned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
_set_use_dtensor(fsdp_state)
|
||||
|
|
@ -861,7 +865,8 @@ def _post_load_state_dict_hook(
|
|||
context = _replace_with_full_state_dict_type(fsdp_state)
|
||||
warnings.warn(
|
||||
"When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict will"
|
||||
"be returned."
|
||||
"be returned.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
context = contextlib.nullcontext()
|
||||
|
|
|
|||
|
|
@ -153,7 +153,8 @@ def _validate_unshard_params_args(
|
|||
"offload_to_cpu=True and rank0_only=False may result in the"
|
||||
"unsharded parameters being redundantly copied to CPU memory for "
|
||||
"GPUs sharing the same CPU memory, which risks CPU OOM. We "
|
||||
"recommend using offload_to_cpu=True with rank0_only=True."
|
||||
"recommend using offload_to_cpu=True with rank0_only=True.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -120,7 +120,8 @@ def _warn_on_overridden_mixed_precision(
|
|||
"Both mixed precision and an auto_wrap_policy were specified to FSDP, "
|
||||
f"where the wrapped module has submodules of type:\n{overridden_module_classes}\n"
|
||||
"These modules will be wrapped as separate FSDP instacnes with mixed "
|
||||
"precision disabled."
|
||||
"precision disabled.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -172,7 +173,7 @@ def _validate_frozen_params(
|
|||
f"The following parameters have requires_grad=False:\n{frozen_param_fqns}"
|
||||
)
|
||||
if use_orig_params:
|
||||
warnings.warn(msg)
|
||||
warnings.warn(msg, stacklevel=2)
|
||||
else:
|
||||
raise ValueError(msg)
|
||||
|
||||
|
|
|
|||
|
|
@ -680,6 +680,7 @@ class FullyShardedDataParallel(nn.Module, _FSDPState):
|
|||
"#torch.distributed.checkpoint.state_dict.get_state_dict ."
|
||||
"Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
_state_dict_type_to_config = {
|
||||
StateDictType.FULL_STATE_DICT: FullStateDictConfig,
|
||||
|
|
@ -1208,7 +1209,8 @@ class FullyShardedDataParallel(nn.Module, _FSDPState):
|
|||
warnings.warn(
|
||||
f"Called FSDP.clip_grad_norm_() on rank {self.rank} with no "
|
||||
"gradients -- returning the total norm in the default dtype "
|
||||
f"{total_norm.dtype}"
|
||||
f"{total_norm.dtype}",
|
||||
stacklevel=2,
|
||||
) # warn since this is generally unexpected
|
||||
return total_norm
|
||||
total_norm_dtype = functools.reduce(
|
||||
|
|
|
|||
|
|
@ -87,7 +87,8 @@ class _NamedOptimizer(optim.Optimizer):
|
|||
else:
|
||||
warnings.warn(
|
||||
"Since we pass in param_groups, we will use param_groups to "
|
||||
"initialize the optimizer, not all parameters of the module."
|
||||
"initialize the optimizer, not all parameters of the module.",
|
||||
stacklevel=2,
|
||||
)
|
||||
param_to_key = {param: key for key, param in self.named_parameters.items()} # type: ignore[misc, has-type]
|
||||
ordered_param_keys = []
|
||||
|
|
|
|||
|
|
@ -92,7 +92,8 @@ class PostLocalSGDOptimizer(torch.optim.Optimizer):
|
|||
else:
|
||||
warnings.warn(
|
||||
"Loaded state dict does not contain a step counter for an averager. "
|
||||
"Setting step counter to 0."
|
||||
"Setting step counter to 0.",
|
||||
stacklevel=2,
|
||||
)
|
||||
self.averager.step = 0
|
||||
|
||||
|
|
|
|||
|
|
@ -513,7 +513,8 @@ class OpDispatcher:
|
|||
"Found a non-scalar tensor with numel=1 and ndim!=0, "
|
||||
"we are implicitly creating a replicated DTensor for it. "
|
||||
"However, please consider changing it to a scalar tensor "
|
||||
"or explicitly create a DTensor under distributed environment."
|
||||
"or explicitly create a DTensor under distributed environment.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if tensor_arg.numel() == 1 or self._allow_implicit_replication:
|
||||
|
|
|
|||
|
|
@ -43,7 +43,8 @@ def is_rng_supported_mesh(device_mesh: DeviceMesh) -> bool:
|
|||
else:
|
||||
# TODO: Logs way too much
|
||||
warnings.warn(
|
||||
f"DTensor random operators may not have complete support on {device_mesh.device_type} device mesh"
|
||||
f"DTensor random operators may not have complete support on {device_mesh.device_type} device mesh",
|
||||
stacklevel=2,
|
||||
)
|
||||
return False
|
||||
|
||||
|
|
@ -72,7 +73,8 @@ def manual_seed(seed: int, device_mesh: DeviceMesh) -> None:
|
|||
if not is_rng_supported_mesh(device_mesh):
|
||||
warnings.warn(
|
||||
"DTensor manual_seed() may not have complete support "
|
||||
f"on {device_mesh.device_type} device mesh"
|
||||
f"on {device_mesh.device_type} device mesh",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -74,7 +74,8 @@ def parallelize_module( # type: ignore[return]
|
|||
if parallelize_plan is None:
|
||||
warnings.warn(
|
||||
"No parallelize_plan is provided and auto-parallel is not supported "
|
||||
"at the moment, so this parallelize_module call will do nothing."
|
||||
"at the moment, so this parallelize_module call will do nothing.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return module
|
||||
|
||||
|
|
@ -108,7 +109,8 @@ def parallelize_module( # type: ignore[return]
|
|||
warnings.warn(
|
||||
f"Parallelize plan key '{module_path}' could not be resolved: "
|
||||
f"no submodule matching token '{token}' in module {module}, "
|
||||
f"skipping this plan entry."
|
||||
f"skipping this plan entry.",
|
||||
stacklevel=2,
|
||||
)
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,8 @@ class Distribution:
|
|||
warnings.warn(
|
||||
f"{self.__class__} does not define `arg_constraints`. "
|
||||
+ "Please set `arg_constraints = {}` or initialize the distribution "
|
||||
+ "with `validate_args=False` to turn off validation."
|
||||
+ "with `validate_args=False` to turn off validation.",
|
||||
stacklevel=2,
|
||||
)
|
||||
for param, constraint in arg_constraints.items():
|
||||
if constraints.is_dependent(constraint):
|
||||
|
|
@ -313,7 +314,8 @@ class Distribution:
|
|||
warnings.warn(
|
||||
f"{self.__class__} does not define `support` to enable "
|
||||
+ "sample validation. Please initialize the distribution with "
|
||||
+ "`validate_args=False` to turn off validation."
|
||||
+ "`validate_args=False` to turn off validation.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
assert support is not None
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ def _dispatch_kl(type_p, type_q):
|
|||
f"Ambiguous kl_divergence({type_p.__name__}, {type_q.__name__}). "
|
||||
f"Please register_kl({left_p.__name__}, {right_q.__name__})",
|
||||
RuntimeWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return left_fun
|
||||
|
||||
|
|
|
|||
|
|
@ -127,7 +127,8 @@ class Wishart(ExponentialFamily):
|
|||
|
||||
if self.df.lt(event_shape[-1]).any():
|
||||
warnings.warn(
|
||||
"Low df values detected. Singular samples are highly likely to occur for ndim - 1 < df < ndim."
|
||||
"Low df values detected. Singular samples are highly likely to occur for ndim - 1 < df < ndim.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
super().__init__(batch_shape, event_shape, validate_args=validate_args)
|
||||
|
|
@ -279,7 +280,7 @@ class Wishart(ExponentialFamily):
|
|||
else:
|
||||
# More optimized version with data-dependent control flow.
|
||||
if is_singular.any():
|
||||
warnings.warn("Singular sample detected.")
|
||||
warnings.warn("Singular sample detected.", stacklevel=2)
|
||||
|
||||
for _ in range(max_try_correction):
|
||||
sample_new = self._bartlett_sampling(is_singular[is_singular].shape)
|
||||
|
|
|
|||
|
|
@ -500,10 +500,10 @@ def load(
|
|||
if file_info.filename == "serialized_exported_program.json":
|
||||
serialized_exported_program = file_content
|
||||
elif file_info.filename == "serialized_state_dict.json":
|
||||
warnings.warn("This version of file is deprecated")
|
||||
warnings.warn("This version of file is deprecated", stacklevel=2)
|
||||
serialized_state_dict = file_content
|
||||
elif file_info.filename == "serialized_constants.json":
|
||||
warnings.warn("This version of file is deprecated")
|
||||
warnings.warn("This version of file is deprecated", stacklevel=2)
|
||||
serialized_constants = file_content
|
||||
elif file_info.filename == "serialized_state_dict.pt":
|
||||
serialized_state_dict = file_content
|
||||
|
|
|
|||
|
|
@ -2113,7 +2113,7 @@ def _export_for_training(
|
|||
if torch._export.config.error_on_lifted_constant_tensors:
|
||||
raise RuntimeError(error_msg)
|
||||
else:
|
||||
warnings.warn(error_msg)
|
||||
warnings.warn(error_msg, stacklevel=2)
|
||||
|
||||
export_graph_signature = export_artifact.aten.sig
|
||||
|
||||
|
|
@ -2189,7 +2189,8 @@ def _export_for_training(
|
|||
f"This is likely result of torch.export.export not being able to track side effects "
|
||||
f"that is happening outside of model scope.\n\n"
|
||||
f"Leaked tensors:\n {leak_details}\n\n"
|
||||
f"Alternatively, please file a bug report to PyTorch team for further debugging help."
|
||||
f"Alternatively, please file a bug report to PyTorch team for further debugging help.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
del legit_leak
|
||||
|
|
|
|||
|
|
@ -530,7 +530,8 @@ def _create_stateful_graph_module(
|
|||
f"A model attribute `{constant_fqn}` requires gradient. "
|
||||
f"but it's not properly registered as a parameter. "
|
||||
f"torch.export will detach it and treat it as a constant tensor "
|
||||
f"but please register it as parameter instead."
|
||||
f"but please register it as parameter instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
detached_buffer = buffer.detach()
|
||||
original_tensor_to_detached_tensor[buffer] = detached_buffer
|
||||
|
|
@ -549,7 +550,8 @@ def _create_stateful_graph_module(
|
|||
f"A model attribute `{const_name}` requires gradient "
|
||||
f"but it's not properly registered as a parameter. "
|
||||
f"torch.export will detach it and treat it as a constant tensor "
|
||||
f"but please register it as parameter instead."
|
||||
f"but please register it as parameter instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
if value in original_tensor_to_detached_tensor:
|
||||
value = original_tensor_to_detached_tensor[value]
|
||||
|
|
|
|||
|
|
@ -1684,7 +1684,8 @@ def _create_graph_module_for_export(root, graph):
|
|||
"Unable to execute the generated python source code from "
|
||||
"the graph. The graph module will no longer be directly callable, "
|
||||
"but you can still run the ExportedProgram, and if needed, you can "
|
||||
"run the graph module eagerly using torch.fx.Interpreter."
|
||||
"run the graph module eagerly using torch.fx.Interpreter.",
|
||||
stacklevel=2,
|
||||
)
|
||||
gm = torch.fx.GraphModule(root, torch.fx.Graph())
|
||||
gm._graph = graph
|
||||
|
|
|
|||
|
|
@ -108,7 +108,8 @@ def get_complete(
|
|||
|
||||
warnings.warn(
|
||||
"No complete tensor found in the group! Returning the first one. "
|
||||
"This may cause issues when your weights are not on CPU."
|
||||
"This may cause issues when your weights are not on CPU.",
|
||||
stacklevel=2,
|
||||
)
|
||||
assert len(group) > 0
|
||||
return next(iter(group))
|
||||
|
|
|
|||
torch/hub.py
|
|
@ -279,7 +279,8 @@ def _get_cache_or_reload(
|
|||
f"The ref {ref} is ambiguous. Perhaps it is both a tag and a branch in the repo? "
|
||||
"Torchhub will now assume that it's a branch. "
|
||||
"You can disambiguate tags and branches by explicitly passing refs/heads/branch_name or "
|
||||
"refs/tags/tag_name as the ref. That might require using skip_validation=True."
|
||||
"refs/tags/tag_name as the ref. That might require using skip_validation=True.",
|
||||
stacklevel=2,
|
||||
)
|
||||
disambiguated_branch_ref = f"refs/heads/{ref}"
|
||||
url = _git_archive_link(
|
||||
|
|
@ -338,7 +339,8 @@ def _check_repo_is_trusted(
|
|||
"trust_repo=False) and a command prompt will appear asking for an explicit confirmation of trust, "
|
||||
f"or {calling_fn}(..., trust_repo=True), which will assume that the prompt is to be answered with "
|
||||
f"'yes'. You can also use {calling_fn}(..., trust_repo='check') which will only prompt for "
|
||||
f"confirmation if the repo is not already trusted. This will eventually be the default behaviour"
|
||||
f"confirmation if the repo is not already trusted. This will eventually be the default behaviour",
|
||||
stacklevel=2,
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@ -406,7 +408,9 @@ def get_dir() -> str:
|
|||
"""
|
||||
# Issue warning to move data if old env is set
|
||||
if os.getenv("TORCH_HUB"):
|
||||
warnings.warn("TORCH_HUB is deprecated, please use env TORCH_HOME instead")
|
||||
warnings.warn(
|
||||
"TORCH_HUB is deprecated, please use env TORCH_HOME instead", stacklevel=2
|
||||
)
|
||||
|
||||
if _hub_dir is not None:
|
||||
return _hub_dir
|
||||
|
|
@ -853,7 +857,8 @@ def load_state_dict_from_url(
|
|||
# Issue warning to move data if old env is set
|
||||
if os.getenv("TORCH_MODEL_ZOO"):
|
||||
warnings.warn(
|
||||
"TORCH_MODEL_ZOO is deprecated, please use env TORCH_HOME instead"
|
||||
"TORCH_MODEL_ZOO is deprecated, please use env TORCH_HOME instead",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if model_dir is None:
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ class strict_fusion:
|
|||
|
||||
def __init__(self) -> None:
|
||||
if not torch._jit_internal.is_scripting():
|
||||
warnings.warn("Only works in script mode")
|
||||
warnings.warn("Only works in script mode", stacklevel=2)
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -180,7 +180,8 @@ class AttributeTypeIsSupportedChecker(ast.NodeVisitor):
|
|||
"instance-level annotations on empty non-base "
|
||||
"types in `__init__`. Instead, either 1) use a "
|
||||
"type annotation in the class body, or 2) wrap "
|
||||
"the type in `torch.jit.Attribute`."
|
||||
"the type in `torch.jit.Attribute`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def visit_Call(self, node):
|
||||
|
|
@ -245,5 +246,6 @@ class AttributeTypeIsSupportedChecker(ast.NodeVisitor):
|
|||
"instance-level annotations on empty non-base "
|
||||
"types in `__init__`. Instead, either 1) use a "
|
||||
"type annotation in the class body, or 2) wrap "
|
||||
"the type in `torch.jit.Attribute`."
|
||||
"the type in `torch.jit.Attribute`.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -48,7 +48,9 @@ def signatures_match(decomposition_sig, torch_op_sig):
|
|||
inspect_empty = inspect._empty # type: ignore[attr-defined]
|
||||
for field in ["name", "annotation"]:
|
||||
if field == "name" and decomp_param.name == "self":
|
||||
warnings.warn("PyTorch uses 'input' instead of 'self' on public api")
|
||||
warnings.warn(
|
||||
"PyTorch uses 'input' instead of 'self' on public api", stacklevel=2
|
||||
)
|
||||
|
||||
if getattr(decomp_param, field) != getattr(op_param, field):
|
||||
return False
|
||||
|
|
|
|||
|
|
@ -309,7 +309,8 @@ def infer_concrete_type_builder(nn_module, share_types=True):
|
|||
|
||||
warnings.warn(
|
||||
f"'{name}' was found in ScriptModule constants, "
|
||||
f" but it is a non-constant {hint}. Consider removing it."
|
||||
f" but it is a non-constant {hint}. Consider removing it.",
|
||||
stacklevel=2,
|
||||
)
|
||||
continue
|
||||
if not hasattr(nn_module, name):
|
||||
|
|
@ -318,7 +319,8 @@ def infer_concrete_type_builder(nn_module, share_types=True):
|
|||
warnings.warn(
|
||||
f"'{name}' was found in ScriptModule constants, "
|
||||
"but was not actually set in __init__. "
|
||||
"Consider removing it."
|
||||
"Consider removing it.",
|
||||
stacklevel=2,
|
||||
)
|
||||
continue
|
||||
value = getattr(nn_module, name)
|
||||
|
|
|
|||
|
|
@ -775,6 +775,7 @@ if _enabled:
|
|||
"Lite Interpreter is deprecated. Please consider switching to ExecuTorch. \
|
||||
https://docs.pytorch.org/executorch/stable/getting-started.html",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._c._save_for_mobile(*args, **kwargs)
|
||||
|
||||
|
|
@ -787,6 +788,7 @@ if _enabled:
|
|||
"Lite Interpreter is deprecated. Please consider switching to ExecuTorch. \
|
||||
https://docs.pytorch.org/executorch/stable/getting-started.html",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self._c._save_to_buffer_for_mobile(*args, **kwargs)
|
||||
|
||||
|
|
@ -1165,7 +1167,8 @@ def _script_impl(
|
|||
warnings.warn(
|
||||
"Warning: monkeytype is not installed. Please install https://github.com/Instagram/MonkeyType "
|
||||
"to enable Profile-Directed Typing in TorchScript. Refer to "
|
||||
"https://github.com/Instagram/MonkeyType/blob/master/README.rst to install MonkeyType. "
|
||||
"https://github.com/Instagram/MonkeyType/blob/master/README.rst to install MonkeyType. ",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if isinstance(obj, torch.nn.Module):
|
||||
|
|
|
|||
|
|
@ -686,7 +686,8 @@ def _trace_impl(
|
|||
# it is hard to trace it because the forward method on ScriptModule is already defined, so it
|
||||
# would result in an error.
|
||||
warnings.warn(
|
||||
"The input to trace is already a ScriptModule, tracing it is a no-op. Returning the object as is."
|
||||
"The input to trace is already a ScriptModule, tracing it is a no-op. Returning the object as is.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return func
|
||||
|
||||
|
|
|
|||
|
|
@ -389,7 +389,8 @@ def is_tensor(ann):
|
|||
warnings.warn(
|
||||
"TorchScript will treat type annotations of Tensor "
|
||||
"dtype-specific subtypes as if they are normal Tensors. "
|
||||
"dtype constraints are not enforced in compilation either."
|
||||
"dtype constraints are not enforced in compilation either.",
|
||||
stacklevel=2,
|
||||
)
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,8 @@ def _apply_docstring_templates(func: Callable[_P, _T]) -> Callable[_P, _T]:
|
|||
warnings.warn(
|
||||
f"No documentation string available for {func.__name__}."
|
||||
" PyTorch team should run `python tools/update_masked_docs.py`"
|
||||
" to generate the missing docstrings."
|
||||
" to generate the missing docstrings.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
func.__doc__ = doc_string
|
||||
|
|
|
|||
|
|
@ -322,7 +322,7 @@ class MaskedTensor(torch.Tensor):
|
|||
"In the case that the semantics for the operator are not trivial, it would be appreciated "
|
||||
"to also include a proposal for the semantics."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
warnings.warn(msg, stacklevel=2)
|
||||
return NotImplemented
|
||||
|
||||
def __lt__(self, other):
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ def _torch_reduce_dim(fn):
|
|||
"In the case that the semantics for the operator are not trivial, it would be appreciated "
|
||||
"to also include a proposal for the semantics."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
warnings.warn(msg, stacklevel=2)
|
||||
return NotImplemented
|
||||
if not is_masked_tensor(self):
|
||||
raise TypeError("Input to reduce_dim must be a MaskedTensor")
|
||||
|
|
|
|||
|
|
@ -223,7 +223,9 @@ class ProcessContext:
|
|||
|
||||
class SpawnContext(ProcessContext):
|
||||
def __init__(self, processes, error_files):
|
||||
warnings.warn("SpawnContext is renamed to ProcessContext since 1.4 release.")
|
||||
warnings.warn(
|
||||
"SpawnContext is renamed to ProcessContext since 1.4 release.", stacklevel=2
|
||||
)
|
||||
super().__init__(processes, error_files)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,8 @@ def get_enum(reduction: str) -> int:
|
|||
elif reduction == "elementwise_mean":
|
||||
warnings.warn(
|
||||
"reduction='elementwise_mean' is deprecated. "
|
||||
"Please use reduction='mean' instead."
|
||||
"Please use reduction='mean' instead.",
|
||||
stacklevel=2,
|
||||
)
|
||||
ret = 1
|
||||
elif reduction == "sum":
|
||||
|
|
@ -48,7 +49,7 @@ def legacy_get_string(
|
|||
else:
|
||||
ret = "none"
|
||||
if emit_warning:
|
||||
warnings.warn(warning.format(ret))
|
||||
warnings.warn(warning.format(ret), stacklevel=2)
|
||||
return ret
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -60,10 +60,10 @@ def _raise_kernel_warnings(params: SDPAParams) -> None:
|
|||
"""
|
||||
if WARN_FOR_UNFUSED_KERNELS:
|
||||
if not can_use_efficient_attention(params):
|
||||
warn("Efficient attention can't be used because:")
|
||||
warn("Efficient attention can't be used because:", stacklevel=2)
|
||||
can_use_efficient_attention(params, True)
|
||||
if not can_use_flash_attention(params):
|
||||
warn("Flash attention can't be used because:")
|
||||
warn("Flash attention can't be used because:", stacklevel=2)
|
||||
can_use_flash_attention(params, True)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -134,7 +134,8 @@ class CausalBias(torch.Tensor):
|
|||
self.seq_len_kv = seq_len_kv
|
||||
if seq_len_q > seq_len_kv and variant == CausalVariant.LOWER_RIGHT:
|
||||
warn(
|
||||
"Lower right causal bias will produce NaNs in the output when seq_len_q > seq_len_kv!"
|
||||
"Lower right causal bias will produce NaNs in the output when seq_len_q > seq_len_kv!",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def _upper_left(self, device: torch.device) -> torch.Tensor:
|
||||
|
|
|
|||
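Note on the pattern applied throughout this diff: each warnings.warn call gains an explicit stacklevel=2, which is what the flake8 B028 check addressed by this commit asks for. With the default stacklevel=1 the warning is attributed to the line inside PyTorch that emits it; with stacklevel=2 it is attributed to the user code that called the PyTorch API. A minimal sketch, using a hypothetical module and function names that are not part of this diff:

    # library.py -- hypothetical module, for illustration only
    import warnings

    def old_style():
        # Default stacklevel=1: the reported location is this line in library.py.
        warnings.warn("old_style() is deprecated", DeprecationWarning)

    def new_style():
        # stacklevel=2: the reported location is the caller's file and line,
        # mirroring the change made to the PyTorch warnings in this commit.
        warnings.warn("new_style() is deprecated", DeprecationWarning, stacklevel=2)

Calling both functions from a small script and running it with python -W always shows library.py as the source of the first warning but the caller's own line for the second, which is the extra caller context the B028 rule is meant to surface.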