diff --git a/test/dynamo/test_minifier.py b/test/dynamo/test_minifier.py index 2af7e6acb91..1b7c460c707 100644 --- a/test/dynamo/test_minifier.py +++ b/test/dynamo/test_minifier.py @@ -119,7 +119,7 @@ inner(torch.randn(20, 20, requires_grad=True) + 1) backend_name = "relu_compile_error_TESTING_ONLY" run_code = f"""\ class CpuCudaModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.m_x = torch.nn.Linear(20, 20).cuda() self.m_y = torch.nn.Linear(20, 20) @@ -149,7 +149,7 @@ inner(torch.randn(20, 20).cuda(), torch.randn(20, 20)) res.minifier_module(), """\ class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.G__mod___m_x = Linear(in_features=20, out_features=20, bias=True).cuda() self.G__mod___m_y = Linear(in_features=20, out_features=20, bias=True) @@ -204,7 +204,7 @@ inner(torch.randn(20, 20)) res.repro_module(), """\ class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x_19): diff --git a/test/inductor/test_minifier.py b/test/inductor/test_minifier.py index d7e8e530648..45d4a79decf 100644 --- a/test/inductor/test_minifier.py +++ b/test/inductor/test_minifier.py @@ -122,7 +122,7 @@ inner(torch.randn(20)) res.repro_module(), """\ class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, arg0_1): @@ -138,7 +138,7 @@ class Repro(torch.nn.Module): res.repro_module(), """\ class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, arg0_1): diff --git a/torch/_classes.py b/torch/_classes.py index 58b34745352..069f13dcb67 100644 --- a/torch/_classes.py +++ b/torch/_classes.py @@ -19,7 +19,7 @@ class _ClassNamespace(types.ModuleType): class _Classes(types.ModuleType): __file__ = "_classes.py" - def __init__(self): + def __init__(self) -> None: super().__init__("torch.classes") def __getattr__(self, name): diff --git a/torch/_decomp/decompositions_for_rng.py b/torch/_decomp/decompositions_for_rng.py index 66bd33075a5..a62a28f783b 100644 --- a/torch/_decomp/decompositions_for_rng.py +++ b/torch/_decomp/decompositions_for_rng.py @@ -71,7 +71,7 @@ class PhiloxState: trace time. """ - def __init__(self): + def __init__(self) -> None: self.reset() def reset(self): diff --git a/torch/_dynamo/backends/distributed.py b/torch/_dynamo/backends/distributed.py index a58571c77c9..8d65f1670ae 100644 --- a/torch/_dynamo/backends/distributed.py +++ b/torch/_dynamo/backends/distributed.py @@ -247,7 +247,7 @@ class SubmodCompiler(torch.fx.interpreter.Interpreter): # This gives us the appropriately strided outputs here which will reflect runtime strides. class FakeifyFirstAOTInvocationGuard: - def __init__(self): + def __init__(self) -> None: self.tc = torch._guards.TracingContext.try_get() assert self.tc torch._guards.TracingContext.try_get().fakify_first_call = True diff --git a/torch/_dynamo/code_context.py b/torch/_dynamo/code_context.py index 59c912bd30f..727aad93495 100644 --- a/torch/_dynamo/code_context.py +++ b/torch/_dynamo/code_context.py @@ -5,7 +5,7 @@ from .utils import ExactWeakKeyDictionary class CodeContextDict: - def __init__(self): + def __init__(self) -> None: self.code_context = ExactWeakKeyDictionary() def has_context(self, code: types.CodeType): diff --git a/torch/_dynamo/debug_utils.py b/torch/_dynamo/debug_utils.py index 5e9656f2068..49d9b302fae 100644 --- a/torch/_dynamo/debug_utils.py +++ b/torch/_dynamo/debug_utils.py @@ -170,7 +170,7 @@ class NNModuleToString: """ from torch.nn import * class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() """ ) @@ -491,7 +491,7 @@ _is_leaf_or_default = _mk_defaulter(False) class NopInputReader: - def __init__(self): + def __init__(self) -> None: self.total = 0 def storage(self, storage_hash, nbytes, *, device=None, dtype_hint=None): diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 2f7f1c243f4..797c64eaf97 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -497,7 +497,7 @@ class _TorchDynamoContext: wrapper function. >> class CallableClass: - >> def __init__(self): + >> def __init__(self) -> None: >> super().__init__() >> self.relu = torch.nn.ReLU() >> @@ -578,7 +578,7 @@ class OptimizeContext(_TorchDynamoContext): class RunOnlyContext(_TorchDynamoContext): - def __init__(self): + def __init__(self) -> None: # cudagraph trees relies on generation increment def on_enter(): torch._dynamo.mutation_guard.GenerationTracker.generation += 1 @@ -590,7 +590,7 @@ class RunOnlyContext(_TorchDynamoContext): class DisableContext(_TorchDynamoContext): - def __init__(self): + def __init__(self) -> None: super().__init__(callback=None) def __call__(self, fn): diff --git a/torch/_dynamo/exc.py b/torch/_dynamo/exc.py index 2ca862c0087..5a0915a9727 100644 --- a/torch/_dynamo/exc.py +++ b/torch/_dynamo/exc.py @@ -74,7 +74,7 @@ class InvalidBackend(TorchDynamoException): class ResetRequired(TorchDynamoException): - def __init__(self): + def __init__(self) -> None: super().__init__( textwrap.dedent( """ diff --git a/torch/_dynamo/profiler.py b/torch/_dynamo/profiler.py index b7e9553ce21..841ab87cdf6 100644 --- a/torch/_dynamo/profiler.py +++ b/torch/_dynamo/profiler.py @@ -92,7 +92,7 @@ def print_missing(stack): class Profiler: unique_graphs = 0 - def __init__(self): + def __init__(self) -> None: self.prof = torch.profiler.profile( activities=[torch.profiler.ProfilerActivity.CPU], with_stack=should_print_missing(), diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py index 09752822dd8..5353327d98f 100644 --- a/torch/_dynamo/variables/base.py +++ b/torch/_dynamo/variables/base.py @@ -70,7 +70,7 @@ class MutableLocal(MutableLocalBase): state. """ - def __init__(self): + def __init__(self) -> None: super().__init__(MutableLocalSource.Local) def __hash__(self): diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index b1bb7b515dc..7f22f787d52 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -274,7 +274,7 @@ class GraphArg: class BackwardStateGraphArg(GraphArg): - def __init__(self): + def __init__(self) -> None: super().__init__( source=None, _example=BackwardState(), @@ -2646,7 +2646,7 @@ class SourcelessBuilder: if/else type->VariableTracker trees that were cropping up all over dynamo. """ - def __init__(self): + def __init__(self) -> None: raise AssertionError("Use SourcelessBuilder.create()") @staticmethod diff --git a/torch/_export/db/examples/class_method.py b/torch/_export/db/examples/class_method.py index 5d7f8b5b705..f701f54d4f4 100644 --- a/torch/_export/db/examples/class_method.py +++ b/torch/_export/db/examples/class_method.py @@ -10,7 +10,7 @@ class ClassMethod(torch.nn.Module): def method(cls, x): return x + 1 - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(4, 2) diff --git a/torch/_export/db/examples/cond_branch_class_method.py b/torch/_export/db/examples/cond_branch_class_method.py index 9ce4a9d6f34..22600cc5043 100644 --- a/torch/_export/db/examples/cond_branch_class_method.py +++ b/torch/_export/db/examples/cond_branch_class_method.py @@ -26,7 +26,7 @@ class CondBranchClassMethod(torch.nn.Module): NOTE: If the `pred` is test on a dim with batch size < 2, it will be specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.subm = MySubModule() diff --git a/torch/_export/db/examples/model_attr_mutation.py b/torch/_export/db/examples/model_attr_mutation.py index dfebbebd8b1..4aa623c7dc3 100644 --- a/torch/_export/db/examples/model_attr_mutation.py +++ b/torch/_export/db/examples/model_attr_mutation.py @@ -8,7 +8,7 @@ class ModelAttrMutation(torch.nn.Module): Attribute mutation is not supported. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.attr_list = [torch.randn(3, 2), torch.randn(3, 2)] diff --git a/torch/_export/db/examples/scalar_output.py b/torch/_export/db/examples/scalar_output.py index 83dd3637967..86d3b464533 100644 --- a/torch/_export/db/examples/scalar_output.py +++ b/torch/_export/db/examples/scalar_output.py @@ -11,7 +11,7 @@ class ScalarOutput(torch.nn.Module): Returning scalar values from the graph is supported, in addition to Tensor outputs. Symbolic shapes are captured and rank is specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x): diff --git a/torch/_export/db/examples/specialized_attribute.py b/torch/_export/db/examples/specialized_attribute.py index 39f7314bec7..f17092f9afc 100644 --- a/torch/_export/db/examples/specialized_attribute.py +++ b/torch/_export/db/examples/specialized_attribute.py @@ -11,7 +11,7 @@ class SpecializedAttribute(torch.nn.Module): Model attributes are specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = "moo" self.b = 4 diff --git a/torch/_export/passes/lift_constants_pass.py b/torch/_export/passes/lift_constants_pass.py index 823c66d2bc0..08d93287d32 100644 --- a/torch/_export/passes/lift_constants_pass.py +++ b/torch/_export/passes/lift_constants_pass.py @@ -24,7 +24,7 @@ class ConstantAttrMap(collections.abc.MutableMapping): if that's the case). """ - def __init__(self): + def __init__(self) -> None: # Underlying dict that we use to implement this mapping. self._constant_attrs: Dict[ Union[int, torch.Tensor, FakeScriptObject], List[Any] diff --git a/torch/_export/serde/serialize.py b/torch/_export/serde/serialize.py index ae0f6e39f23..28509b83416 100644 --- a/torch/_export/serde/serialize.py +++ b/torch/_export/serde/serialize.py @@ -1413,7 +1413,7 @@ class GraphModuleDeserializer(metaclass=Final): constants: Dict[str, Union[torch.Tensor, FakeScriptObject, torch.ScriptObject]] example_inputs: Optional[Tuple[Tuple[torch.Tensor, ...], Dict[str, Any]]] - def __init__(self): + def __init__(self) -> None: self.serialized_name_to_node: Dict[str, torch.fx.Node] = {} self.serialized_name_to_meta: Dict[str, MetaType] = {} self.graph = torch.fx.Graph() diff --git a/torch/_functorch/_aot_autograd/schemas.py b/torch/_functorch/_aot_autograd/schemas.py index 77f1cf22014..9b1b40b4830 100644 --- a/torch/_functorch/_aot_autograd/schemas.py +++ b/torch/_functorch/_aot_autograd/schemas.py @@ -602,7 +602,7 @@ class SubclassMeta: # Optional field because we don't compute for inference graphs grad_input_metas: Optional[List[Union[int, SubclassCreationMeta]]] = None - def __init__(self): + def __init__(self) -> None: # The fields in this class get set after its construction. pass diff --git a/torch/_functorch/aot_autograd.py b/torch/_functorch/aot_autograd.py index b7bd95a3ed4..e9fedb3d53c 100644 --- a/torch/_functorch/aot_autograd.py +++ b/torch/_functorch/aot_autograd.py @@ -878,7 +878,7 @@ def aot_module(mod: nn.Module, *args, **kwargs) -> nn.Module: ) class AOTModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.orig_module = mod diff --git a/torch/_functorch/autograd_function.py b/torch/_functorch/autograd_function.py index f80b7dee55b..270c1895f6f 100644 --- a/torch/_functorch/autograd_function.py +++ b/torch/_functorch/autograd_function.py @@ -30,7 +30,7 @@ from torch.autograd.forward_ad import _set_fwd_grad_enabled # We do this by using creating a custom HigherOrderOperator that only functorch # dispatches specially. class CustomFunctionHigherOrderOperator(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("custom_function_call") def __call__(self, autograd_function, *args, **kwargs): @@ -713,7 +713,7 @@ def autograd_function_forward_rewritten(original_forward, original_setup_context class AutogradFunctionApply(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("autograd_function_apply") def __call__(self, fwd, bwd, *fwd_args, **fwd_kwargs): diff --git a/torch/_guards.py b/torch/_guards.py index 3465f6e62ee..3fa9b57d300 100644 --- a/torch/_guards.py +++ b/torch/_guards.py @@ -427,7 +427,7 @@ class ModuleContextCheckpointState: class ModuleContext(Checkpointable[ModuleContextCheckpointState]): - def __init__(self): + def __init__(self) -> None: self.nn_modules: Dict[str, Any] = {} def copy_graphstate(self): @@ -476,7 +476,7 @@ class GlobalContext(Checkpointable[GlobalContextCheckpointState]): "autocast_cache_enabled", } - def __init__(self): + def __init__(self) -> None: self.global_state: Dict[str, Tuple[Callable, ...]] = {} def copy_graphstate(self): @@ -544,7 +544,7 @@ class GuardsSet: class GuardsContext(Checkpointable[GuardsCheckpointState]): - def __init__(self): + def __init__(self) -> None: self.dynamo_guards: GuardsSet = GuardsSet() self.aotautograd_guards: List[GuardEnvExpr] = [] diff --git a/torch/_higher_order_ops/auto_functionalize.py b/torch/_higher_order_ops/auto_functionalize.py index 40178fa750f..00f43e6acde 100644 --- a/torch/_higher_order_ops/auto_functionalize.py +++ b/torch/_higher_order_ops/auto_functionalize.py @@ -54,7 +54,7 @@ class AutoFunctionalized(HigherOrderOperator): underscore is to prevent collisions with kwarg names in **kwargs. """ - def __init__(self): + def __init__(self) -> None: super().__init__("auto_functionalized") def __call__( diff --git a/torch/_higher_order_ops/effects.py b/torch/_higher_order_ops/effects.py index f20c87c7e58..3bba77a5cfc 100644 --- a/torch/_higher_order_ops/effects.py +++ b/torch/_higher_order_ops/effects.py @@ -55,7 +55,7 @@ class WithEffects(HigherOrderOperator): per "effect type", which are enumerated in the _EffectType enum. """ - def __init__(self): + def __init__(self) -> None: super().__init__("with_effects") def __call__( diff --git a/torch/_higher_order_ops/flex_attention.py b/torch/_higher_order_ops/flex_attention.py index e3c9d718b2f..992c7398b2a 100644 --- a/torch/_higher_order_ops/flex_attention.py +++ b/torch/_higher_order_ops/flex_attention.py @@ -38,7 +38,7 @@ class TransformGetItemToIndex(TorchFunctionMode): class FlexAttentionHOP(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("flex_attention") def __call__( @@ -74,7 +74,7 @@ flex_attention.__module__ = "torch.ops.higher_order" class FlexAttentionBackwardHOP(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("flex_attention_backward") def __call__( diff --git a/torch/_higher_order_ops/out_dtype.py b/torch/_higher_order_ops/out_dtype.py index 5c9ca4f3f16..d1557909427 100644 --- a/torch/_higher_order_ops/out_dtype.py +++ b/torch/_higher_order_ops/out_dtype.py @@ -45,7 +45,7 @@ class OutDtypeOperator(HigherOrderOperator): 3. Cast the output to `out_dtype` """ - def __init__(self): + def __init__(self) -> None: super().__init__("out_dtype") # TODO(ydwu4): Subclassing HigherOrderOperator causes __module__ to # become different (torch._higher_order_ops.out_dtype) which will result diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py index 779ab2838b3..ff01b0c0124 100644 --- a/torch/_higher_order_ops/triton_kernel_wrap.py +++ b/torch/_higher_order_ops/triton_kernel_wrap.py @@ -519,7 +519,7 @@ def identify_mutated_tensors(kernel, kwargs): # Used for wrapping a Triton Kernel class TritonKernelWrapperMutation(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("triton_kernel_wrapper_mutation") @@ -528,7 +528,7 @@ triton_kernel_wrapper_mutation = TritonKernelWrapperMutation() # Used for wrapping a Triton Kernel in a functional manner class TritonKernelWrapperFunctional(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("triton_kernel_wrapper_functional") diff --git a/torch/_higher_order_ops/while_loop.py b/torch/_higher_order_ops/while_loop.py index 4924e1f3d44..e19fa162105 100644 --- a/torch/_higher_order_ops/while_loop.py +++ b/torch/_higher_order_ops/while_loop.py @@ -18,7 +18,7 @@ from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_ten class WhileLoopOp(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("while_loop") def __call__( diff --git a/torch/_higher_order_ops/wrap.py b/torch/_higher_order_ops/wrap.py index a26253405c4..d6faef20661 100644 --- a/torch/_higher_order_ops/wrap.py +++ b/torch/_higher_order_ops/wrap.py @@ -15,7 +15,7 @@ uid = itertools.count(1) # Used for testing the HigherOrderOperator mechanism class Wrap(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("wrap") def __call__(self, func, *args, **kwargs): @@ -36,7 +36,7 @@ wrap = Wrap() class WrapWithSetGradEnabled(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("wrap_with_set_grad_enabled") def __call__(self, enable_grad, wrapped_func, *args, **kwargs): @@ -74,7 +74,7 @@ class WrapActivationCheckpoint(HigherOrderOperator): partitioners. See TagActivationCheckpoint for more information. """ - def __init__(self): + def __init__(self) -> None: super().__init__("wrap_activation_checkpoint") def __call__(self, function, *args, **kwargs): @@ -113,7 +113,7 @@ class TagActivationCheckpoint(HigherOrderOperator): the forward and recomputed forward in backward. """ - def __init__(self): + def __init__(self) -> None: super().__init__("tag_activation_checkpoint") @staticmethod diff --git a/torch/_inductor/codegen/common.py b/torch/_inductor/codegen/common.py index 9b4830159cf..b9159435d4a 100644 --- a/torch/_inductor/codegen/common.py +++ b/torch/_inductor/codegen/common.py @@ -1560,7 +1560,7 @@ class CSE: class CodeGen: - def __init__(self): + def __init__(self) -> None: super().__init__() self.exit_stack = contextlib.ExitStack() diff --git a/torch/_inductor/codegen/cpp_wrapper_cuda.py b/torch/_inductor/codegen/cpp_wrapper_cuda.py index 8eed428de07..3def5af40af 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cuda.py +++ b/torch/_inductor/codegen/cpp_wrapper_cuda.py @@ -29,7 +29,7 @@ class CppWrapperCuda(CppWrapperCpu): Generates cpp wrapper for running on GPU and calls CUDA kernels """ - def __init__(self): + def __init__(self) -> None: self.device = "cuda" super().__init__() self.grid_id = count() diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index 711d749b650..3516ba3b0e6 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -1113,7 +1113,7 @@ class HelperFunctions: _templates_seen: Dict[str, str] # Template code to function name finalized_helpers: List[str] - def __init__(self): + def __init__(self) -> None: self._templates_seen = {} self.finalized_helpers = [] diff --git a/torch/_inductor/dependencies.py b/torch/_inductor/dependencies.py index 335f437af08..36dedd878f8 100644 --- a/torch/_inductor/dependencies.py +++ b/torch/_inductor/dependencies.py @@ -589,7 +589,7 @@ def canonicalization_prefix(): class FreeUnbackedSymbolsOpsHandler: symbols: OrderedSet[sympy.Symbol] - def __init__(self): + def __init__(self) -> None: self.symbols = OrderedSet() def __getattr__(self, name: str) -> Callable[..., Any]: diff --git a/torch/_inductor/exc.py b/torch/_inductor/exc.py index 07c1eebf99b..2505c8a3119 100644 --- a/torch/_inductor/exc.py +++ b/torch/_inductor/exc.py @@ -65,7 +65,7 @@ class SubgraphLoweringException(RuntimeError): class InvalidCxxCompiler(RuntimeError): - def __init__(self): + def __init__(self) -> None: from . import config super().__init__( diff --git a/torch/_inductor/fx_passes/misc_patterns.py b/torch/_inductor/fx_passes/misc_patterns.py index d7873fede3c..0f608952a2f 100644 --- a/torch/_inductor/fx_passes/misc_patterns.py +++ b/torch/_inductor/fx_passes/misc_patterns.py @@ -79,7 +79,7 @@ class NumpyCompatNormalization: inverse_mapping: Dict[str, str] cache: Dict["torch.fx.graph.Target", Set[str]] - def __init__(self): + def __init__(self) -> None: self.cache = {} # callable -> tuple of replaceable args e.g. ["axis"] self.inverse_mapping = {} for actual_kwarg, numpy_kwargs in self.numpy_compat.items(): diff --git a/torch/_inductor/fx_passes/mkldnn_fusion.py b/torch/_inductor/fx_passes/mkldnn_fusion.py index 34ddbf90b7f..c930608c766 100644 --- a/torch/_inductor/fx_passes/mkldnn_fusion.py +++ b/torch/_inductor/fx_passes/mkldnn_fusion.py @@ -1207,7 +1207,7 @@ if torch._C._has_mkldnn: Combine packed weight nodes with the same inputs to reduce memory usage. for example: class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(32, 32, bias=True) diff --git a/torch/_inductor/metrics.py b/torch/_inductor/metrics.py index 18e00b090ce..5c26e322f12 100644 --- a/torch/_inductor/metrics.py +++ b/torch/_inductor/metrics.py @@ -99,7 +99,7 @@ class CachedMetricsHelper: apply on a cache hit. """ - def __init__(self): + def __init__(self) -> None: self.cached_metrics = {} for metric in get_metric_fields(): self.cached_metrics[metric] = globals()[metric] diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 380fbe515c3..d5475b8e14e 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -940,7 +940,7 @@ class IndentedBuffer: class FakeIndentedBuffer(IndentedBuffer): - def __init__(self): + def __init__(self) -> None: super().__init__() def __getattribute__(self, name): @@ -1219,7 +1219,7 @@ class DebugDirManager: counter = itertools.count(0) prev_debug_name: str - def __init__(self): + def __init__(self) -> None: self.id = next(DebugDirManager.counter) def __enter__(self): @@ -1268,7 +1268,7 @@ def get_code(fn, *args, **kwargs): class DummyModule: """This is empty to replace the generated triton module""" - def __init__(self): + def __init__(self) -> None: pass def call(self, *args, **kwargs): diff --git a/torch/_lazy/closure.py b/torch/_lazy/closure.py index 32b2c58ba2b..94c12c075a0 100644 --- a/torch/_lazy/closure.py +++ b/torch/_lazy/closure.py @@ -7,7 +7,7 @@ from torch._lazy.device_context import get_device_context class ClosureHandler: - def __init__(self): + def __init__(self) -> None: pass def run(self, closure): diff --git a/torch/_library/fake_class_registry.py b/torch/_library/fake_class_registry.py index a56f138f4b0..213e88ac3e5 100644 --- a/torch/_library/fake_class_registry.py +++ b/torch/_library/fake_class_registry.py @@ -42,7 +42,7 @@ class HasStaticMethodFromReal(Protocol): class FakeClassRegistry: - def __init__(self): + def __init__(self) -> None: self._registered_class: Dict[str, Any] = {} def has_impl(self, full_qualname: str) -> bool: diff --git a/torch/_python_dispatcher.py b/torch/_python_dispatcher.py index 644cf92fda2..2dfdbb296a4 100644 --- a/torch/_python_dispatcher.py +++ b/torch/_python_dispatcher.py @@ -70,7 +70,7 @@ class PythonDispatcher: ] supported_keys = runtime_keys + alias_keys - def __init__(self): + def __init__(self) -> None: C._dispatch_check_invariants(self.name) # type: ignore[attr-defined] self.ref = C._dispatch_library("FRAGMENT", self.namespace, "") self.ref.def_("foo(Tensor x) -> Tensor") diff --git a/torch/_subclasses/schema_check_mode.py b/torch/_subclasses/schema_check_mode.py index d8843eec810..d7ad9ebd281 100644 --- a/torch/_subclasses/schema_check_mode.py +++ b/torch/_subclasses/schema_check_mode.py @@ -60,7 +60,7 @@ def clone_inputs(args): class SchemaCheckMode(TorchDispatchMode): - def __init__(self): + def __init__(self) -> None: # Information recorded for testing purposes. For example: # - incorrect schemas # - overly conservative schemas diff --git a/torch/ao/nn/quantized/modules/functional_modules.py b/torch/ao/nn/quantized/modules/functional_modules.py index b707a1f681c..45dc7fc0444 100644 --- a/torch/ao/nn/quantized/modules/functional_modules.py +++ b/torch/ao/nn/quantized/modules/functional_modules.py @@ -36,7 +36,7 @@ class FloatFunctional(torch.nn.Module): - mul_scalar """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.activation_post_process = torch.nn.Identity() @@ -190,7 +190,7 @@ class QFunctional(torch.nn.Module): - mul_scalar """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.scale = 1.0 self.zero_point = 0 diff --git a/torch/ao/ns/fx/qconfig_multi_mapping.py b/torch/ao/ns/fx/qconfig_multi_mapping.py index a7c0f0a27f6..8cd4190110f 100644 --- a/torch/ao/ns/fx/qconfig_multi_mapping.py +++ b/torch/ao/ns/fx/qconfig_multi_mapping.py @@ -72,7 +72,7 @@ class QConfigMultiMapping: """ - def __init__(self): + def __init__(self) -> None: # initialize this with 1 QConfigMapping to avoid corner cases self.qconfig_mappings_list: List[QConfigMapping] = [QConfigMapping()] diff --git a/torch/ao/pruning/_experimental/pruner/README.md b/torch/ao/pruning/_experimental/pruner/README.md index 026fd33b287..2885dff0402 100644 --- a/torch/ao/pruning/_experimental/pruner/README.md +++ b/torch/ao/pruning/_experimental/pruner/README.md @@ -99,7 +99,7 @@ from torch.ao.pruning._experimental.pruner import SaliencyPruner # Define model class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(700, 500, bias=True), diff --git a/torch/ao/quantization/fake_quantize.py b/torch/ao/quantization/fake_quantize.py index 57cc1df04d5..8ef266ebe47 100644 --- a/torch/ao/quantization/fake_quantize.py +++ b/torch/ao/quantization/fake_quantize.py @@ -85,7 +85,7 @@ class FakeQuantizeBase(ABC, Module): fake_quant_enabled: torch.Tensor observer_enabled: torch.Tensor - def __init__(self): + def __init__(self) -> None: """Set fake_quant_enabled and observer_enabled.""" super().__init__() # fake_quant_enabled and observer_enabled are buffers to support their diff --git a/torch/ao/quantization/fx/README.md b/torch/ao/quantization/fx/README.md index a8bd154791b..ca116b282e7 100644 --- a/torch/ao/quantization/fx/README.md +++ b/torch/ao/quantization/fx/README.md @@ -70,7 +70,7 @@ In the following, I’ll first have a detailed description for each step, and th ``` class LinearReLUModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10).float() self.relu = torch.nn.ReLU() diff --git a/torch/ao/quantization/fx/_model_report/detector.py b/torch/ao/quantization/fx/_model_report/detector.py index 534e73bfb0a..9db118a3365 100644 --- a/torch/ao/quantization/fx/_model_report/detector.py +++ b/torch/ao/quantization/fx/_model_report/detector.py @@ -137,7 +137,7 @@ class DetectorBase(ABC): - Should return a str-based report and dict info in Tuple[str,Dict] format """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.detector_config_info = None diff --git a/torch/ao/quantization/fx/custom_config.py b/torch/ao/quantization/fx/custom_config.py index 7aa408f0ceb..cb00c95fdee 100644 --- a/torch/ao/quantization/fx/custom_config.py +++ b/torch/ao/quantization/fx/custom_config.py @@ -63,7 +63,7 @@ class PrepareCustomConfig: .set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.standalone_module_names: Dict[str, StandaloneModuleConfigEntry] = {} self.standalone_module_classes: Dict[Type, StandaloneModuleConfigEntry] = {} self.float_to_observed_mapping: Dict[QuantType, Dict[Type, Type]] = {} @@ -382,7 +382,7 @@ class ConvertCustomConfig: .set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.observed_to_quantized_mapping: Dict[QuantType, Dict[Type, Type]] = {} self.preserved_attributes: List[str] = [] @@ -477,7 +477,7 @@ class FuseCustomConfig: fuse_custom_config = FuseCustomConfig().set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.preserved_attributes: List[str] = [] def __repr__(self): diff --git a/torch/ao/quantization/observer.py b/torch/ao/quantization/observer.py index 64b14b50614..e26f0302711 100644 --- a/torch/ao/quantization/observer.py +++ b/torch/ao/quantization/observer.py @@ -1568,7 +1568,7 @@ class ReuseInputObserver(ObserverBase): Note: this is only enabled in FX Graph Mode Quantization """ - def __init__(self): + def __init__(self) -> None: super().__init__(torch.quint8, is_dynamic=False) def forward(self, x): diff --git a/torch/ao/quantization/qconfig_mapping.py b/torch/ao/quantization/qconfig_mapping.py index 1b4d9cecbf3..2c12be74ce6 100644 --- a/torch/ao/quantization/qconfig_mapping.py +++ b/torch/ao/quantization/qconfig_mapping.py @@ -229,7 +229,7 @@ class QConfigMapping: """ - def __init__(self): + def __init__(self) -> None: # In increasing match priority: self.global_qconfig: QConfigAny = None self.object_type_qconfigs: OrderedDict[ diff --git a/torch/ao/quantization/quantize_fx.py b/torch/ao/quantization/quantize_fx.py index f5949d985f9..dd8f3e811a3 100644 --- a/torch/ao/quantization/quantize_fx.py +++ b/torch/ao/quantization/quantize_fx.py @@ -289,7 +289,7 @@ def prepare_fx( from torch.ao.quantization.quantize_fx import prepare_fx class Submodule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) def forward(self, x): @@ -297,7 +297,7 @@ def prepare_fx( return x class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) self.sub = Submodule() @@ -427,7 +427,7 @@ def prepare_qat_fx( from torch.ao.quantization.quantize_fx import prepare_qat_fx class Submodule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) def forward(self, x): @@ -435,7 +435,7 @@ def prepare_qat_fx( return x class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) self.sub = Submodule() diff --git a/torch/ao/quantization/quantize_pt2e.py b/torch/ao/quantization/quantize_pt2e.py index 41676934440..1e1848a6ff0 100644 --- a/torch/ao/quantization/quantize_pt2e.py +++ b/torch/ao/quantization/quantize_pt2e.py @@ -56,7 +56,7 @@ def prepare_pt2e( ) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10) @@ -129,7 +129,7 @@ def prepare_qat_pt2e( ) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10) diff --git a/torch/ao/quantization/quantizer/embedding_quantizer.py b/torch/ao/quantization/quantizer/embedding_quantizer.py index 6c93c0b88a1..32ec3814637 100644 --- a/torch/ao/quantization/quantizer/embedding_quantizer.py +++ b/torch/ao/quantization/quantizer/embedding_quantizer.py @@ -42,7 +42,7 @@ def get_embedding_operators_config() -> OperatorConfig: class EmbeddingQuantizer(Quantizer): - def __init__(self): + def __init__(self) -> None: super().__init__() @classmethod diff --git a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py index 09db71a191b..574af30a715 100644 --- a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py +++ b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py @@ -436,7 +436,7 @@ class X86InductorQuantizer(Quantizer): supported_config_and_operators = _get_supported_config_and_operators() module_function_to_aten_operator_type = _map_module_function_to_aten_operator_type() - def __init__(self): + def __init__(self) -> None: super().__init__() self.global_config: Optional[QuantizationConfig] = None self.operator_type_qconfig: Dict[ diff --git a/torch/ao/quantization/quantizer/xnnpack_quantizer.py b/torch/ao/quantization/quantizer/xnnpack_quantizer.py index 93712ded503..cc17057c82a 100644 --- a/torch/ao/quantization/quantizer/xnnpack_quantizer.py +++ b/torch/ao/quantization/quantizer/xnnpack_quantizer.py @@ -268,7 +268,7 @@ class XNNPACKQuantizer(Quantizer): "linear", ] - def __init__(self): + def __init__(self) -> None: super().__init__() self.global_config: Optional[QuantizationConfig] = None self.operator_type_config: Dict[ diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py index dad16df5b93..ff22da04a22 100644 --- a/torch/ao/quantization/utils.py +++ b/torch/ao/quantization/utils.py @@ -513,7 +513,7 @@ def _get_path_of_module( Example:: >> class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.linear = torch.nn.Linear(5, 5) def forward(self, x): return self.linear(x) diff --git a/torch/autograd/profiler_util.py b/torch/autograd/profiler_util.py index e3fc95580c5..67eb989f57c 100644 --- a/torch/autograd/profiler_util.py +++ b/torch/autograd/profiler_util.py @@ -645,7 +645,7 @@ class FunctionEvent(FormattedTimesMixin): class FunctionEventAvg(FormattedTimesMixin): """Used to average stats over multiple FunctionEvent objects.""" - def __init__(self): + def __init__(self) -> None: self.key: Optional[str] = None self.count: int = 0 self.node_id: int = 0 diff --git a/torch/backends/xeon/run_cpu.py b/torch/backends/xeon/run_cpu.py index bdf07e28617..634c50da4db 100644 --- a/torch/backends/xeon/run_cpu.py +++ b/torch/backends/xeon/run_cpu.py @@ -266,7 +266,7 @@ class _Launcher: or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or \ {expanduser('~')}/.local/lib/ so the LD_PRELOAD environment variable will not be set." - def __init__(self): + def __init__(self) -> None: self.cpuinfo = _CPUinfo() def add_lib_preload(self, lib_type): diff --git a/torch/csrc/jit/backends/backend_debug_handler.h b/torch/csrc/jit/backends/backend_debug_handler.h index d25ce2f8cb0..d4b00fe340f 100644 --- a/torch/csrc/jit/backends/backend_debug_handler.h +++ b/torch/csrc/jit/backends/backend_debug_handler.h @@ -77,17 +77,17 @@ namespace jit { * * So why does debug handle map to DebugInfoTuple = {source range and inlined * cs}? {debug_handle, source_range_tag, serialized_callstack} Take this - * example: class L(nn.Module): def __init__(self): + * example: class L(nn.Module): def __init__(self) -> None: * ... * def forward(self, x): * return x * 5 * class M(nn.Module): - * def __init__(self): + * def __init__(self) -> None: * ... * def forward(self, x): * return x - 2 * class N(nn.Module): - * def __init__(self): + * def __init__(self) -> None: * self.m = M() * def forward(self, x): * return self.m(x) + 3 diff --git a/torch/csrc/jit/docs/serialization.md b/torch/csrc/jit/docs/serialization.md index 106cea55478..3fb463c7e7f 100644 --- a/torch/csrc/jit/docs/serialization.md +++ b/torch/csrc/jit/docs/serialization.md @@ -328,7 +328,7 @@ For example: ``` class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.a = torch.rand(2, 3) self.b = torch.nn.Linear(10, 10) diff --git a/torch/csrc/jit/operator_upgraders/README.md b/torch/csrc/jit/operator_upgraders/README.md index 61679972073..ce995276d28 100644 --- a/torch/csrc/jit/operator_upgraders/README.md +++ b/torch/csrc/jit/operator_upgraders/README.md @@ -37,7 +37,7 @@ When making changes to the operators, the first thing to identify is if it's BC/ 1. Add a test module in `test/jit/fixtures_srcs/fixtures_src.py`. In `test/jit/fixtures_srcs/generate_models.py`, ``` class TestVersionedLinspaceV7(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]): @@ -163,7 +163,7 @@ When making changes to the operators, the first thing to identify is if it's BC/ # Step 2. Write down how current module should look like class MyModuleFloat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, a, b: float): diff --git a/torch/csrc/jit/passes/onnx/function_extraction.h b/torch/csrc/jit/passes/onnx/function_extraction.h index 3a90967e2f1..40555f8e356 100644 --- a/torch/csrc/jit/passes/onnx/function_extraction.h +++ b/torch/csrc/jit/passes/onnx/function_extraction.h @@ -25,7 +25,7 @@ namespace onnx { // // clang-format off // class M(torch.nn.Module): -// def __init__(self): +// def __init__(self) -> None: // super().__init__() // self.lns = torch.nn.ModuleList([torch.nn.LayerNorm(3, eps = i) for i in range(2)]) // self.celu1 = torch.nn.CELU(1.0) diff --git a/torch/csrc/lazy/test_mnist.py b/torch/csrc/lazy/test_mnist.py index a3a03d9844d..762620fcc62 100644 --- a/torch/csrc/lazy/test_mnist.py +++ b/torch/csrc/lazy/test_mnist.py @@ -17,7 +17,7 @@ torch._lazy.ts_backend.init() class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) diff --git a/torch/csrc/lazy/tutorial.md b/torch/csrc/lazy/tutorial.md index 155e8adfdd8..b72ae13eca7 100644 --- a/torch/csrc/lazy/tutorial.md +++ b/torch/csrc/lazy/tutorial.md @@ -135,7 +135,7 @@ Here's our model definition: ```python class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) diff --git a/torch/cuda/_sanitizer.py b/torch/cuda/_sanitizer.py index f9ce311725e..34cd7bacee0 100644 --- a/torch/cuda/_sanitizer.py +++ b/torch/cuda/_sanitizer.py @@ -163,7 +163,7 @@ class TensorInfo: class _TensorsAccessed: - def __init__(self): + def __init__(self) -> None: self.accesses: Dict[DataPtr, TensorInfo] = {} def ensure_tensor_exists(self, data_ptr: DataPtr) -> None: @@ -218,7 +218,7 @@ class _TensorsAccessed: class StreamSynchronizations: - def __init__(self): + def __init__(self) -> None: self.current_sync_states: Dict[StreamId, Dict[StreamId, SeqNum]] = {} self.recorded_sync_states: Dict[EventId, Dict[StreamId, SeqNum]] = {} self.host_sync_state: Dict[StreamId, SeqNum] = {} @@ -338,7 +338,7 @@ class EventHandler: data race. """ - def __init__(self): + def __init__(self) -> None: self.tensors_accessed = _TensorsAccessed() self.syncs = StreamSynchronizations() self.seq_num: SeqNum = 0 @@ -478,7 +478,7 @@ def zip_arguments( class ArgumentHandler: - def __init__(self): + def __init__(self) -> None: self.dataptrs_read: Set[DataPtr] = set() self.dataptrs_written: Set[DataPtr] = set() self.tensor_aliases: Dict[DataPtr, List[str]] = {} @@ -527,7 +527,7 @@ class ArgumentHandler: class CUDASanitizerDispatchMode(TorchDispatchMode): - def __init__(self): + def __init__(self) -> None: self.event_handler = EventHandler() torch._C._activate_gpu_trace() gpu_trace.register_callback_for_event_creation( @@ -596,7 +596,7 @@ class CUDASanitizer: This approach was deemed more elegant than using the atexit module. """ - def __init__(self): + def __init__(self) -> None: self.dispatch = CUDASanitizerDispatchMode() self.enabled = False diff --git a/torch/distributed/_composable/checkpoint_activation.py b/torch/distributed/_composable/checkpoint_activation.py index fcee2a57a07..88253abb4b9 100644 --- a/torch/distributed/_composable/checkpoint_activation.py +++ b/torch/distributed/_composable/checkpoint_activation.py @@ -49,7 +49,7 @@ def checkpoint(module: nn.Module, **kwargs) -> nn.Module: >>> import torch.nn as nn >>> >>> class MyModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.l1 = nn.Linear(10, 10) >>> self.l2 = nn.Linear(10, 10) diff --git a/torch/distributed/_composable/contract.py b/torch/distributed/_composable/contract.py index 850659fc2c0..e7cd1713fae 100644 --- a/torch/distributed/_composable/contract.py +++ b/torch/distributed/_composable/contract.py @@ -47,7 +47,7 @@ def contract(state_cls: Type[_State] = _State): >>> import torch.nn as nn >>> >>> class MyModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.l1 = nn.Linear(10, 10) >>> self.l2 = nn.Linear(10, 10) diff --git a/torch/distributed/_composable/fsdp/_fsdp_state.py b/torch/distributed/_composable/fsdp/_fsdp_state.py index 6f62c01600d..6dde573d848 100644 --- a/torch/distributed/_composable/fsdp/_fsdp_state.py +++ b/torch/distributed/_composable/fsdp/_fsdp_state.py @@ -43,7 +43,7 @@ logger = logging.getLogger("torch.distributed._composable.fsdp") class FSDPStateContext: """This has state shared across FSDP states.""" - def __init__(self): + def __init__(self) -> None: # All FSDP states in the root state's module tree self.all_states: List[FSDPState] = [] # Iteration's forward root runs the once-per-forward logic; this root @@ -71,7 +71,7 @@ def disable_if_config_true(func): class FSDPState(_State): - def __init__(self): + def __init__(self) -> None: super().__init__() self._fsdp_param_group: Optional[FSDPParamGroup] = None self._is_root: Optional[bool] = None # root set during lazy init diff --git a/torch/distributed/_shard/sharding_plan/api.py b/torch/distributed/_shard/sharding_plan/api.py index a7552c5a68f..d141df1a521 100644 --- a/torch/distributed/_shard/sharding_plan/api.py +++ b/torch/distributed/_shard/sharding_plan/api.py @@ -38,7 +38,7 @@ class ShardingPlan: >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d) >>> class MyModule(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.fc1 = nn.Linear() >>> self.gelu = nn.GELU() diff --git a/torch/distributed/_tensor/README.md b/torch/distributed/_tensor/README.md index 80fcc2eb41f..2fedb7cc3b4 100644 --- a/torch/distributed/_tensor/README.md +++ b/torch/distributed/_tensor/README.md @@ -117,7 +117,7 @@ import torch.nn as nn from torch.distributed._tensor import Shard, distribute_tensor, distribute_module, init_device_mesh class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(8, 8) self.fc2 = nn.Linear(8, 8) diff --git a/torch/distributed/_tensor/examples/checkpoint_example.py b/torch/distributed/_tensor/examples/checkpoint_example.py index 1701e28ac2c..fe8585c2a23 100644 --- a/torch/distributed/_tensor/examples/checkpoint_example.py +++ b/torch/distributed/_tensor/examples/checkpoint_example.py @@ -25,7 +25,7 @@ from torch.distributed.tensor.parallel import ColwiseParallel, parallelize_modul class SimpleMLP(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = torch.nn.Linear(5, 128) self.relu = torch.nn.ReLU() diff --git a/torch/distributed/algorithms/join.py b/torch/distributed/algorithms/join.py index 14084485193..f7c95100b1b 100644 --- a/torch/distributed/algorithms/join.py +++ b/torch/distributed/algorithms/join.py @@ -55,7 +55,7 @@ class Joinable(ABC): """ @abstractmethod - def __init__(self): + def __init__(self) -> None: super().__init__() self._join_config = _JoinConfig.construct_disabled_join_config() diff --git a/torch/distributed/checkpoint/examples/async_checkpointing_example.py b/torch/distributed/checkpoint/examples/async_checkpointing_example.py index 5eaba9a6722..589f9b93544 100644 --- a/torch/distributed/checkpoint/examples/async_checkpointing_example.py +++ b/torch/distributed/checkpoint/examples/async_checkpointing_example.py @@ -31,7 +31,7 @@ class InjectedException(Exception): class Model(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(8, 32) self.net2 = nn.Linear(32, 128) diff --git a/torch/distributed/checkpoint/examples/stateful_example.py b/torch/distributed/checkpoint/examples/stateful_example.py index 6c76ec43636..f6e0d11801d 100644 --- a/torch/distributed/checkpoint/examples/stateful_example.py +++ b/torch/distributed/checkpoint/examples/stateful_example.py @@ -22,7 +22,7 @@ CHECKPOINT_DIR = f"~/{os.environ['LOGNAME']}/checkpoint" class Model(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() torch.manual_seed(0) self.net1 = nn.Sequential(nn.Linear(8, 16), nn.ReLU()) diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 161eade7af6..a78ab550575 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -434,7 +434,7 @@ class _reduce_op: :class:`~torch.distributed.ReduceOp` is recommended to use instead. """ - def __init__(self): + def __init__(self) -> None: # __members__ is a dict storing key-value pairs for enum classes for k, v in ReduceOp.RedOpType.__members__.items(): setattr(self, k, v) @@ -568,7 +568,7 @@ class _World: of c10d and is subject to change.. """ - def __init__(self): + def __init__(self) -> None: self._default_pg = None self._pg_coalesce_state: Dict[ProcessGroup, List[_CollOp]] = {} self._pg_default_device: Dict[ProcessGroup, torch.device] = {} @@ -2194,7 +2194,7 @@ class _IllegalWork(Work): class _CoalescingManager: - def __init__(self): + def __init__(self) -> None: self.works: List[Work] = [] def append(self, work: Work): diff --git a/torch/distributed/fsdp/_common_utils.py b/torch/distributed/fsdp/_common_utils.py index 10d0f821265..d722d5b9825 100644 --- a/torch/distributed/fsdp/_common_utils.py +++ b/torch/distributed/fsdp/_common_utils.py @@ -106,7 +106,7 @@ class _FSDPDeviceHandle: class _UninitializedDeviceHandle(_FSDPDeviceHandle): - def __init__(self): + def __init__(self) -> None: pass def __getattribute__(self, __name: str) -> Any: diff --git a/torch/distributed/nn/api/remote_module.py b/torch/distributed/nn/api/remote_module.py index 5583da8c3e8..4e18fe3245e 100644 --- a/torch/distributed/nn/api/remote_module.py +++ b/torch/distributed/nn/api/remote_module.py @@ -156,7 +156,7 @@ class _RemoteModule(nn.Module): created outside of remote modules, rather than as submodules of any remote module (by calling ``add_module``). Hybrid Example: >>> class HybridModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> nn.Module.__init__(self) >>> self.remote_embedding = RemoteModule(...) >>> self.local_linear = nn.Linear(...) diff --git a/torch/export/graph_signature.py b/torch/export/graph_signature.py index c36941ee02e..0d93957d77c 100644 --- a/torch/export/graph_signature.py +++ b/torch/export/graph_signature.py @@ -248,7 +248,7 @@ class ExportGraphSignature: e.g. If following module is exported:: class CustomModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super(CustomModule, self).__init__() # Define a parameter diff --git a/torch/fx/README.md b/torch/fx/README.md index a69a6ed1f65..4c799da7bc4 100644 --- a/torch/fx/README.md +++ b/torch/fx/README.md @@ -45,7 +45,7 @@ FX’s front-end makes use of the dynamic nature of Python to intercept call-sit import torch class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter( torch.rand(3, 4)) diff --git a/torch/fx/__init__.py b/torch/fx/__init__.py index b9896390f12..dd04cdd09d7 100644 --- a/torch/fx/__init__.py +++ b/torch/fx/__init__.py @@ -9,7 +9,7 @@ demonstration of these components in action: import torch # Simple module for demonstration class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter(torch.rand(3, 4)) self.linear = torch.nn.Linear(4, 5) diff --git a/torch/fx/_symbolic_trace.py b/torch/fx/_symbolic_trace.py index bd8d4f42669..92fb7b94948 100644 --- a/torch/fx/_symbolic_trace.py +++ b/torch/fx/_symbolic_trace.py @@ -1012,7 +1012,7 @@ class _PatchedFnSetAttr(_PatchedFn): class _Patcher: - def __init__(self): + def __init__(self) -> None: super().__init__() self.patches_made: List[_PatchedFn] = [] self.visited: Set[int] = set() diff --git a/torch/fx/experimental/migrate_gradual_types/constraint.py b/torch/fx/experimental/migrate_gradual_types/constraint.py index 45038837cae..4693a62de24 100644 --- a/torch/fx/experimental/migrate_gradual_types/constraint.py +++ b/torch/fx/experimental/migrate_gradual_types/constraint.py @@ -63,7 +63,7 @@ class T(Constraint): """ True """ - def __init__(self): + def __init__(self) -> None: pass def __eq__(self, other): @@ -76,7 +76,7 @@ class F(Constraint): """ False """ - def __init__(self): + def __init__(self) -> None: pass def __eq__(self, other): diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py index 726ab04539d..a577cf8736e 100644 --- a/torch/fx/passes/graph_drawer.py +++ b/torch/fx/passes/graph_drawer.py @@ -117,7 +117,7 @@ if HAS_PYDOT: >>> # xdoctest: +REQUIRES(module:ubelt) >>> # define module >>> class MyModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.linear = torch.nn.Linear(4, 5) >>> def forward(self, x): diff --git a/torch/fx/passes/split_module.py b/torch/fx/passes/split_module.py index fba516d74be..5984587f17c 100644 --- a/torch/fx/passes/split_module.py +++ b/torch/fx/passes/split_module.py @@ -83,7 +83,7 @@ def split_module( from torch.fx.passes.split_module import split_module class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter(torch.rand(3, 4)) self.linear = torch.nn.Linear(4, 5) diff --git a/torch/fx/passes/split_utils.py b/torch/fx/passes/split_utils.py index d8254bd474b..44b97471332 100644 --- a/torch/fx/passes/split_utils.py +++ b/torch/fx/passes/split_utils.py @@ -83,7 +83,7 @@ def split_by_tags( Given the following module def: class SimpleModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = torch.nn.Linear(...) self.linear2 = torch.nn.Linear(...) diff --git a/torch/fx/proxy.py b/torch/fx/proxy.py index 874ac51afff..05157f9ddb1 100644 --- a/torch/fx/proxy.py +++ b/torch/fx/proxy.py @@ -38,7 +38,7 @@ class Scope: return x.transpose(1, 2) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.sub = Sub() def forward(self, x): diff --git a/torch/fx/subgraph_rewriter.py b/torch/fx/subgraph_rewriter.py index 419337a1768..8a9e78c0af4 100644 --- a/torch/fx/subgraph_rewriter.py +++ b/torch/fx/subgraph_rewriter.py @@ -118,7 +118,7 @@ def replace_pattern( from torch.fx import symbolic_trace, subgraph_rewriter class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x, w1, w2): diff --git a/torch/fx/tensor_type.py b/torch/fx/tensor_type.py index f59ed2d45ba..83b5a9f8faf 100644 --- a/torch/fx/tensor_type.py +++ b/torch/fx/tensor_type.py @@ -38,7 +38,7 @@ class _DynType: """ _DynType defines a type which stands for the absence of type information. """ - def __init__(self): + def __init__(self) -> None: self.__name__ = '_DynType' def __eq__(self, other): diff --git a/torch/jit/__init__.py b/torch/jit/__init__.py index 6d1760fb9f4..e80fa2932fc 100644 --- a/torch/jit/__init__.py +++ b/torch/jit/__init__.py @@ -219,7 +219,7 @@ def isinstance(obj, target_type): from typing import Any, Dict, List class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, input: Any): # note the Any type @@ -255,7 +255,7 @@ class strict_fusion: """ - def __init__(self): + def __init__(self) -> None: if not torch._jit_internal.is_scripting(): warnings.warn("Only works in script mode") pass diff --git a/torch/jit/_async.py b/torch/jit/_async.py index bdde55adf14..ceaef70b1fe 100644 --- a/torch/jit/_async.py +++ b/torch/jit/_async.py @@ -73,7 +73,7 @@ def fork(func, *args, **kwargs): def forward(self, a: Tensor, b : int): return a + b class Mod(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super(self).__init__() self.mod = AddMod() def forward(self, input): diff --git a/torch/jit/_check.py b/torch/jit/_check.py index 8db5bb82ce3..f708ee87f30 100644 --- a/torch/jit/_check.py +++ b/torch/jit/_check.py @@ -39,7 +39,7 @@ class AttributeTypeIsSupportedChecker(ast.NodeVisitor): def fn(self): return [] - def __init__(self): + def __init__(self) -> None: super().__init__() self.x: List[int] = [] diff --git a/torch/jit/_freeze.py b/torch/jit/_freeze.py index 8f35fc471e6..e496bd74762 100644 --- a/torch/jit/_freeze.py +++ b/torch/jit/_freeze.py @@ -65,7 +65,7 @@ def freeze( .. testcode:: import torch class MyModule2(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.modified_tensor = torch.tensor(10.) self.version = 1 diff --git a/torch/jit/_monkeytype_config.py b/torch/jit/_monkeytype_config.py index ecf7cd865fd..366a58ac6af 100644 --- a/torch/jit/_monkeytype_config.py +++ b/torch/jit/_monkeytype_config.py @@ -89,7 +89,7 @@ if _IS_MONKEYTYPE_INSTALLED: self.traces.append(trace) class JitTypeTraceStore(CallTraceStore): - def __init__(self): + def __init__(self) -> None: super().__init__() # A dictionary keeping all collected CallTrace # key is fully qualified name of called function @@ -159,15 +159,15 @@ else: # When MonkeyType is not installed, we provide dummy class definitions # for the below classes. class JitTypeTraceStoreLogger: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: pass class JitTypeTraceStore: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: self.trace_records = None class JitTypeTraceConfig: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: pass monkeytype_trace = None # type: ignore[assignment] # noqa: F811 diff --git a/torch/jit/_recursive.py b/torch/jit/_recursive.py index b8dc0ecf2cd..e03540a7c75 100644 --- a/torch/jit/_recursive.py +++ b/torch/jit/_recursive.py @@ -426,7 +426,7 @@ class ConcreteTypeStore: type_store: Dict[Type[Module], List[torch._C.ConcreteModuleType]] methods_compiled: Set[torch._C.ConcreteModuleType] - def __init__(self): + def __init__(self) -> None: # Python module type => List[ConcreteModuleType)] self.type_store = {} # ConcreteTypes that have had their methods already compiled diff --git a/torch/jit/_script.py b/torch/jit/_script.py index 490e9e644e2..a7bc45fa5fc 100644 --- a/torch/jit/_script.py +++ b/torch/jit/_script.py @@ -107,7 +107,7 @@ Attribute.__doc__ = """ from typing import Dict class AttributeModule(torch.jit.ScriptModule): - def __init__(self): + def __init__(self) -> None: super().__init__() self.foo = torch.jit.Attribute(0.1, float) @@ -138,7 +138,7 @@ Attribute.__doc__ = """ class AttributeModule(torch.nn.Module): names: Dict[str, int] - def __init__(self): + def __init__(self) -> None: super().__init__() self.names = {} @@ -522,7 +522,7 @@ if _enabled: "original_name", ] - def __init__(self): + def __init__(self) -> None: super().__init__() forward: Callable[..., Any] = _CachedForward() # type: ignore[assignment] @@ -1351,7 +1351,7 @@ def script( import torch.nn.functional as F class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() # torch.jit.trace produces a ScriptModule's conv1 and conv2 self.conv1 = torch.jit.trace(nn.Conv2d(1, 20, 5), torch.rand(1, 1, 16, 16)) @@ -1374,7 +1374,7 @@ def script( import torch.nn as nn class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() @torch.jit.export @@ -1547,7 +1547,7 @@ def interface(obj): return x.relu() class Impl2(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.val = torch.rand(()) @@ -1671,7 +1671,7 @@ class _ScriptProfileTable: class _ScriptProfile: - def __init__(self): + def __init__(self) -> None: self.profile = classes.profiling._ScriptProfile() def enable(self): diff --git a/torch/jit/_state.py b/torch/jit/_state.py index 63df2acfdf0..18456ebd386 100644 --- a/torch/jit/_state.py +++ b/torch/jit/_state.py @@ -19,7 +19,7 @@ class EnabledProxy: This is just a wrapper for a bool, so that we get reference semantics """ - def __init__(self): + def __init__(self) -> None: self.enabled = self.parse_env( "PYTORCH_JIT", True, "> Using PyTorch JIT", "> PyTorch JIT DISABLED" ) diff --git a/torch/jit/_trace.py b/torch/jit/_trace.py index 1c0372c7281..372fd72ddb2 100644 --- a/torch/jit/_trace.py +++ b/torch/jit/_trace.py @@ -966,7 +966,7 @@ def trace( import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(1, 1, 3) @@ -1182,7 +1182,7 @@ def trace_module( import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(1, 1, 3) diff --git a/torch/multiprocessing/reductions.py b/torch/multiprocessing/reductions.py index 0a6d3c8a444..fa0818571a9 100644 --- a/torch/multiprocessing/reductions.py +++ b/torch/multiprocessing/reductions.py @@ -61,7 +61,7 @@ class StorageWeakRef: class SharedCache(dict): """Dictionary from multiprocessing handles to StorageWeakRef.""" - def __init__(self): + def __init__(self) -> None: # free_dead_references() is called if the len exceeds the current # limit. The limit scales with the number of remaining live objects. self.limit = 128 diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py index 30992e394b0..585f4ef1658 100644 --- a/torch/nn/modules/container.py +++ b/torch/nn/modules/container.py @@ -291,7 +291,7 @@ class ModuleList(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)]) @@ -465,7 +465,7 @@ class ModuleDict(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.choices = nn.ModuleDict({ 'conv': nn.Conv2d(10, 10, 3), @@ -597,7 +597,7 @@ class ParameterList(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.params = nn.ParameterList([nn.Parameter(torch.randn(10, 10)) for i in range(10)]) @@ -749,7 +749,7 @@ class ParameterDict(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.params = nn.ParameterDict({ 'left': nn.Parameter(torch.randn(5, 10)), diff --git a/torch/nn/modules/lazy.py b/torch/nn/modules/lazy.py index 7a9a0161006..61cabd061ae 100644 --- a/torch/nn/modules/lazy.py +++ b/torch/nn/modules/lazy.py @@ -86,7 +86,7 @@ class LazyModuleMixin: >>> # xdoctest: +SKIP >>> class LazyMLP(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.fc1 = torch.nn.LazyLinear(10) ... self.relu1 = torch.nn.ReLU() diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index a6592655fd4..a15850553f1 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -408,7 +408,7 @@ class Module: import torch.nn.functional as F class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) diff --git a/torch/onnx/_globals.py b/torch/onnx/_globals.py index 22c05075dba..ebef6c331b2 100644 --- a/torch/onnx/_globals.py +++ b/torch/onnx/_globals.py @@ -20,7 +20,7 @@ class _InternalGlobals: global variables unless they are absolutely necessary. """ - def __init__(self): + def __init__(self) -> None: self._export_onnx_opset_version = _constants.ONNX_DEFAULT_OPSET self._training_mode: _C_onnx.TrainingMode = _C_onnx.TrainingMode.EVAL self._in_onnx_export: bool = False diff --git a/torch/onnx/_internal/exporter.py b/torch/onnx/_internal/exporter.py index e53f906cd84..7c7203c8085 100644 --- a/torch/onnx/_internal/exporter.py +++ b/torch/onnx/_internal/exporter.py @@ -760,7 +760,7 @@ class ONNXProgram: >>> import pprint >>> class CustomModule(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.my_parameter = torch.nn.Parameter(torch.tensor(2.0)) ... self.register_buffer("my_buffer1", torch.tensor(3.0)) diff --git a/torch/onnx/_internal/fx/dynamo_graph_extractor.py b/torch/onnx/_internal/fx/dynamo_graph_extractor.py index a3b8a69f60d..5abf2bf2c63 100644 --- a/torch/onnx/_internal/fx/dynamo_graph_extractor.py +++ b/torch/onnx/_internal/fx/dynamo_graph_extractor.py @@ -24,7 +24,7 @@ class _PyTreeExtensionContext: _extensions: dict[type, tuple[pytree.FlattenFunc, pytree.UnflattenFunc]] - def __init__(self): + def __init__(self) -> None: self._extensions = {} # Register PyTree extension for HuggingFace model output. self._register_huggingface_model_output_extension() diff --git a/torch/onnx/_internal/fx/passes/modularization.py b/torch/onnx/_internal/fx/passes/modularization.py index 4f4d347401e..db74d52dda4 100644 --- a/torch/onnx/_internal/fx/passes/modularization.py +++ b/torch/onnx/_internal/fx/passes/modularization.py @@ -795,7 +795,7 @@ class Modularize(_pass.Transform): >>> from torch.onnx._internal.diagnostics import infra >>> >>> class CustomModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.embedding = torch.nn.Embedding(10, 32) >>> self.relu = torch.nn.ReLU() @@ -806,7 +806,7 @@ class Modularize(_pass.Transform): >>> return out >>> >>> class TestModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.layer = CustomModule() >>> self.linear = torch.nn.Linear(32, 10) diff --git a/torch/onnx/_internal/fx/patcher.py b/torch/onnx/_internal/fx/patcher.py index 3c0ee6c0714..239edb6dde6 100644 --- a/torch/onnx/_internal/fx/patcher.py +++ b/torch/onnx/_internal/fx/patcher.py @@ -53,7 +53,7 @@ class ONNXTorchPatcher: `torch.fx._symbolic_trace._wrapped_methods_to_patch` """ - def __init__(self): + def __init__(self) -> None: # List of file paths processed by torch.load. self.paths: List[Union[str, io.BufferedIOBase]] = [] diff --git a/torch/onnx/_internal/onnxruntime.py b/torch/onnx/_internal/onnxruntime.py index b9d6bce1651..59609866bae 100644 --- a/torch/onnx/_internal/onnxruntime.py +++ b/torch/onnx/_internal/onnxruntime.py @@ -602,7 +602,7 @@ class OrtExecutionInfoPerSession: @dataclasses.dataclass class OrtExecutionInfoForAllGraphModules: - def __init__(self): + def __init__(self) -> None: # All sessions (and their related information) created by exporting the same GraphModule # with different inputs. self.execution_info_per_graph_module: Dict[ diff --git a/torch/onnx/_internal/registration.py b/torch/onnx/_internal/registration.py index c59ab11d4fa..95de41b3f03 100644 --- a/torch/onnx/_internal/registration.py +++ b/torch/onnx/_internal/registration.py @@ -69,7 +69,7 @@ class OverrideDict(Collection[_K], Generic[_K, _V]): ones. """ - def __init__(self): + def __init__(self) -> None: self._base: Dict[_K, _V] = {} self._overrides: Dict[_K, _V] = {} self._merged: Dict[_K, _V] = {} diff --git a/torch/onnx/verification.py b/torch/onnx/verification.py index e8bcfe4ca9e..bcf1de6b643 100644 --- a/torch/onnx/verification.py +++ b/torch/onnx/verification.py @@ -1722,7 +1722,7 @@ def find_mismatch( ... opset_version=opset_version, ... ) >>> class Model(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.layers = torch.nn.Sequential( ... torch.nn.Linear(3, 4), diff --git a/torch/overrides.py b/torch/overrides.py index bbd055de447..ecb5613f80f 100644 --- a/torch/overrides.py +++ b/torch/overrides.py @@ -2025,7 +2025,7 @@ class TorchFunctionMode: inner: "TorchFunctionMode" # Force metaclass to generate constructor at the base of the hierarchy - def __init__(self): + def __init__(self) -> None: pass def __torch_function__(self, func, types, args=(), kwargs=None): diff --git a/torch/package/_mangling.py b/torch/package/_mangling.py index 7dcf3538631..700a9ad6a04 100644 --- a/torch/package/_mangling.py +++ b/torch/package/_mangling.py @@ -12,7 +12,7 @@ class PackageMangler: Used on import, to ensure that all modules imported have a shared mangle parent. """ - def __init__(self): + def __init__(self) -> None: global _mangle_index self._mangle_index = _mangle_index # Increment the global index diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py index 47faac8c176..98f1c1b6735 100644 --- a/torch/profiler/profiler.py +++ b/torch/profiler/profiler.py @@ -772,7 +772,7 @@ class ExecutionTraceObserver(_ITraceObserver): incurring any overheads. """ - def __init__(self): + def __init__(self) -> None: """ Initializes the default states. """ diff --git a/torch/testing/_internal/common_fsdp.py b/torch/testing/_internal/common_fsdp.py index 51a3deac9c1..fe02eeeabb1 100644 --- a/torch/testing/_internal/common_fsdp.py +++ b/torch/testing/_internal/common_fsdp.py @@ -1498,7 +1498,7 @@ def test_compiled_fsdp(compile_compute_on_module: Optional[type] = None): class SkipModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10, bias=False) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 4011f6752de..8f63db32a07 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -3591,7 +3591,7 @@ def error_inputs_adaptive_max_pool3d(opinfo, device, **kwargs): class _TestParamsMaxPoolBase: - def __init__(self): + def __init__(self) -> None: self.kwargs = { 'kernel_size': [3], 'stride': [2, None], @@ -3628,7 +3628,7 @@ class _TestParamsMaxPoolBase: class _TestParamsMaxPool1d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3,)] self.kwargs['stride'] += [(2,)] @@ -3637,7 +3637,7 @@ class _TestParamsMaxPool1d(_TestParamsMaxPoolBase): class _TestParamsMaxPool2d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3, 2)] self.kwargs['stride'] += [(2, 1)] @@ -3648,7 +3648,7 @@ class _TestParamsMaxPool2d(_TestParamsMaxPoolBase): class _TestParamsMaxPool3d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3, 2, 3)] self.kwargs['stride'] += [(2, 1, 2)] diff --git a/torch/testing/_internal/common_nn.py b/torch/testing/_internal/common_nn.py index 0dd11312c04..7f53a72a7ef 100644 --- a/torch/testing/_internal/common_nn.py +++ b/torch/testing/_internal/common_nn.py @@ -3967,13 +3967,13 @@ def _test_module_empty_input(test_case, module, inp, check_size=True, inference= def _create_basic_net(): class Layer(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.layer_dummy_param = nn.Parameter(torch.empty(3, 5)) self.layer_dummy_buf = nn.Buffer(torch.zeros(1, 3, 3, 7)) class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.l1 = Layer() self.dummy_param = nn.Parameter(torch.empty(3, 5)) diff --git a/torch/testing/_internal/common_pruning.py b/torch/testing/_internal/common_pruning.py index 031e4ad9efb..43dd716c288 100644 --- a/torch/testing/_internal/common_pruning.py +++ b/torch/testing/_internal/common_pruning.py @@ -52,7 +52,7 @@ class SimpleLinear(nn.Module): r"""Model with only Linear layers without biases, some wrapped in a Sequential, some following the Sequential. Used to test basic pruned Linear-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=False), @@ -73,7 +73,7 @@ class LinearBias(nn.Module): r"""Model with only Linear layers, alternating layers with biases, wrapped in a Sequential. Used to test pruned Linear-Bias-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -93,7 +93,7 @@ class LinearActivation(nn.Module): Activation functions modules in between each Linear in the Sequential, and each outside layer. Used to test pruned Linear(Bias)-Activation-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -122,7 +122,7 @@ class LinearActivationFunctional(nn.Module): activationals are called in between each outside layer. Used to test pruned Linear(Bias)-Activation-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -151,7 +151,7 @@ class SimpleConv2d(nn.Module): r"""Model with only Conv2d layers, all without bias, some in a Sequential and some following. Used to test pruned Conv2d-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=False), @@ -171,7 +171,7 @@ class Conv2dBias(nn.Module): r"""Model with only Conv2d layers, some with bias, some in a Sequential and some outside. Used to test pruned Conv2d-Bias-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=True), @@ -194,7 +194,7 @@ class Conv2dActivation(nn.Module): in-between each outside layer. Used to test pruned Conv2d-Bias-Activation-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=True), @@ -222,7 +222,7 @@ class Conv2dPadBias(nn.Module): Used to test that bias is propagated correctly in the special case of pruned Conv2d-Bias-(Activation)Conv2d fusion, when the second Conv2d layer has padding > 0.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, padding=1, bias=True), @@ -255,7 +255,7 @@ class Conv2dPool(nn.Module): Activation function modules in between each layer, Pool2d modules in between each layer. Used to test pruned Conv2d-Pool2d-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=True), @@ -289,7 +289,7 @@ class Conv2dPoolFlattenFunctional(nn.Module): Activation functions and Pool2ds in between each layer also. Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True), @@ -323,7 +323,7 @@ class Conv2dPoolFlatten(nn.Module): Activation functions and Pool2ds in between each layer also. Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True), diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py index 553d483ab0a..2482629fe99 100644 --- a/torch/testing/_internal/common_quantization.py +++ b/torch/testing/_internal/common_quantization.py @@ -1326,7 +1326,7 @@ class PT2EQuantizationTestCase(QuantizationTestCase): def _get_pt2e_quantized_linear(self, is_per_channel=False) -> torch.fx.GraphModule: class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(2, 2) @@ -1343,7 +1343,7 @@ class PT2EQuantizationTestCase(QuantizationTestCase): # Below are a series of toy models to use in testing quantization class SingleLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float) @@ -1381,7 +1381,7 @@ class SingleLayerLinearDynamicModel(torch.nn.Module): return (torch.rand(1, 5),) class LinearAddModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float) @@ -1436,7 +1436,7 @@ class LSTMwithHiddenDynamicModel(torch.nn.Module): return x, hid class ConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1448,7 +1448,7 @@ class ConvModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class ConvTransposeModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.ConvTranspose2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1494,7 +1494,7 @@ class AnnotatedConvTransposeModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class ConvBnModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float) @@ -1508,7 +1508,7 @@ class ConvBnModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class AnnotatedConvBnModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = default_qconfig self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1527,7 +1527,7 @@ class AnnotatedConvBnModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class ConvBnReLUModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float) @@ -1571,7 +1571,7 @@ class AnnotatedConvBnReLUModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class TwoLayerConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.conv2 = torch.nn.Conv2d(5, 5, 1, bias=False).to(dtype=torch.float) @@ -1585,7 +1585,7 @@ class TwoLayerConvModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class TwoLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float) @@ -1599,7 +1599,7 @@ class TwoLayerLinearModel(torch.nn.Module): return (torch.rand(1, 5),) class LinearModelWithSubmodule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.subm = TwoLayerLinearModel() self.fc = nn.Linear(5, 5) @@ -1613,7 +1613,7 @@ class LinearModelWithSubmodule(nn.Module): return self.subm.get_example_inputs() class AnnotatedTwoLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = QuantWrapper(torch.nn.Linear(8, 5).to(dtype=torch.float)) @@ -1628,7 +1628,7 @@ class AnnotatedTwoLayerLinearModel(torch.nn.Module): return (torch.rand(1, 5),) class ActivationsTestModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm") self.quant = torch.ao.quantization.QuantStub() @@ -1644,7 +1644,7 @@ class ActivationsTestModel(torch.nn.Module): return x class LinearReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1658,7 +1658,7 @@ class LinearReluModel(torch.nn.Module): class LinearReluLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1674,7 +1674,7 @@ class LinearReluLinearModel(torch.nn.Module): return (torch.rand(1, 5),) class LinearReluAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1710,7 +1710,7 @@ class LinearBnLeakyReluModel(torch.nn.Module): return (torch.rand(1, 5),) class LinearTanhModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(5, 5) self.tanh = nn.Tanh() @@ -1785,7 +1785,7 @@ class ConvBnAddReluModel(torch.nn.Module): # TODO: self.fc should be self.conv class ConvReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1799,7 +1799,7 @@ class ConvReluModel(torch.nn.Module): # TODO: self.fc should be self.conv class ConvReluConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1816,7 +1816,7 @@ class ConvReluConvModel(torch.nn.Module): # TODO: self.fc should be self.conv class ConvReluAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1834,7 +1834,7 @@ class ConvReluAddModel(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class NormalizationTestModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.quant = torch.ao.quantization.QuantStub() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) @@ -1855,7 +1855,7 @@ class NormalizationTestModel(torch.nn.Module): return x class NestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = TwoLayerLinearModel() @@ -1887,7 +1887,7 @@ class AnnotatedNestedModel(torch.nn.Module): return x class AnnotatedSubNestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = QuantWrapper(TwoLayerLinearModel()) @@ -1902,7 +1902,7 @@ class AnnotatedSubNestedModel(torch.nn.Module): return x class AnnotatedCustomConfigNestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = TwoLayerLinearModel() @@ -1928,7 +1928,7 @@ class AnnotatedCustomConfigNestedModel(torch.nn.Module): return x class QuantSubModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = QuantWrapper(TwoLayerLinearModel()) @@ -1943,7 +1943,7 @@ class QuantSubModel(torch.nn.Module): return x class InnerModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.relu1 = torch.nn.ReLU() @@ -1970,7 +1970,7 @@ class InnerModule(torch.nn.Module): torch.ao.quantization.fuse_modules(self, fusable_layers, inplace=True) class FunctionalLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.weight = torch.rand((5, 5)) self.bias = torch.zeros(5) @@ -1982,7 +1982,7 @@ class FunctionalLinear(torch.nn.Module): return (torch.rand(1, 5),) class SingleLayerFunctionalLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() @@ -1994,7 +1994,7 @@ class SingleLayerFunctionalLinearModel(torch.nn.Module): return self.linear1.get_example_inputs() class TwoLayerFunctionalLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.linear2 = FunctionalLinear() @@ -2008,7 +2008,7 @@ class TwoLayerFunctionalLinearModel(torch.nn.Module): return self.linear1.get_example_inputs() class FunctionalLinearAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.linear2 = FunctionalLinear() @@ -2023,7 +2023,7 @@ class FunctionalLinearAddModel(torch.nn.Module): return self.linear1.get_example_inputs() class FunctionalLinearReluModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = FunctionalLinear() @@ -2036,7 +2036,7 @@ class FunctionalLinearReluModel(nn.Module): return self.linear.get_example_inputs() class FunctionalLinearReluLinearModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.relu = nn.ReLU() @@ -2052,7 +2052,7 @@ class FunctionalLinearReluLinearModel(nn.Module): return self.linear1.get_example_inputs() class FunctionalConv2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.weight = torch.rand(3, 3, 3, 3) self.bias = torch.rand(3) @@ -2068,7 +2068,7 @@ class FunctionalConv2d(torch.nn.Module): return (torch.rand(1, 3, 5, 5),) class SingleLayerFunctionalConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() @@ -2080,7 +2080,7 @@ class SingleLayerFunctionalConvModel(torch.nn.Module): return self.conv1.get_example_inputs() class TwoLayerFunctionalConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() self.conv2 = FunctionalConv2d() @@ -2094,7 +2094,7 @@ class TwoLayerFunctionalConvModel(torch.nn.Module): return self.conv1.get_example_inputs() class FunctionalConvReluModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = FunctionalConv2d() @@ -2107,7 +2107,7 @@ class FunctionalConvReluModel(nn.Module): return self.conv.get_example_inputs() class FunctionalConvReluConvModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() self.relu = nn.ReLU() @@ -2126,7 +2126,7 @@ class SkipQuantModel(torch.nn.Module): r"""We can skip quantization by explicitly setting qconfig of a submodule to None """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub = InnerModule() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) @@ -2158,7 +2158,7 @@ class AnnotatedSkipQuantModel(torch.nn.Module): class QuantStubModel(torch.nn.Module): r"""A Module with manually inserted `QuantStub` and `DeQuantStub` """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = torch.ao.quantization.get_default_qconfig("qnnpack") self.quant = QuantStub() @@ -2243,11 +2243,11 @@ class ManualConvLinearSymmQATModel(ManualConvLinearQATModel): r"""Same as ManualConvLinearQATModule but with Symmetric Quantization. Supported only with qnnpack. """ - def __init__(self): + def __init__(self) -> None: super().__init__(default_symmetric_qnnpack_qat_qconfig) class ManualEmbeddingBagLinear(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, mode='sum') self.emb.qconfig = default_embedding_qat_qconfig @@ -2287,7 +2287,7 @@ class DeFusedEmbeddingBagLinear(nn.Module): return self.dequant(x) class SubModelForFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float) self.bn = nn.BatchNorm2d(2).to(dtype=torch.float) @@ -2299,7 +2299,7 @@ class SubModelForFusion(nn.Module): class SubModelWithoutFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float) self.relu = nn.ReLU(inplace=False).to(dtype=torch.float) @@ -2354,7 +2354,7 @@ class ModelForFusion(nn.Module): return x class ConvBNReLU(nn.Sequential): - def __init__(self): + def __init__(self) -> None: super().__init__( nn.Conv2d(3, 3, 1, 1, bias=False), nn.BatchNorm2d(3), @@ -2362,7 +2362,7 @@ class ConvBNReLU(nn.Sequential): ) class ModelWithSequentialFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(3, 3, 1) self.relu1 = nn.ReLU(inplace=False) @@ -2388,7 +2388,7 @@ class ModelWithSequentialFusion(nn.Module): return x class ModelForFusionWithBias(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(3, 2, 5, bias=True).to(dtype=torch.float) self.bn1 = nn.BatchNorm2d(2).to(dtype=torch.float) @@ -2409,7 +2409,7 @@ class ModelForFusionWithBias(nn.Module): return x class ModelForLinearBNFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = nn.Linear(20, 10) self.bn = nn.BatchNorm1d(10) @@ -2428,7 +2428,7 @@ class DummyObserver(torch.nn.Module): class ModelForConvTransposeBNFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.ConvTranspose1d(3, 3, 1) self.bn1 = nn.BatchNorm1d(3) @@ -2450,7 +2450,7 @@ class ModelForConvTransposeBNFusion(nn.Module): class ModelWithFunctionals(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.mycat = nnq.FloatFunctional() self.myadd = nnq.FloatFunctional() @@ -2474,7 +2474,7 @@ class ModelWithFunctionals(torch.nn.Module): class ResNetBase(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2507,7 +2507,7 @@ class ResNetBase(torch.nn.Module): torch.ao.quantization.fuse_modules(self, [['conv1', 'bn1', 'relu1']], inplace=True) class ModelMultipleOps(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2542,7 +2542,7 @@ class ModelMultipleOps(torch.nn.Module): # accurately with fake-quant so this model does not # contain those operations class ModelMultipleOpsNoAvgPool(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2572,7 +2572,7 @@ class ModelMultipleOpsNoAvgPool(torch.nn.Module): return out class EmbeddingBagModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, include_last_offset=True, scale_grad_by_freq=False, mode='sum') @@ -2581,7 +2581,7 @@ class EmbeddingBagModule(torch.nn.Module): return self.emb(indices, offsets, per_sample_weights) class EmbeddingModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12) @@ -2589,7 +2589,7 @@ class EmbeddingModule(torch.nn.Module): return self.emb(indices) class EmbeddingWithStaticLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12) self.fc = torch.nn.Linear(4, 2) @@ -2671,7 +2671,7 @@ class SparseNNModel(nn.Module): class TestHelperModules: class Conv2dPropAnnotaton(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.linear = torch.nn.Linear(3, 3) @@ -2684,7 +2684,7 @@ class TestHelperModules: return x class Conv2dWithObsSharingOps(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.hardtanh = torch.nn.Hardtanh() @@ -2698,7 +2698,7 @@ class TestHelperModules: return x class Conv2dWithTwoLinearPermute(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 16, 3) self.linear1 = torch.nn.Linear(16, 8, bias=False) @@ -2710,7 +2710,7 @@ class TestHelperModules: return self.linear2(self.linear1(permute_out)) class Conv2dWithTwoLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 16, 3) self.linear1 = torch.nn.Linear(64, 8, bias=False) @@ -2722,7 +2722,7 @@ class TestHelperModules: return self.linear2(self.linear1(reshape_out)) class ConvLinearWPermute(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 8, 3) self.linear1 = torch.nn.Linear(8, 8) @@ -2733,7 +2733,7 @@ class TestHelperModules: return self.linear1(permute_out) class TwoLinearModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = torch.nn.Linear(8, 16, bias=False) self.linear2 = torch.nn.Linear(16, 8) @@ -2742,7 +2742,7 @@ class TestHelperModules: return self.linear2(self.linear1(x)) class ConvMaxPool2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(2, 2, 1) self.pool = torch.nn.MaxPool2d(1, 1) @@ -2753,7 +2753,7 @@ class TestHelperModules: return x class ConvWithAdaptiveAvgPool2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d((1, 1)) @@ -2806,7 +2806,7 @@ class TestHelperModules: return self.relu(x) class Conv2dThenConv1d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1d = torch.nn.Conv1d(3, 3, 3) self.conv2d = torch.nn.Conv2d(3, 3, 3) @@ -2821,7 +2821,7 @@ class TestHelperModules: return (torch.randn(1, 3, 5, 5),) class Conv2dWithCat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 3, 3) self.conv2 = torch.nn.Conv2d(3, 3, 3) @@ -2833,7 +2833,7 @@ class TestHelperModules: return z class Conv2dWithTwoCat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 3, 3) self.conv2 = torch.nn.Conv2d(3, 3, 3) @@ -2854,7 +2854,7 @@ class TestHelperModules: return w class EmbeddingModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12) @@ -2862,7 +2862,7 @@ class TestHelperModules: return self.emb(indices) class EmbeddingConvLinearModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=8) self.conv = torch.nn.Conv2d(8, 16, (1, 3)) @@ -2898,7 +2898,7 @@ class TestHelperModules: return x class ConvBnReLU2dAndLinearReLU(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv_bn_relu = TestHelperModules.ConvWithBNRelu(relu=True) self.linear = torch.nn.Linear(3, 8, bias=False) @@ -2911,7 +2911,7 @@ class TestHelperModules: return linear_out class GroupwiseConv2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(4, 4, 3, groups=2) @@ -2922,7 +2922,7 @@ class TestHelperModules: return (torch.randn(2, 4, 10, 10),) class LinearReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 12723039358..8ec568c665c 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -1092,7 +1092,7 @@ def sanitize_pytest_xml(xml_file: str): def get_pytest_test_cases(argv: List[str]) -> List[str]: class TestCollectorPlugin: - def __init__(self): + def __init__(self) -> None: self.tests = [] def pytest_collection_finish(self, session): diff --git a/torch/testing/_internal/data/network1.py b/torch/testing/_internal/data/network1.py index e6180f4f2d2..8755643a78c 100644 --- a/torch/testing/_internal/data/network1.py +++ b/torch/testing/_internal/data/network1.py @@ -5,6 +5,6 @@ import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(10, 20) diff --git a/torch/testing/_internal/data/network2.py b/torch/testing/_internal/data/network2.py index fdb583d0af9..19b0b8ee53d 100644 --- a/torch/testing/_internal/data/network2.py +++ b/torch/testing/_internal/data/network2.py @@ -5,7 +5,7 @@ import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(10, 20) self.relu = nn.ReLU() diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 276dc4fa6e7..a8e1434ecdb 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -107,7 +107,7 @@ else: class NetWithBuffers(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -260,7 +260,7 @@ class DDPUnevenTestInput(NamedTuple): class _FC2(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = nn.Linear(10, 50, bias=True) self.fc.bias.requires_grad = False @@ -271,7 +271,7 @@ class _FC2(nn.Module): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(2, 10, bias=False) self.fc2 = _FC2() @@ -289,7 +289,7 @@ class Net(nn.Module): class LargeNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(1000, 2000, bias=False) self.fc2 = nn.Linear(2000, 500, bias=False) @@ -301,7 +301,7 @@ class LargeNet(nn.Module): class Task(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.p = nn.Parameter(torch.ones(2, 2)) @@ -325,7 +325,7 @@ class BatchNormNet(nn.Module): class UnusedParamTwoLinLayerNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 10, bias=False) @@ -338,7 +338,7 @@ class UnusedParamTwoLinLayerNet(nn.Module): class DictOutputModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.module = UnusedParamTwoLinLayerNet() @@ -352,7 +352,7 @@ class DictOutputModule(nn.Module): class TwoLinLayerNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -383,7 +383,7 @@ class EmbeddingNetDifferentParams(nn.Module): class ControlFlowToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin1 = nn.Linear(10, 10, bias=False) self.lin2 = nn.Linear(10, 10, bias=False) @@ -4408,7 +4408,7 @@ class DistributedTest: @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"])) def test_ddp_zero_output_features(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10) self.relu = nn.ReLU() @@ -4422,7 +4422,7 @@ class DistributedTest: @skip_but_pass_in_sandcastle_if(BACKEND == "nccl", "Gloo-only test") def test_ddp_create_graph(self): class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.p = nn.Parameter(torch.tensor(1.0)) @@ -4979,7 +4979,7 @@ class DistributedTest: mp_config = self._get_fp16_config() class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.m = torch.nn.Linear(1, 5) self.register_buffer('buffer', torch.randn(1, 2)) @@ -7241,7 +7241,7 @@ class DistributedTest: # for models with SyncBN or general collective comm when # throw_on_early_termination=True. class ModelWithComm(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(2, 40, bias=False) @@ -7523,7 +7523,7 @@ class DistributedTest: error_str = "Intentional error" class ExceptionModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = nn.Parameter(torch.ones(1, requires_grad=True)) @@ -7731,7 +7731,7 @@ class DistributedTest: @skip_if_lt_x_gpu(2) def test_ddp_unused_params_rebuild_buckets_exception(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10, bias=False) self.net2 = nn.Linear(10, 10, bias=False) @@ -7785,7 +7785,7 @@ class DistributedTest: # When find_unused_parameters=True, ensure we mark unused parameters # even if they share gradient accumulators. class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() # net1, bias, and net1.bias are all unused params. self.net1 = nn.Linear(10, 5, bias=False) @@ -8984,7 +8984,7 @@ class DistributedTest: @skip_if_lt_x_gpu(2) def test_ddp_build_debug_param_to_name_mapping_requires_grad(self): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10) # Is not tracked by DDP and should not show up in param to @@ -9009,7 +9009,7 @@ class DistributedTest: debug_mode_off = dist.get_debug_level() == dist.DebugLevel.OFF class SubModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.embedding_net = EmbeddingNetDifferentParams(0) self.lin = TwoLinLayerNet() @@ -9025,7 +9025,7 @@ class DistributedTest: return x class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub_module = SubModule() @@ -9261,7 +9261,7 @@ class DistributedTest: torch.cuda.set_device(rank) class NestedOutputModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(100, 1, bias=False) @@ -9347,7 +9347,7 @@ class DistributedTest: torch.cuda.set_device(self.rank) class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(10, 10, bias=False) self.fc2 = nn.Linear(10, 10, bias=False) @@ -9384,7 +9384,7 @@ class DistributedTest: ) def test_detect_ddp_is_actually_static(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10, bias=False) self.net2 = nn.Linear(10, 10) @@ -9430,7 +9430,7 @@ class DistributedTest: def _test_ddp_new_tensor_in_fwd(self, static_graph): # Test from https://github.com/pytorch/pytorch/issues/60733 class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(10, 10, bias=False) self.fc2 = nn.Linear(10, 10, bias=False) @@ -9965,7 +9965,7 @@ class DistributedTest: torch.cuda.manual_seed(rank) class NetWithBuffers(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -10002,7 +10002,7 @@ class DistributedTest: ) def test_static_graph_multi_forward(self): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10) self.relu = nn.ReLU() @@ -10084,7 +10084,7 @@ class DistributedTest: ) def test_stateless_api_with_ddp(self): class MockModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.l1 = torch.nn.Linear(1, 1) buffer = torch.ones(1) @@ -10131,7 +10131,7 @@ class DistributedTest: @skip_if_lt_x_gpu(2) def test_ddp_forward_backward_hook(self): class DummyTestModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() torch.manual_seed(0) self.fc = nn.Linear(2, 2) @@ -10391,7 +10391,7 @@ class DistributedTest: return func(*args, **kwargs) class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(10, 10) diff --git a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py index 1ea7eace829..5d7e7b1244b 100644 --- a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py @@ -44,7 +44,7 @@ class Policy(nn.Module): Copying the code to make these two examples independent. See https://github.com/pytorch/examples/tree/master/reinforcement_learning """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.affine1 = nn.Linear(4, 128) self.dropout = nn.Dropout(p=0.6) @@ -97,7 +97,7 @@ class Observer: select an action. Then, the observer applies the action to its environment and reports the reward to the agent. """ - def __init__(self): + def __init__(self) -> None: self.id = rpc.get_worker_info().id self.env = DummyEnv() self.env.seed(SEED) diff --git a/torch/testing/_internal/distributed/rpc/rpc_test.py b/torch/testing/_internal/distributed/rpc/rpc_test.py index 3a3977d7b89..413f97d94eb 100644 --- a/torch/testing/_internal/distributed/rpc/rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/rpc_test.py @@ -144,7 +144,7 @@ def set_and_check_done(value): TensorClass = namedtuple("TensorClass", ["tensors"]) class MyPickleClass: - def __init__(self): + def __init__(self) -> None: self.t = None def __getstate__(self): @@ -1446,7 +1446,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon): world_size=self.world_size) class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = torch.nn.Linear(3, 4) diff --git a/torch/testing/_internal/jit_metaprogramming_utils.py b/torch/testing/_internal/jit_metaprogramming_utils.py index 8171a959189..02a9fcc5405 100644 --- a/torch/testing/_internal/jit_metaprogramming_utils.py +++ b/torch/testing/_internal/jit_metaprogramming_utils.py @@ -604,7 +604,7 @@ def create_script_module(self, nn_module, constructor_args, *args, **kwargs): class TheModule(torch.jit.ScriptModule): __constants__ = submodule_constants - def __init__(self): + def __init__(self) -> None: super().__init__() self.submodule = nn_module(*constructor_args) diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py index c0109ecacf7..a8c7fa261f9 100644 --- a/torch/testing/_internal/jit_utils.py +++ b/torch/testing/_internal/jit_utils.py @@ -770,7 +770,7 @@ def _get_py3_code(code, fn_name): return fn class TensorExprTestOptions: - def __init__(self): + def __init__(self) -> None: self.old_profiling_executor = torch._C._jit_set_profiling_executor(True) self.old_profiling_mode = torch._C._get_graph_executor_optimize(True) diff --git a/torch/utils/_sympy/value_ranges.py b/torch/utils/_sympy/value_ranges.py index 4a01d8e53b9..29ee1886261 100644 --- a/torch/utils/_sympy/value_ranges.py +++ b/torch/utils/_sympy/value_ranges.py @@ -936,7 +936,7 @@ class SymPyValueRangeAnalysis: class ValueRangeAnalysis(SymPyValueRangeAnalysis): - def __init__(self): + def __init__(self) -> None: self.name = "ValueRangeAnalysis" boolean_operators = ( "xor", diff --git a/torch/utils/data/_utils/worker.py b/torch/utils/data/_utils/worker.py index b07439526bf..c61b78d42d8 100644 --- a/torch/utils/data/_utils/worker.py +++ b/torch/utils/data/_utils/worker.py @@ -28,7 +28,7 @@ if IS_WINDOWS: # is gone, and the only way to check it through OS is to let the worker have a process handle # of the manager and ask if the process status has changed. class ManagerWatchdog: - def __init__(self): + def __init__(self) -> None: self.manager_pid = os.getppid() # mypy cannot detect this code is windows only @@ -60,7 +60,7 @@ if IS_WINDOWS: else: class ManagerWatchdog: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: self.manager_pid = os.getppid() self.manager_dead = False diff --git a/torch/utils/module_tracker.py b/torch/utils/module_tracker.py index 9feef40ca4d..01e966c712b 100644 --- a/torch/utils/module_tracker.py +++ b/torch/utils/module_tracker.py @@ -52,7 +52,7 @@ class ModuleTracker: A Set containing the fqn for each module currently running their forward """ - def __init__(self): + def __init__(self) -> None: self.parents = {"Global"} self._known_modules: weakref.WeakKeyDictionary = weakref.WeakKeyDictionary() self._seen_modules: weakref.WeakSet = weakref.WeakSet()