diff --git a/torch/_classes.py b/torch/_classes.py index 58b347453524a2..069f13dcb6793d 100644 --- a/torch/_classes.py +++ b/torch/_classes.py @@ -19,7 +19,7 @@ def __getattr__(self, attr): class _Classes(types.ModuleType): __file__ = "_classes.py" - def __init__(self): + def __init__(self) -> None: super().__init__("torch.classes") def __getattr__(self, name): diff --git a/torch/_decomp/decompositions_for_rng.py b/torch/_decomp/decompositions_for_rng.py index 66bd33075a5ed8..a62a28f783b713 100644 --- a/torch/_decomp/decompositions_for_rng.py +++ b/torch/_decomp/decompositions_for_rng.py @@ -71,7 +71,7 @@ class PhiloxState: trace time. """ - def __init__(self): + def __init__(self) -> None: self.reset() def reset(self): diff --git a/torch/_dynamo/backends/distributed.py b/torch/_dynamo/backends/distributed.py index a58571c77c9302..8d65f1670ae1f1 100644 --- a/torch/_dynamo/backends/distributed.py +++ b/torch/_dynamo/backends/distributed.py @@ -247,7 +247,7 @@ def run_node(self, n: Node) -> Any: # This gives us the appropriately strided outputs here which will reflect runtime strides. class FakeifyFirstAOTInvocationGuard: - def __init__(self): + def __init__(self) -> None: self.tc = torch._guards.TracingContext.try_get() assert self.tc torch._guards.TracingContext.try_get().fakify_first_call = True diff --git a/torch/_dynamo/code_context.py b/torch/_dynamo/code_context.py index 59c912bd30f771..727aad9349555f 100644 --- a/torch/_dynamo/code_context.py +++ b/torch/_dynamo/code_context.py @@ -5,7 +5,7 @@ class CodeContextDict: - def __init__(self): + def __init__(self) -> None: self.code_context = ExactWeakKeyDictionary() def has_context(self, code: types.CodeType): diff --git a/torch/_dynamo/debug_utils.py b/torch/_dynamo/debug_utils.py index 5e9656f2068c36..49d9b302faebec 100644 --- a/torch/_dynamo/debug_utils.py +++ b/torch/_dynamo/debug_utils.py @@ -170,7 +170,7 @@ def convert(gm): """ from torch.nn import * class Repro(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() """ ) @@ -491,7 +491,7 @@ def _mk_defaulter(d: T) -> Callable[[Optional[T]], T]: class NopInputReader: - def __init__(self): + def __init__(self) -> None: self.total = 0 def storage(self, storage_hash, nbytes, *, device=None, dtype_hint=None): diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 3630f0da6b247c..de4ef26fb1b284 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -496,7 +496,7 @@ def _fn(*args, **kwargs): wrapper function. >> class CallableClass: - >> def __init__(self): + >> def __init__(self) -> None: >> super().__init__() >> self.relu = torch.nn.ReLU() >> @@ -577,7 +577,7 @@ def __reduce__(self): class RunOnlyContext(_TorchDynamoContext): - def __init__(self): + def __init__(self) -> None: # cudagraph trees relies on generation increment def on_enter(): torch._dynamo.mutation_guard.GenerationTracker.generation += 1 @@ -589,7 +589,7 @@ def __reduce__(self): class DisableContext(_TorchDynamoContext): - def __init__(self): + def __init__(self) -> None: super().__init__(callback=None) def __call__(self, fn): diff --git a/torch/_dynamo/exc.py b/torch/_dynamo/exc.py index 2ca862c008740a..5a0915a9727fd9 100644 --- a/torch/_dynamo/exc.py +++ b/torch/_dynamo/exc.py @@ -74,7 +74,7 @@ def __init__(self, name): class ResetRequired(TorchDynamoException): - def __init__(self): + def __init__(self) -> None: super().__init__( textwrap.dedent( """ diff --git a/torch/_dynamo/profiler.py b/torch/_dynamo/profiler.py index b7e9553ce219f7..841ab87cdf68d9 100644 --- a/torch/_dynamo/profiler.py +++ b/torch/_dynamo/profiler.py @@ -92,7 +92,7 @@ def print_missing(stack): class Profiler: unique_graphs = 0 - def __init__(self): + def __init__(self) -> None: self.prof = torch.profiler.profile( activities=[torch.profiler.ProfilerActivity.CPU], with_stack=should_print_missing(), diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py index 09752822dd8209..5353327d98fee4 100644 --- a/torch/_dynamo/variables/base.py +++ b/torch/_dynamo/variables/base.py @@ -70,7 +70,7 @@ class MutableLocal(MutableLocalBase): state. """ - def __init__(self): + def __init__(self) -> None: super().__init__(MutableLocalSource.Local) def __hash__(self): diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index f2819fb2b1e1e3..383f9e8b4d04b6 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -270,7 +270,7 @@ def __eq__(self, other): class BackwardStateGraphArg(GraphArg): - def __init__(self): + def __init__(self) -> None: super().__init__( source=None, _example=BackwardState(), @@ -2638,7 +2638,7 @@ class SourcelessBuilder: if/else type->VariableTracker trees that were cropping up all over dynamo. """ - def __init__(self): + def __init__(self) -> None: raise AssertionError("Use SourcelessBuilder.create()") @staticmethod diff --git a/torch/_export/db/examples/class_method.py b/torch/_export/db/examples/class_method.py index 5d7f8b5b705497..f701f54d4f4ea1 100644 --- a/torch/_export/db/examples/class_method.py +++ b/torch/_export/db/examples/class_method.py @@ -10,7 +10,7 @@ class ClassMethod(torch.nn.Module): def method(cls, x): return x + 1 - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(4, 2) diff --git a/torch/_export/db/examples/cond_branch_class_method.py b/torch/_export/db/examples/cond_branch_class_method.py index 9ce4a9d6f34a56..22600cc504348d 100644 --- a/torch/_export/db/examples/cond_branch_class_method.py +++ b/torch/_export/db/examples/cond_branch_class_method.py @@ -26,7 +26,7 @@ class CondBranchClassMethod(torch.nn.Module): NOTE: If the `pred` is test on a dim with batch size < 2, it will be specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.subm = MySubModule() diff --git a/torch/_export/db/examples/model_attr_mutation.py b/torch/_export/db/examples/model_attr_mutation.py index dfebbebd8b1b38..4aa623c7dc39ef 100644 --- a/torch/_export/db/examples/model_attr_mutation.py +++ b/torch/_export/db/examples/model_attr_mutation.py @@ -8,7 +8,7 @@ class ModelAttrMutation(torch.nn.Module): Attribute mutation is not supported. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.attr_list = [torch.randn(3, 2), torch.randn(3, 2)] diff --git a/torch/_export/db/examples/scalar_output.py b/torch/_export/db/examples/scalar_output.py index 83dd36379677d8..86d3b4645330c4 100644 --- a/torch/_export/db/examples/scalar_output.py +++ b/torch/_export/db/examples/scalar_output.py @@ -11,7 +11,7 @@ class ScalarOutput(torch.nn.Module): Returning scalar values from the graph is supported, in addition to Tensor outputs. Symbolic shapes are captured and rank is specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x): diff --git a/torch/_export/db/examples/specialized_attribute.py b/torch/_export/db/examples/specialized_attribute.py index 39f7314bec70ad..f17092f9afc681 100644 --- a/torch/_export/db/examples/specialized_attribute.py +++ b/torch/_export/db/examples/specialized_attribute.py @@ -11,7 +11,7 @@ class SpecializedAttribute(torch.nn.Module): Model attributes are specialized. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = "moo" self.b = 4 diff --git a/torch/_export/passes/lift_constants_pass.py b/torch/_export/passes/lift_constants_pass.py index 823c66d2bc0942..08d93287d3205c 100644 --- a/torch/_export/passes/lift_constants_pass.py +++ b/torch/_export/passes/lift_constants_pass.py @@ -24,7 +24,7 @@ class ConstantAttrMap(collections.abc.MutableMapping): if that's the case). """ - def __init__(self): + def __init__(self) -> None: # Underlying dict that we use to implement this mapping. self._constant_attrs: Dict[ Union[int, torch.Tensor, FakeScriptObject], List[Any] diff --git a/torch/_export/serde/serialize.py b/torch/_export/serde/serialize.py index ae0f6e39f23b90..28509b8341691f 100644 --- a/torch/_export/serde/serialize.py +++ b/torch/_export/serde/serialize.py @@ -1413,7 +1413,7 @@ class Result: constants: Dict[str, Union[torch.Tensor, FakeScriptObject, torch.ScriptObject]] example_inputs: Optional[Tuple[Tuple[torch.Tensor, ...], Dict[str, Any]]] - def __init__(self): + def __init__(self) -> None: self.serialized_name_to_node: Dict[str, torch.fx.Node] = {} self.serialized_name_to_meta: Dict[str, MetaType] = {} self.graph = torch.fx.Graph() diff --git a/torch/_functorch/_aot_autograd/schemas.py b/torch/_functorch/_aot_autograd/schemas.py index 77f1cf220140df..9b1b40b4830401 100644 --- a/torch/_functorch/_aot_autograd/schemas.py +++ b/torch/_functorch/_aot_autograd/schemas.py @@ -602,7 +602,7 @@ class SubclassMeta: # Optional field because we don't compute for inference graphs grad_input_metas: Optional[List[Union[int, SubclassCreationMeta]]] = None - def __init__(self): + def __init__(self) -> None: # The fields in this class get set after its construction. pass diff --git a/torch/_functorch/aot_autograd.py b/torch/_functorch/aot_autograd.py index b7bd95a3ed4941..e9fedb3d53cc9e 100644 --- a/torch/_functorch/aot_autograd.py +++ b/torch/_functorch/aot_autograd.py @@ -878,7 +878,7 @@ def functional_call(named_params, named_buffers, *args, **kwargs): ) class AOTModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.orig_module = mod diff --git a/torch/_functorch/autograd_function.py b/torch/_functorch/autograd_function.py index f80b7dee55be90..270c1895f6fd54 100644 --- a/torch/_functorch/autograd_function.py +++ b/torch/_functorch/autograd_function.py @@ -30,7 +30,7 @@ # We do this by using creating a custom HigherOrderOperator that only functorch # dispatches specially. class CustomFunctionHigherOrderOperator(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("custom_function_call") def __call__(self, autograd_function, *args, **kwargs): @@ -713,7 +713,7 @@ def new_forward(ctx, *args, **kwargs): class AutogradFunctionApply(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("autograd_function_apply") def __call__(self, fwd, bwd, *fwd_args, **fwd_kwargs): diff --git a/torch/_guards.py b/torch/_guards.py index c0633861a7b812..91c2c664d7544c 100644 --- a/torch/_guards.py +++ b/torch/_guards.py @@ -407,7 +407,7 @@ def __eq__(self, other): class ModuleContext(Checkpointable[ModuleContextCheckpointState]): - def __init__(self): + def __init__(self) -> None: self.nn_modules: Dict[str, Any] = {} def copy_graphstate(self): @@ -456,7 +456,7 @@ class GlobalContext(Checkpointable[GlobalContextCheckpointState]): "autocast_cache_enabled", } - def __init__(self): + def __init__(self) -> None: self.global_state: Dict[str, Tuple[Callable, ...]] = {} def copy_graphstate(self): @@ -524,7 +524,7 @@ def remove_guards_with_source(self, source): class GuardsContext(Checkpointable[GuardsCheckpointState]): - def __init__(self): + def __init__(self) -> None: self.dynamo_guards: GuardsSet = GuardsSet() self.aotautograd_guards: List[GuardEnvExpr] = [] diff --git a/torch/_higher_order_ops/auto_functionalize.py b/torch/_higher_order_ops/auto_functionalize.py index 40178fa750fc98..00f43e6acdeeeb 100644 --- a/torch/_higher_order_ops/auto_functionalize.py +++ b/torch/_higher_order_ops/auto_functionalize.py @@ -54,7 +54,7 @@ class AutoFunctionalized(HigherOrderOperator): underscore is to prevent collisions with kwarg names in **kwargs. """ - def __init__(self): + def __init__(self) -> None: super().__init__("auto_functionalized") def __call__( diff --git a/torch/_higher_order_ops/effects.py b/torch/_higher_order_ops/effects.py index f20c87c7e5876f..3bba77a5cfc80a 100644 --- a/torch/_higher_order_ops/effects.py +++ b/torch/_higher_order_ops/effects.py @@ -55,7 +55,7 @@ class WithEffects(HigherOrderOperator): per "effect type", which are enumerated in the _EffectType enum. """ - def __init__(self): + def __init__(self) -> None: super().__init__("with_effects") def __call__( diff --git a/torch/_higher_order_ops/flex_attention.py b/torch/_higher_order_ops/flex_attention.py index e3c9d718b2f4db..992c7398b2a93d 100644 --- a/torch/_higher_order_ops/flex_attention.py +++ b/torch/_higher_order_ops/flex_attention.py @@ -38,7 +38,7 @@ def __torch_function__(self, func, types, args, kwargs=None): class FlexAttentionHOP(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("flex_attention") def __call__( @@ -74,7 +74,7 @@ def __call__( class FlexAttentionBackwardHOP(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("flex_attention_backward") def __call__( diff --git a/torch/_higher_order_ops/out_dtype.py b/torch/_higher_order_ops/out_dtype.py index 5c9ca4f3f16913..d1557909427645 100644 --- a/torch/_higher_order_ops/out_dtype.py +++ b/torch/_higher_order_ops/out_dtype.py @@ -45,7 +45,7 @@ class OutDtypeOperator(HigherOrderOperator): 3. Cast the output to `out_dtype` """ - def __init__(self): + def __init__(self) -> None: super().__init__("out_dtype") # TODO(ydwu4): Subclassing HigherOrderOperator causes __module__ to # become different (torch._higher_order_ops.out_dtype) which will result diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py index 779ab2838b3c27..ff01b0c0124025 100644 --- a/torch/_higher_order_ops/triton_kernel_wrap.py +++ b/torch/_higher_order_ops/triton_kernel_wrap.py @@ -519,7 +519,7 @@ def identify_mutated_tensors(kernel, kwargs): # Used for wrapping a Triton Kernel class TritonKernelWrapperMutation(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("triton_kernel_wrapper_mutation") @@ -528,7 +528,7 @@ def __init__(self): # Used for wrapping a Triton Kernel in a functional manner class TritonKernelWrapperFunctional(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("triton_kernel_wrapper_functional") diff --git a/torch/_higher_order_ops/while_loop.py b/torch/_higher_order_ops/while_loop.py index 4924e1f3d4499d..e19fa162105816 100644 --- a/torch/_higher_order_ops/while_loop.py +++ b/torch/_higher_order_ops/while_loop.py @@ -18,7 +18,7 @@ class WhileLoopOp(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("while_loop") def __call__( diff --git a/torch/_higher_order_ops/wrap.py b/torch/_higher_order_ops/wrap.py index a26253405c430b..d6faef206619a9 100644 --- a/torch/_higher_order_ops/wrap.py +++ b/torch/_higher_order_ops/wrap.py @@ -15,7 +15,7 @@ # Used for testing the HigherOrderOperator mechanism class Wrap(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("wrap") def __call__(self, func, *args, **kwargs): @@ -36,7 +36,7 @@ def wrapper(): class WrapWithSetGradEnabled(HigherOrderOperator): - def __init__(self): + def __init__(self) -> None: super().__init__("wrap_with_set_grad_enabled") def __call__(self, enable_grad, wrapped_func, *args, **kwargs): @@ -74,7 +74,7 @@ class WrapActivationCheckpoint(HigherOrderOperator): partitioners. See TagActivationCheckpoint for more information. """ - def __init__(self): + def __init__(self) -> None: super().__init__("wrap_activation_checkpoint") def __call__(self, function, *args, **kwargs): @@ -113,7 +113,7 @@ class TagActivationCheckpoint(HigherOrderOperator): the forward and recomputed forward in backward. """ - def __init__(self): + def __init__(self) -> None: super().__init__("tag_activation_checkpoint") @staticmethod diff --git a/torch/_inductor/codegen/common.py b/torch/_inductor/codegen/common.py index 05b7475785e572..2a192f1ab69a31 100644 --- a/torch/_inductor/codegen/common.py +++ b/torch/_inductor/codegen/common.py @@ -1560,7 +1560,7 @@ def newvar(self, bounds: ValueRanges[Any] = ValueRanges.unknown()) -> CSEVariabl class CodeGen: - def __init__(self): + def __init__(self) -> None: super().__init__() self.exit_stack = contextlib.ExitStack() diff --git a/torch/_inductor/codegen/cpp_wrapper_cuda.py b/torch/_inductor/codegen/cpp_wrapper_cuda.py index 8eed428de07a92..3def5af40affdd 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cuda.py +++ b/torch/_inductor/codegen/cpp_wrapper_cuda.py @@ -29,7 +29,7 @@ class CppWrapperCuda(CppWrapperCpu): Generates cpp wrapper for running on GPU and calls CUDA kernels """ - def __init__(self): + def __init__(self) -> None: self.device = "cuda" super().__init__() self.grid_id = count() diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index 8e23e877fe26f2..5c6626052ed224 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -1113,7 +1113,7 @@ class HelperFunctions: _templates_seen: Dict[str, str] # Template code to function name finalized_helpers: List[str] - def __init__(self): + def __init__(self) -> None: self._templates_seen = {} self.finalized_helpers = [] diff --git a/torch/_inductor/dependencies.py b/torch/_inductor/dependencies.py index 5339ad6c214ec1..31cee49a31984b 100644 --- a/torch/_inductor/dependencies.py +++ b/torch/_inductor/dependencies.py @@ -588,7 +588,7 @@ def canonicalization_prefix(): class FreeUnbackedSymbolsOpsHandler: symbols: Set[sympy.Symbol] - def __init__(self): + def __init__(self) -> None: self.symbols = set() def __getattr__(self, name: str) -> Callable[..., Any]: diff --git a/torch/_inductor/exc.py b/torch/_inductor/exc.py index 07c1eebf99b8db..2505c8a31196b8 100644 --- a/torch/_inductor/exc.py +++ b/torch/_inductor/exc.py @@ -65,7 +65,7 @@ class SubgraphLoweringException(RuntimeError): class InvalidCxxCompiler(RuntimeError): - def __init__(self): + def __init__(self) -> None: from . import config super().__init__( diff --git a/torch/_inductor/fx_passes/misc_patterns.py b/torch/_inductor/fx_passes/misc_patterns.py index d7873fede3c587..0f608952a2fb27 100644 --- a/torch/_inductor/fx_passes/misc_patterns.py +++ b/torch/_inductor/fx_passes/misc_patterns.py @@ -79,7 +79,7 @@ class NumpyCompatNormalization: inverse_mapping: Dict[str, str] cache: Dict["torch.fx.graph.Target", Set[str]] - def __init__(self): + def __init__(self) -> None: self.cache = {} # callable -> tuple of replaceable args e.g. ["axis"] self.inverse_mapping = {} for actual_kwarg, numpy_kwargs in self.numpy_compat.items(): diff --git a/torch/_inductor/fx_passes/mkldnn_fusion.py b/torch/_inductor/fx_passes/mkldnn_fusion.py index 34ddbf90b7fd22..c930608c766134 100644 --- a/torch/_inductor/fx_passes/mkldnn_fusion.py +++ b/torch/_inductor/fx_passes/mkldnn_fusion.py @@ -1207,7 +1207,7 @@ def _eliminate_duplicate_packed_nodes(gm): Combine packed weight nodes with the same inputs to reduce memory usage. for example: class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(32, 32, bias=True) diff --git a/torch/_inductor/metrics.py b/torch/_inductor/metrics.py index 18e00b090ce2bc..5c26e322f12633 100644 --- a/torch/_inductor/metrics.py +++ b/torch/_inductor/metrics.py @@ -99,7 +99,7 @@ class CachedMetricsHelper: apply on a cache hit. """ - def __init__(self): + def __init__(self) -> None: self.cached_metrics = {} for metric in get_metric_fields(): self.cached_metrics[metric] = globals()[metric] diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 380fbe515c3cd0..d5475b8e14ee33 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -940,7 +940,7 @@ def __add__(self, other): class FakeIndentedBuffer(IndentedBuffer): - def __init__(self): + def __init__(self) -> None: super().__init__() def __getattribute__(self, name): @@ -1219,7 +1219,7 @@ class DebugDirManager: counter = itertools.count(0) prev_debug_name: str - def __init__(self): + def __init__(self) -> None: self.id = next(DebugDirManager.counter) def __enter__(self): @@ -1268,7 +1268,7 @@ def patched_compile_to_module(self: GraphLowering): class DummyModule: """This is empty to replace the generated triton module""" - def __init__(self): + def __init__(self) -> None: pass def call(self, *args, **kwargs): diff --git a/torch/_lazy/closure.py b/torch/_lazy/closure.py index 32b2c58ba2b8dc..94c12c075a092b 100644 --- a/torch/_lazy/closure.py +++ b/torch/_lazy/closure.py @@ -7,7 +7,7 @@ class ClosureHandler: - def __init__(self): + def __init__(self) -> None: pass def run(self, closure): diff --git a/torch/_library/fake_class_registry.py b/torch/_library/fake_class_registry.py index a56f138f4b0879..213e88ac3e5513 100644 --- a/torch/_library/fake_class_registry.py +++ b/torch/_library/fake_class_registry.py @@ -42,7 +42,7 @@ def from_real(cls, real_obj: torch.ScriptObject): class FakeClassRegistry: - def __init__(self): + def __init__(self) -> None: self._registered_class: Dict[str, Any] = {} def has_impl(self, full_qualname: str) -> bool: diff --git a/torch/_python_dispatcher.py b/torch/_python_dispatcher.py index 644cf92fda2bdd..2dfdbb296a4b2d 100644 --- a/torch/_python_dispatcher.py +++ b/torch/_python_dispatcher.py @@ -70,7 +70,7 @@ class PythonDispatcher: ] supported_keys = runtime_keys + alias_keys - def __init__(self): + def __init__(self) -> None: C._dispatch_check_invariants(self.name) # type: ignore[attr-defined] self.ref = C._dispatch_library("FRAGMENT", self.namespace, "") self.ref.def_("foo(Tensor x) -> Tensor") diff --git a/torch/_subclasses/schema_check_mode.py b/torch/_subclasses/schema_check_mode.py index d8843eec8100e3..d7ad9ebd28170c 100644 --- a/torch/_subclasses/schema_check_mode.py +++ b/torch/_subclasses/schema_check_mode.py @@ -60,7 +60,7 @@ def clone_inputs(args): class SchemaCheckMode(TorchDispatchMode): - def __init__(self): + def __init__(self) -> None: # Information recorded for testing purposes. For example: # - incorrect schemas # - overly conservative schemas diff --git a/torch/ao/nn/quantized/modules/functional_modules.py b/torch/ao/nn/quantized/modules/functional_modules.py index b707a1f681cefc..45dc7fc0444c16 100644 --- a/torch/ao/nn/quantized/modules/functional_modules.py +++ b/torch/ao/nn/quantized/modules/functional_modules.py @@ -36,7 +36,7 @@ class FloatFunctional(torch.nn.Module): - mul_scalar """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.activation_post_process = torch.nn.Identity() @@ -190,7 +190,7 @@ class QFunctional(torch.nn.Module): - mul_scalar """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.scale = 1.0 self.zero_point = 0 diff --git a/torch/ao/ns/fx/qconfig_multi_mapping.py b/torch/ao/ns/fx/qconfig_multi_mapping.py index a7c0f0a27f6613..8cd4190110ffe3 100644 --- a/torch/ao/ns/fx/qconfig_multi_mapping.py +++ b/torch/ao/ns/fx/qconfig_multi_mapping.py @@ -72,7 +72,7 @@ class QConfigMultiMapping: """ - def __init__(self): + def __init__(self) -> None: # initialize this with 1 QConfigMapping to avoid corner cases self.qconfig_mappings_list: List[QConfigMapping] = [QConfigMapping()] diff --git a/torch/ao/pruning/_experimental/pruner/README.md b/torch/ao/pruning/_experimental/pruner/README.md index 026fd33b2876fb..2885dff04020d2 100644 --- a/torch/ao/pruning/_experimental/pruner/README.md +++ b/torch/ao/pruning/_experimental/pruner/README.md @@ -99,7 +99,7 @@ from torch.ao.pruning._experimental.pruner import SaliencyPruner # Define model class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(700, 500, bias=True), diff --git a/torch/ao/quantization/fake_quantize.py b/torch/ao/quantization/fake_quantize.py index 57cc1df04d54a5..8ef266ebe47f50 100644 --- a/torch/ao/quantization/fake_quantize.py +++ b/torch/ao/quantization/fake_quantize.py @@ -85,7 +85,7 @@ class FakeQuantizeBase(ABC, Module): fake_quant_enabled: torch.Tensor observer_enabled: torch.Tensor - def __init__(self): + def __init__(self) -> None: """Set fake_quant_enabled and observer_enabled.""" super().__init__() # fake_quant_enabled and observer_enabled are buffers to support their diff --git a/torch/ao/quantization/fx/README.md b/torch/ao/quantization/fx/README.md index a8bd154791b721..ca116b282e7ab5 100644 --- a/torch/ao/quantization/fx/README.md +++ b/torch/ao/quantization/fx/README.md @@ -70,7 +70,7 @@ In the following, I’ll first have a detailed description for each step, and th ``` class LinearReLUModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10).float() self.relu = torch.nn.ReLU() diff --git a/torch/ao/quantization/fx/_model_report/detector.py b/torch/ao/quantization/fx/_model_report/detector.py index 534e73bfb0a4cb..9db118a33652a0 100644 --- a/torch/ao/quantization/fx/_model_report/detector.py +++ b/torch/ao/quantization/fx/_model_report/detector.py @@ -137,7 +137,7 @@ class DetectorBase(ABC): - Should return a str-based report and dict info in Tuple[str,Dict] format """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.detector_config_info = None diff --git a/torch/ao/quantization/fx/custom_config.py b/torch/ao/quantization/fx/custom_config.py index 7aa408f0cebd4b..cb00c95fdee1d8 100644 --- a/torch/ao/quantization/fx/custom_config.py +++ b/torch/ao/quantization/fx/custom_config.py @@ -63,7 +63,7 @@ class PrepareCustomConfig: .set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.standalone_module_names: Dict[str, StandaloneModuleConfigEntry] = {} self.standalone_module_classes: Dict[Type, StandaloneModuleConfigEntry] = {} self.float_to_observed_mapping: Dict[QuantType, Dict[Type, Type]] = {} @@ -382,7 +382,7 @@ class ConvertCustomConfig: .set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.observed_to_quantized_mapping: Dict[QuantType, Dict[Type, Type]] = {} self.preserved_attributes: List[str] = [] @@ -477,7 +477,7 @@ class FuseCustomConfig: fuse_custom_config = FuseCustomConfig().set_preserved_attributes(["attr1", "attr2"]) """ - def __init__(self): + def __init__(self) -> None: self.preserved_attributes: List[str] = [] def __repr__(self): diff --git a/torch/ao/quantization/observer.py b/torch/ao/quantization/observer.py index 64b14b506143b3..e26f03027116e2 100644 --- a/torch/ao/quantization/observer.py +++ b/torch/ao/quantization/observer.py @@ -1568,7 +1568,7 @@ class ReuseInputObserver(ObserverBase): Note: this is only enabled in FX Graph Mode Quantization """ - def __init__(self): + def __init__(self) -> None: super().__init__(torch.quint8, is_dynamic=False) def forward(self, x): diff --git a/torch/ao/quantization/qconfig_mapping.py b/torch/ao/quantization/qconfig_mapping.py index 1b4d9cecbf3fc0..2c12be74ce6815 100644 --- a/torch/ao/quantization/qconfig_mapping.py +++ b/torch/ao/quantization/qconfig_mapping.py @@ -229,7 +229,7 @@ class QConfigMapping: """ - def __init__(self): + def __init__(self) -> None: # In increasing match priority: self.global_qconfig: QConfigAny = None self.object_type_qconfigs: OrderedDict[ diff --git a/torch/ao/quantization/quantize_fx.py b/torch/ao/quantization/quantize_fx.py index f5949d985f99c8..dd8f3e811a39ea 100644 --- a/torch/ao/quantization/quantize_fx.py +++ b/torch/ao/quantization/quantize_fx.py @@ -289,7 +289,7 @@ def prepare_fx( from torch.ao.quantization.quantize_fx import prepare_fx class Submodule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) def forward(self, x): @@ -297,7 +297,7 @@ def forward(self, x): return x class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) self.sub = Submodule() @@ -427,7 +427,7 @@ def prepare_qat_fx( from torch.ao.quantization.quantize_fx import prepare_qat_fx class Submodule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) def forward(self, x): @@ -435,7 +435,7 @@ def forward(self, x): return x class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 5) self.sub = Submodule() diff --git a/torch/ao/quantization/quantize_pt2e.py b/torch/ao/quantization/quantize_pt2e.py index 41676934440d47..1e1848a6ff0d4a 100644 --- a/torch/ao/quantization/quantize_pt2e.py +++ b/torch/ao/quantization/quantize_pt2e.py @@ -56,7 +56,7 @@ def prepare_pt2e( ) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10) @@ -129,7 +129,7 @@ def prepare_qat_pt2e( ) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(5, 10) diff --git a/torch/ao/quantization/quantizer/embedding_quantizer.py b/torch/ao/quantization/quantizer/embedding_quantizer.py index 6c93c0b88a194e..32ec3814637cba 100644 --- a/torch/ao/quantization/quantizer/embedding_quantizer.py +++ b/torch/ao/quantization/quantizer/embedding_quantizer.py @@ -42,7 +42,7 @@ def get_embedding_operators_config() -> OperatorConfig: class EmbeddingQuantizer(Quantizer): - def __init__(self): + def __init__(self) -> None: super().__init__() @classmethod diff --git a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py index 09db71a191b79c..574af30a7159b9 100644 --- a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py +++ b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py @@ -436,7 +436,7 @@ class X86InductorQuantizer(Quantizer): supported_config_and_operators = _get_supported_config_and_operators() module_function_to_aten_operator_type = _map_module_function_to_aten_operator_type() - def __init__(self): + def __init__(self) -> None: super().__init__() self.global_config: Optional[QuantizationConfig] = None self.operator_type_qconfig: Dict[ diff --git a/torch/ao/quantization/quantizer/xnnpack_quantizer.py b/torch/ao/quantization/quantizer/xnnpack_quantizer.py index 93712ded503c7a..cc17057c82a49a 100644 --- a/torch/ao/quantization/quantizer/xnnpack_quantizer.py +++ b/torch/ao/quantization/quantizer/xnnpack_quantizer.py @@ -268,7 +268,7 @@ class XNNPACKQuantizer(Quantizer): "linear", ] - def __init__(self): + def __init__(self) -> None: super().__init__() self.global_config: Optional[QuantizationConfig] = None self.operator_type_config: Dict[ diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py index dad16df5b93b0d..ff22da04a222c8 100644 --- a/torch/ao/quantization/utils.py +++ b/torch/ao/quantization/utils.py @@ -513,7 +513,7 @@ def _get_path_of_module( Example:: >> class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.linear = torch.nn.Linear(5, 5) def forward(self, x): return self.linear(x) diff --git a/torch/autograd/profiler_util.py b/torch/autograd/profiler_util.py index e3fc95580c5a62..67eb989f57c80c 100644 --- a/torch/autograd/profiler_util.py +++ b/torch/autograd/profiler_util.py @@ -645,7 +645,7 @@ def __repr__(self): class FunctionEventAvg(FormattedTimesMixin): """Used to average stats over multiple FunctionEvent objects.""" - def __init__(self): + def __init__(self) -> None: self.key: Optional[str] = None self.count: int = 0 self.node_id: int = 0 diff --git a/torch/backends/xeon/run_cpu.py b/torch/backends/xeon/run_cpu.py index bdf07e28617448..634c50da4dbc2c 100644 --- a/torch/backends/xeon/run_cpu.py +++ b/torch/backends/xeon/run_cpu.py @@ -266,7 +266,7 @@ class _Launcher: or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or \ {expanduser('~')}/.local/lib/ so the LD_PRELOAD environment variable will not be set." - def __init__(self): + def __init__(self) -> None: self.cpuinfo = _CPUinfo() def add_lib_preload(self, lib_type): diff --git a/torch/csrc/jit/backends/backend_debug_handler.h b/torch/csrc/jit/backends/backend_debug_handler.h index d25ce2f8cb0416..d4b00fe340f2b2 100644 --- a/torch/csrc/jit/backends/backend_debug_handler.h +++ b/torch/csrc/jit/backends/backend_debug_handler.h @@ -77,17 +77,17 @@ namespace jit { * * So why does debug handle map to DebugInfoTuple = {source range and inlined * cs}? {debug_handle, source_range_tag, serialized_callstack} Take this - * example: class L(nn.Module): def __init__(self): + * example: class L(nn.Module): def __init__(self) -> None: * ... * def forward(self, x): * return x * 5 * class M(nn.Module): - * def __init__(self): + * def __init__(self) -> None: * ... * def forward(self, x): * return x - 2 * class N(nn.Module): - * def __init__(self): + * def __init__(self) -> None: * self.m = M() * def forward(self, x): * return self.m(x) + 3 diff --git a/torch/csrc/jit/docs/serialization.md b/torch/csrc/jit/docs/serialization.md index 106cea55478af8..3fb463c7e7fe37 100644 --- a/torch/csrc/jit/docs/serialization.md +++ b/torch/csrc/jit/docs/serialization.md @@ -328,7 +328,7 @@ For example: ``` class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.a = torch.rand(2, 3) self.b = torch.nn.Linear(10, 10) diff --git a/torch/csrc/jit/operator_upgraders/README.md b/torch/csrc/jit/operator_upgraders/README.md index 61679972073059..ce995276d283a2 100644 --- a/torch/csrc/jit/operator_upgraders/README.md +++ b/torch/csrc/jit/operator_upgraders/README.md @@ -37,7 +37,7 @@ When making changes to the operators, the first thing to identify is if it's BC/ 1. Add a test module in `test/jit/fixtures_srcs/fixtures_src.py`. In `test/jit/fixtures_srcs/generate_models.py`, ``` class TestVersionedLinspaceV7(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]): @@ -163,7 +163,7 @@ When making changes to the operators, the first thing to identify is if it's BC/ # Step 2. Write down how current module should look like class MyModuleFloat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, a, b: float): diff --git a/torch/csrc/jit/passes/onnx/function_extraction.h b/torch/csrc/jit/passes/onnx/function_extraction.h index 3a90967e2f1f8c..40555f8e3561ca 100644 --- a/torch/csrc/jit/passes/onnx/function_extraction.h +++ b/torch/csrc/jit/passes/onnx/function_extraction.h @@ -25,7 +25,7 @@ namespace onnx { // // clang-format off // class M(torch.nn.Module): -// def __init__(self): +// def __init__(self) -> None: // super().__init__() // self.lns = torch.nn.ModuleList([torch.nn.LayerNorm(3, eps = i) for i in range(2)]) // self.celu1 = torch.nn.CELU(1.0) diff --git a/torch/csrc/lazy/test_mnist.py b/torch/csrc/lazy/test_mnist.py index a3a03d9844d3e4..762620fcc62f11 100644 --- a/torch/csrc/lazy/test_mnist.py +++ b/torch/csrc/lazy/test_mnist.py @@ -17,7 +17,7 @@ class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) diff --git a/torch/csrc/lazy/tutorial.md b/torch/csrc/lazy/tutorial.md index 155e8adfdd85aa..b72ae13eca7dd8 100644 --- a/torch/csrc/lazy/tutorial.md +++ b/torch/csrc/lazy/tutorial.md @@ -135,7 +135,7 @@ Here's our model definition: ```python class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) diff --git a/torch/cuda/_sanitizer.py b/torch/cuda/_sanitizer.py index f9ce311725e2f3..34cd7bacee060d 100644 --- a/torch/cuda/_sanitizer.py +++ b/torch/cuda/_sanitizer.py @@ -163,7 +163,7 @@ class TensorInfo: class _TensorsAccessed: - def __init__(self): + def __init__(self) -> None: self.accesses: Dict[DataPtr, TensorInfo] = {} def ensure_tensor_exists(self, data_ptr: DataPtr) -> None: @@ -218,7 +218,7 @@ def set_write(self, data_ptr: DataPtr, access: Access) -> None: class StreamSynchronizations: - def __init__(self): + def __init__(self) -> None: self.current_sync_states: Dict[StreamId, Dict[StreamId, SeqNum]] = {} self.recorded_sync_states: Dict[EventId, Dict[StreamId, SeqNum]] = {} self.host_sync_state: Dict[StreamId, SeqNum] = {} @@ -338,7 +338,7 @@ class EventHandler: data race. """ - def __init__(self): + def __init__(self) -> None: self.tensors_accessed = _TensorsAccessed() self.syncs = StreamSynchronizations() self.seq_num: SeqNum = 0 @@ -478,7 +478,7 @@ def zip_arguments( class ArgumentHandler: - def __init__(self): + def __init__(self) -> None: self.dataptrs_read: Set[DataPtr] = set() self.dataptrs_written: Set[DataPtr] = set() self.tensor_aliases: Dict[DataPtr, List[str]] = {} @@ -527,7 +527,7 @@ def parse_outputs(self, outputs: Any) -> None: class CUDASanitizerDispatchMode(TorchDispatchMode): - def __init__(self): + def __init__(self) -> None: self.event_handler = EventHandler() torch._C._activate_gpu_trace() gpu_trace.register_callback_for_event_creation( @@ -596,7 +596,7 @@ class CUDASanitizer: This approach was deemed more elegant than using the atexit module. """ - def __init__(self): + def __init__(self) -> None: self.dispatch = CUDASanitizerDispatchMode() self.enabled = False diff --git a/torch/distributed/_composable/checkpoint_activation.py b/torch/distributed/_composable/checkpoint_activation.py index fcee2a57a07e7b..88253abb4b9cbb 100644 --- a/torch/distributed/_composable/checkpoint_activation.py +++ b/torch/distributed/_composable/checkpoint_activation.py @@ -49,7 +49,7 @@ def checkpoint(module: nn.Module, **kwargs) -> nn.Module: >>> import torch.nn as nn >>> >>> class MyModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.l1 = nn.Linear(10, 10) >>> self.l2 = nn.Linear(10, 10) diff --git a/torch/distributed/_composable/contract.py b/torch/distributed/_composable/contract.py index 850659fc2c0171..e7cd1713fae4c2 100644 --- a/torch/distributed/_composable/contract.py +++ b/torch/distributed/_composable/contract.py @@ -47,7 +47,7 @@ def contract(state_cls: Type[_State] = _State): >>> import torch.nn as nn >>> >>> class MyModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.l1 = nn.Linear(10, 10) >>> self.l2 = nn.Linear(10, 10) diff --git a/torch/distributed/_composable/fsdp/_fsdp_state.py b/torch/distributed/_composable/fsdp/_fsdp_state.py index 3aad8e2226c387..be587d9b5b0bd3 100644 --- a/torch/distributed/_composable/fsdp/_fsdp_state.py +++ b/torch/distributed/_composable/fsdp/_fsdp_state.py @@ -44,7 +44,7 @@ class FSDPStateContext: """This has state shared across FSDP states.""" - def __init__(self): + def __init__(self) -> None: # All FSDP states in the root state's module tree self.all_states: List[FSDPState] = [] # Iteration's forward root runs the once-per-forward logic; this root @@ -72,7 +72,7 @@ def fsdp_hook_wrapper(*args, **kwargs): class FSDPState(_State): - def __init__(self): + def __init__(self) -> None: super().__init__() self._fsdp_param_group: Optional[FSDPParamGroup] = None self._is_root: Optional[bool] = None # root set during lazy init diff --git a/torch/distributed/_shard/sharding_plan/api.py b/torch/distributed/_shard/sharding_plan/api.py index a7552c5a68f88e..d141df1a5214fc 100644 --- a/torch/distributed/_shard/sharding_plan/api.py +++ b/torch/distributed/_shard/sharding_plan/api.py @@ -38,7 +38,7 @@ class ShardingPlan: >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d) >>> class MyModule(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.fc1 = nn.Linear() >>> self.gelu = nn.GELU() diff --git a/torch/distributed/_tensor/README.md b/torch/distributed/_tensor/README.md index 80fcc2eb41f167..2fedb7cc3b426a 100644 --- a/torch/distributed/_tensor/README.md +++ b/torch/distributed/_tensor/README.md @@ -117,7 +117,7 @@ import torch.nn as nn from torch.distributed._tensor import Shard, distribute_tensor, distribute_module, init_device_mesh class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(8, 8) self.fc2 = nn.Linear(8, 8) diff --git a/torch/distributed/_tensor/examples/checkpoint_example.py b/torch/distributed/_tensor/examples/checkpoint_example.py index 1701e28ac2ca76..fe8585c2a239c3 100644 --- a/torch/distributed/_tensor/examples/checkpoint_example.py +++ b/torch/distributed/_tensor/examples/checkpoint_example.py @@ -25,7 +25,7 @@ class SimpleMLP(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = torch.nn.Linear(5, 128) self.relu = torch.nn.ReLU() diff --git a/torch/distributed/algorithms/join.py b/torch/distributed/algorithms/join.py index 140844851938b7..f7c95100b1b202 100644 --- a/torch/distributed/algorithms/join.py +++ b/torch/distributed/algorithms/join.py @@ -55,7 +55,7 @@ class Joinable(ABC): """ @abstractmethod - def __init__(self): + def __init__(self) -> None: super().__init__() self._join_config = _JoinConfig.construct_disabled_join_config() diff --git a/torch/distributed/checkpoint/examples/async_checkpointing_example.py b/torch/distributed/checkpoint/examples/async_checkpointing_example.py index 5eaba9a67227d9..589f9b93544289 100644 --- a/torch/distributed/checkpoint/examples/async_checkpointing_example.py +++ b/torch/distributed/checkpoint/examples/async_checkpointing_example.py @@ -31,7 +31,7 @@ class InjectedException(Exception): class Model(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(8, 32) self.net2 = nn.Linear(32, 128) diff --git a/torch/distributed/checkpoint/examples/stateful_example.py b/torch/distributed/checkpoint/examples/stateful_example.py index 6c76ec436364bf..f6e0d11801def0 100644 --- a/torch/distributed/checkpoint/examples/stateful_example.py +++ b/torch/distributed/checkpoint/examples/stateful_example.py @@ -22,7 +22,7 @@ class Model(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() torch.manual_seed(0) self.net1 = nn.Sequential(nn.Linear(8, 16), nn.ReLU()) diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 161eade7af6655..a78ab550575458 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -434,7 +434,7 @@ class _reduce_op: :class:`~torch.distributed.ReduceOp` is recommended to use instead. """ - def __init__(self): + def __init__(self) -> None: # __members__ is a dict storing key-value pairs for enum classes for k, v in ReduceOp.RedOpType.__members__.items(): setattr(self, k, v) @@ -568,7 +568,7 @@ class _World: of c10d and is subject to change.. """ - def __init__(self): + def __init__(self) -> None: self._default_pg = None self._pg_coalesce_state: Dict[ProcessGroup, List[_CollOp]] = {} self._pg_default_device: Dict[ProcessGroup, torch.device] = {} @@ -2194,7 +2194,7 @@ def __getattribute__(self, name): class _CoalescingManager: - def __init__(self): + def __init__(self) -> None: self.works: List[Work] = [] def append(self, work: Work): diff --git a/torch/distributed/fsdp/_common_utils.py b/torch/distributed/fsdp/_common_utils.py index 10d0f821265119..d722d5b9825999 100644 --- a/torch/distributed/fsdp/_common_utils.py +++ b/torch/distributed/fsdp/_common_utils.py @@ -106,7 +106,7 @@ def __getattr__(self, __name: str) -> Any: class _UninitializedDeviceHandle(_FSDPDeviceHandle): - def __init__(self): + def __init__(self) -> None: pass def __getattribute__(self, __name: str) -> Any: diff --git a/torch/distributed/nn/api/remote_module.py b/torch/distributed/nn/api/remote_module.py index 5583da8c3e8d4e..4e18fe3245e183 100644 --- a/torch/distributed/nn/api/remote_module.py +++ b/torch/distributed/nn/api/remote_module.py @@ -156,7 +156,7 @@ def __init__( created outside of remote modules, rather than as submodules of any remote module (by calling ``add_module``). Hybrid Example: >>> class HybridModel(nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> nn.Module.__init__(self) >>> self.remote_embedding = RemoteModule(...) >>> self.local_linear = nn.Linear(...) diff --git a/torch/export/graph_signature.py b/torch/export/graph_signature.py index c36941ee02e195..0d93957d77cd28 100644 --- a/torch/export/graph_signature.py +++ b/torch/export/graph_signature.py @@ -248,7 +248,7 @@ class ExportGraphSignature: e.g. If following module is exported:: class CustomModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super(CustomModule, self).__init__() # Define a parameter diff --git a/torch/fx/README.md b/torch/fx/README.md index a69a6ed1f65a76..4c799da7bc4022 100644 --- a/torch/fx/README.md +++ b/torch/fx/README.md @@ -45,7 +45,7 @@ FX’s front-end makes use of the dynamic nature of Python to intercept call-sit import torch class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter( torch.rand(3, 4)) diff --git a/torch/fx/__init__.py b/torch/fx/__init__.py index b9896390f12434..dd04cdd09d7fa1 100644 --- a/torch/fx/__init__.py +++ b/torch/fx/__init__.py @@ -9,7 +9,7 @@ import torch # Simple module for demonstration class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter(torch.rand(3, 4)) self.linear = torch.nn.Linear(4, 5) diff --git a/torch/fx/_symbolic_trace.py b/torch/fx/_symbolic_trace.py index bd8d4f4266962b..92fb7b9494891b 100644 --- a/torch/fx/_symbolic_trace.py +++ b/torch/fx/_symbolic_trace.py @@ -1012,7 +1012,7 @@ def revert(self): class _Patcher: - def __init__(self): + def __init__(self) -> None: super().__init__() self.patches_made: List[_PatchedFn] = [] self.visited: Set[int] = set() diff --git a/torch/fx/experimental/migrate_gradual_types/constraint.py b/torch/fx/experimental/migrate_gradual_types/constraint.py index 45038837cae608..4693a62de24025 100644 --- a/torch/fx/experimental/migrate_gradual_types/constraint.py +++ b/torch/fx/experimental/migrate_gradual_types/constraint.py @@ -63,7 +63,7 @@ class T(Constraint): """ True """ - def __init__(self): + def __init__(self) -> None: pass def __eq__(self, other): @@ -76,7 +76,7 @@ class F(Constraint): """ False """ - def __init__(self): + def __init__(self) -> None: pass def __eq__(self, other): diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py index 726ab04539d9d3..a577cf8736e098 100644 --- a/torch/fx/passes/graph_drawer.py +++ b/torch/fx/passes/graph_drawer.py @@ -117,7 +117,7 @@ def get_dot_graph(self, submod_name=None) -> pydot.Dot: >>> # xdoctest: +REQUIRES(module:ubelt) >>> # define module >>> class MyModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.linear = torch.nn.Linear(4, 5) >>> def forward(self, x): diff --git a/torch/fx/passes/split_module.py b/torch/fx/passes/split_module.py index fba516d74beca5..5984587f17c521 100644 --- a/torch/fx/passes/split_module.py +++ b/torch/fx/passes/split_module.py @@ -83,7 +83,7 @@ def split_module( from torch.fx.passes.split_module import split_module class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = torch.nn.Parameter(torch.rand(3, 4)) self.linear = torch.nn.Linear(4, 5) diff --git a/torch/fx/passes/split_utils.py b/torch/fx/passes/split_utils.py index d8254bd474b552..44b97471332f2c 100644 --- a/torch/fx/passes/split_utils.py +++ b/torch/fx/passes/split_utils.py @@ -83,7 +83,7 @@ def split_by_tags( Given the following module def: class SimpleModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = torch.nn.Linear(...) self.linear2 = torch.nn.Linear(...) diff --git a/torch/fx/proxy.py b/torch/fx/proxy.py index 874ac51afff1e4..05157f9ddb19f0 100644 --- a/torch/fx/proxy.py +++ b/torch/fx/proxy.py @@ -38,7 +38,7 @@ def forward(self, x): return x.transpose(1, 2) class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: self.sub = Sub() def forward(self, x): diff --git a/torch/fx/subgraph_rewriter.py b/torch/fx/subgraph_rewriter.py index 419337a17683cf..8a9e78c0af4f8f 100644 --- a/torch/fx/subgraph_rewriter.py +++ b/torch/fx/subgraph_rewriter.py @@ -118,7 +118,7 @@ class Match(NamedTuple): from torch.fx import symbolic_trace, subgraph_rewriter class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x, w1, w2): diff --git a/torch/fx/tensor_type.py b/torch/fx/tensor_type.py index f59ed2d45baa4d..83b5a9f8faf65e 100644 --- a/torch/fx/tensor_type.py +++ b/torch/fx/tensor_type.py @@ -38,7 +38,7 @@ class _DynType: """ _DynType defines a type which stands for the absence of type information. """ - def __init__(self): + def __init__(self) -> None: self.__name__ = '_DynType' def __eq__(self, other): diff --git a/torch/jit/__init__.py b/torch/jit/__init__.py index 6d1760fb9f4fcb..e80fa2932fcbe2 100644 --- a/torch/jit/__init__.py +++ b/torch/jit/__init__.py @@ -219,7 +219,7 @@ def isinstance(obj, target_type): from typing import Any, Dict, List class MyModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, input: Any): # note the Any type @@ -255,7 +255,7 @@ def foo(x): """ - def __init__(self): + def __init__(self) -> None: if not torch._jit_internal.is_scripting(): warnings.warn("Only works in script mode") pass diff --git a/torch/jit/_async.py b/torch/jit/_async.py index bdde55adf14fc4..ceaef70b1fe3b0 100644 --- a/torch/jit/_async.py +++ b/torch/jit/_async.py @@ -73,7 +73,7 @@ class AddMod(torch.nn.Module): def forward(self, a: Tensor, b : int): return a + b class Mod(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super(self).__init__() self.mod = AddMod() def forward(self, input): diff --git a/torch/jit/_check.py b/torch/jit/_check.py index 8db5bb82ce3d63..f708ee87f3089a 100644 --- a/torch/jit/_check.py +++ b/torch/jit/_check.py @@ -39,7 +39,7 @@ class M(torch.nn.Module): def fn(self): return [] - def __init__(self): + def __init__(self) -> None: super().__init__() self.x: List[int] = [] diff --git a/torch/jit/_freeze.py b/torch/jit/_freeze.py index 8f35fc471e6841..e496bd74762554 100644 --- a/torch/jit/_freeze.py +++ b/torch/jit/_freeze.py @@ -65,7 +65,7 @@ def forward(self, input): .. testcode:: import torch class MyModule2(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.modified_tensor = torch.tensor(10.) self.version = 1 diff --git a/torch/jit/_monkeytype_config.py b/torch/jit/_monkeytype_config.py index ecf7cd865fdeb7..366a58ac6afd30 100644 --- a/torch/jit/_monkeytype_config.py +++ b/torch/jit/_monkeytype_config.py @@ -89,7 +89,7 @@ def log(self, trace: CallTrace) -> None: self.traces.append(trace) class JitTypeTraceStore(CallTraceStore): - def __init__(self): + def __init__(self) -> None: super().__init__() # A dictionary keeping all collected CallTrace # key is fully qualified name of called function @@ -159,15 +159,15 @@ def code_filter(self) -> Optional[CodeFilter]: # When MonkeyType is not installed, we provide dummy class definitions # for the below classes. class JitTypeTraceStoreLogger: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: pass class JitTypeTraceStore: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: self.trace_records = None class JitTypeTraceConfig: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: pass monkeytype_trace = None # type: ignore[assignment] # noqa: F811 diff --git a/torch/jit/_recursive.py b/torch/jit/_recursive.py index b8dc0ecf2cd6b9..e03540a7c75b34 100644 --- a/torch/jit/_recursive.py +++ b/torch/jit/_recursive.py @@ -426,7 +426,7 @@ class ConcreteTypeStore: type_store: Dict[Type[Module], List[torch._C.ConcreteModuleType]] methods_compiled: Set[torch._C.ConcreteModuleType] - def __init__(self): + def __init__(self) -> None: # Python module type => List[ConcreteModuleType)] self.type_store = {} # ConcreteTypes that have had their methods already compiled diff --git a/torch/jit/_script.py b/torch/jit/_script.py index 490e9e644e2c6e..a7bc45fa5fc46e 100644 --- a/torch/jit/_script.py +++ b/torch/jit/_script.py @@ -107,7 +107,7 @@ def Attribute(value, type): # type: ignore[no-redef] from typing import Dict class AttributeModule(torch.jit.ScriptModule): - def __init__(self): + def __init__(self) -> None: super().__init__() self.foo = torch.jit.Attribute(0.1, float) @@ -138,7 +138,7 @@ def __init__(self): class AttributeModule(torch.nn.Module): names: Dict[str, int] - def __init__(self): + def __init__(self) -> None: super().__init__() self.names = {} @@ -522,7 +522,7 @@ class ScriptModule(Module, metaclass=ScriptMeta): "original_name", ] - def __init__(self): + def __init__(self) -> None: super().__init__() forward: Callable[..., Any] = _CachedForward() # type: ignore[assignment] @@ -1351,7 +1351,7 @@ def forward(self, input): import torch.nn.functional as F class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() # torch.jit.trace produces a ScriptModule's conv1 and conv2 self.conv1 = torch.jit.trace(nn.Conv2d(1, 20, 5), torch.rand(1, 1, 16, 16)) @@ -1374,7 +1374,7 @@ def forward(self, input): import torch.nn as nn class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() @torch.jit.export @@ -1547,7 +1547,7 @@ def run(self, x: torch.Tensor) -> torch.Tensor: return x.relu() class Impl2(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.val = torch.rand(()) @@ -1671,7 +1671,7 @@ def dump_string(self): class _ScriptProfile: - def __init__(self): + def __init__(self) -> None: self.profile = classes.profiling._ScriptProfile() def enable(self): diff --git a/torch/jit/_state.py b/torch/jit/_state.py index 63df2acfdf09b6..18456ebd38687c 100644 --- a/torch/jit/_state.py +++ b/torch/jit/_state.py @@ -19,7 +19,7 @@ class EnabledProxy: This is just a wrapper for a bool, so that we get reference semantics """ - def __init__(self): + def __init__(self) -> None: self.enabled = self.parse_env( "PYTORCH_JIT", True, "> Using PyTorch JIT", "> PyTorch JIT DISABLED" ) diff --git a/torch/jit/_trace.py b/torch/jit/_trace.py index 1c0372c7281bdb..372fd72ddb28b7 100644 --- a/torch/jit/_trace.py +++ b/torch/jit/_trace.py @@ -966,7 +966,7 @@ def foo(x, y): import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(1, 1, 3) @@ -1182,7 +1182,7 @@ def trace_module( import torch.nn as nn class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(1, 1, 3) diff --git a/torch/multiprocessing/reductions.py b/torch/multiprocessing/reductions.py index 0a6d3c8a444206..fa0818571a93c0 100644 --- a/torch/multiprocessing/reductions.py +++ b/torch/multiprocessing/reductions.py @@ -61,7 +61,7 @@ def __eq__(self, other): class SharedCache(dict): """Dictionary from multiprocessing handles to StorageWeakRef.""" - def __init__(self): + def __init__(self) -> None: # free_dead_references() is called if the len exceeds the current # limit. The limit scales with the number of remaining live objects. self.limit = 128 diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py index 30992e394b0fd4..585f4ef1658afd 100644 --- a/torch/nn/modules/container.py +++ b/torch/nn/modules/container.py @@ -291,7 +291,7 @@ class ModuleList(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)]) @@ -465,7 +465,7 @@ class ModuleDict(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.choices = nn.ModuleDict({ 'conv': nn.Conv2d(10, 10, 3), @@ -597,7 +597,7 @@ class ParameterList(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.params = nn.ParameterList([nn.Parameter(torch.randn(10, 10)) for i in range(10)]) @@ -749,7 +749,7 @@ class ParameterDict(Module): Example:: class MyModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.params = nn.ParameterDict({ 'left': nn.Parameter(torch.randn(5, 10)), diff --git a/torch/nn/modules/lazy.py b/torch/nn/modules/lazy.py index 7a9a0161006f14..61cabd061ae922 100644 --- a/torch/nn/modules/lazy.py +++ b/torch/nn/modules/lazy.py @@ -86,7 +86,7 @@ class LazyModuleMixin: >>> # xdoctest: +SKIP >>> class LazyMLP(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.fc1 = torch.nn.LazyLinear(10) ... self.relu1 = torch.nn.ReLU() diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index a6592655fd4486..a15850553f1f56 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -408,7 +408,7 @@ class Module: import torch.nn.functional as F class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(1, 20, 5) self.conv2 = nn.Conv2d(20, 20, 5) diff --git a/torch/onnx/_globals.py b/torch/onnx/_globals.py index 22c05075dba8d2..ebef6c331b2d1c 100644 --- a/torch/onnx/_globals.py +++ b/torch/onnx/_globals.py @@ -20,7 +20,7 @@ class _InternalGlobals: global variables unless they are absolutely necessary. """ - def __init__(self): + def __init__(self) -> None: self._export_onnx_opset_version = _constants.ONNX_DEFAULT_OPSET self._training_mode: _C_onnx.TrainingMode = _C_onnx.TrainingMode.EVAL self._in_onnx_export: bool = False diff --git a/torch/onnx/_internal/exporter.py b/torch/onnx/_internal/exporter.py index e53f906cd84c0b..7c7203c80851d8 100644 --- a/torch/onnx/_internal/exporter.py +++ b/torch/onnx/_internal/exporter.py @@ -760,7 +760,7 @@ def model_signature(self) -> torch.export.ExportGraphSignature | None: >>> import pprint >>> class CustomModule(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.my_parameter = torch.nn.Parameter(torch.tensor(2.0)) ... self.register_buffer("my_buffer1", torch.tensor(3.0)) diff --git a/torch/onnx/_internal/fx/dynamo_graph_extractor.py b/torch/onnx/_internal/fx/dynamo_graph_extractor.py index a3b8a69f60dcb6..5abf2bf2c6373f 100644 --- a/torch/onnx/_internal/fx/dynamo_graph_extractor.py +++ b/torch/onnx/_internal/fx/dynamo_graph_extractor.py @@ -24,7 +24,7 @@ class _PyTreeExtensionContext: _extensions: dict[type, tuple[pytree.FlattenFunc, pytree.UnflattenFunc]] - def __init__(self): + def __init__(self) -> None: self._extensions = {} # Register PyTree extension for HuggingFace model output. self._register_huggingface_model_output_extension() diff --git a/torch/onnx/_internal/fx/passes/modularization.py b/torch/onnx/_internal/fx/passes/modularization.py index 4f4d347401ea7a..db74d52dda4799 100644 --- a/torch/onnx/_internal/fx/passes/modularization.py +++ b/torch/onnx/_internal/fx/passes/modularization.py @@ -795,7 +795,7 @@ class Modularize(_pass.Transform): >>> from torch.onnx._internal.diagnostics import infra >>> >>> class CustomModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.embedding = torch.nn.Embedding(10, 32) >>> self.relu = torch.nn.ReLU() @@ -806,7 +806,7 @@ class Modularize(_pass.Transform): >>> return out >>> >>> class TestModule(torch.nn.Module): - >>> def __init__(self): + >>> def __init__(self) -> None: >>> super().__init__() >>> self.layer = CustomModule() >>> self.linear = torch.nn.Linear(32, 10) diff --git a/torch/onnx/_internal/fx/patcher.py b/torch/onnx/_internal/fx/patcher.py index 3c0ee6c071436c..239edb6dde6342 100644 --- a/torch/onnx/_internal/fx/patcher.py +++ b/torch/onnx/_internal/fx/patcher.py @@ -53,7 +53,7 @@ class ONNXTorchPatcher: `torch.fx._symbolic_trace._wrapped_methods_to_patch` """ - def __init__(self): + def __init__(self) -> None: # List of file paths processed by torch.load. self.paths: List[Union[str, io.BufferedIOBase]] = [] diff --git a/torch/onnx/_internal/onnxruntime.py b/torch/onnx/_internal/onnxruntime.py index b9d6bce165169f..59609866bae83f 100644 --- a/torch/onnx/_internal/onnxruntime.py +++ b/torch/onnx/_internal/onnxruntime.py @@ -602,7 +602,7 @@ def is_supported(self, *args): @dataclasses.dataclass class OrtExecutionInfoForAllGraphModules: - def __init__(self): + def __init__(self) -> None: # All sessions (and their related information) created by exporting the same GraphModule # with different inputs. self.execution_info_per_graph_module: Dict[ diff --git a/torch/onnx/_internal/registration.py b/torch/onnx/_internal/registration.py index c59ab11d4fa77b..95de41b3f03aee 100644 --- a/torch/onnx/_internal/registration.py +++ b/torch/onnx/_internal/registration.py @@ -69,7 +69,7 @@ class OverrideDict(Collection[_K], Generic[_K, _V]): ones. """ - def __init__(self): + def __init__(self) -> None: self._base: Dict[_K, _V] = {} self._overrides: Dict[_K, _V] = {} self._merged: Dict[_K, _V] = {} diff --git a/torch/onnx/verification.py b/torch/onnx/verification.py index e8bcfe4ca9ef9a..bcf1de6b6437e4 100644 --- a/torch/onnx/verification.py +++ b/torch/onnx/verification.py @@ -1722,7 +1722,7 @@ def find_mismatch( ... opset_version=opset_version, ... ) >>> class Model(torch.nn.Module): - ... def __init__(self): + ... def __init__(self) -> None: ... super().__init__() ... self.layers = torch.nn.Sequential( ... torch.nn.Linear(3, 4), diff --git a/torch/overrides.py b/torch/overrides.py index bbd055de447e1f..ecb5613f80f536 100644 --- a/torch/overrides.py +++ b/torch/overrides.py @@ -2025,7 +2025,7 @@ class TorchFunctionMode: inner: "TorchFunctionMode" # Force metaclass to generate constructor at the base of the hierarchy - def __init__(self): + def __init__(self) -> None: pass def __torch_function__(self, func, types, args=(), kwargs=None): diff --git a/torch/package/_mangling.py b/torch/package/_mangling.py index 7dcf3538631f92..700a9ad6a04a16 100644 --- a/torch/package/_mangling.py +++ b/torch/package/_mangling.py @@ -12,7 +12,7 @@ class PackageMangler: Used on import, to ensure that all modules imported have a shared mangle parent. """ - def __init__(self): + def __init__(self) -> None: global _mangle_index self._mangle_index = _mangle_index # Increment the global index diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py index 47faac8c176d45..98f1c1b6735147 100644 --- a/torch/profiler/profiler.py +++ b/torch/profiler/profiler.py @@ -772,7 +772,7 @@ class ExecutionTraceObserver(_ITraceObserver): incurring any overheads. """ - def __init__(self): + def __init__(self) -> None: """ Initializes the default states. """ diff --git a/torch/testing/_internal/common_fsdp.py b/torch/testing/_internal/common_fsdp.py index 51a3deac9c1bfe..fe02eeeabb1baf 100644 --- a/torch/testing/_internal/common_fsdp.py +++ b/torch/testing/_internal/common_fsdp.py @@ -1498,7 +1498,7 @@ def wrapper(*args, **kwargs): class SkipModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10, bias=False) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index f1e4dae34e1af5..d0413b17e50bae 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -3591,7 +3591,7 @@ def error_inputs_adaptive_max_pool3d(opinfo, device, **kwargs): class _TestParamsMaxPoolBase: - def __init__(self): + def __init__(self) -> None: self.kwargs = { 'kernel_size': [3], 'stride': [2, None], @@ -3628,7 +3628,7 @@ def gen_input_params(self): class _TestParamsMaxPool1d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3,)] self.kwargs['stride'] += [(2,)] @@ -3637,7 +3637,7 @@ def __init__(self): class _TestParamsMaxPool2d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3, 2)] self.kwargs['stride'] += [(2, 1)] @@ -3648,7 +3648,7 @@ def __init__(self): class _TestParamsMaxPool3d(_TestParamsMaxPoolBase): - def __init__(self): + def __init__(self) -> None: super().__init__() self.kwargs['kernel_size'] += [(3, 2, 3)] self.kwargs['stride'] += [(2, 1, 2)] diff --git a/torch/testing/_internal/common_nn.py b/torch/testing/_internal/common_nn.py index 0dd11312c04359..7f53a72a7ef682 100644 --- a/torch/testing/_internal/common_nn.py +++ b/torch/testing/_internal/common_nn.py @@ -3967,13 +3967,13 @@ def _test_module_empty_input(test_case, module, inp, check_size=True, inference= def _create_basic_net(): class Layer(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.layer_dummy_param = nn.Parameter(torch.empty(3, 5)) self.layer_dummy_buf = nn.Buffer(torch.zeros(1, 3, 3, 7)) class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.l1 = Layer() self.dummy_param = nn.Parameter(torch.empty(3, 5)) diff --git a/torch/testing/_internal/common_pruning.py b/torch/testing/_internal/common_pruning.py index 031e4ad9efbd40..43dd716c288e0b 100644 --- a/torch/testing/_internal/common_pruning.py +++ b/torch/testing/_internal/common_pruning.py @@ -52,7 +52,7 @@ class SimpleLinear(nn.Module): r"""Model with only Linear layers without biases, some wrapped in a Sequential, some following the Sequential. Used to test basic pruned Linear-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=False), @@ -73,7 +73,7 @@ class LinearBias(nn.Module): r"""Model with only Linear layers, alternating layers with biases, wrapped in a Sequential. Used to test pruned Linear-Bias-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -93,7 +93,7 @@ class LinearActivation(nn.Module): Activation functions modules in between each Linear in the Sequential, and each outside layer. Used to test pruned Linear(Bias)-Activation-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -122,7 +122,7 @@ class LinearActivationFunctional(nn.Module): activationals are called in between each outside layer. Used to test pruned Linear(Bias)-Activation-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Linear(7, 5, bias=True), @@ -151,7 +151,7 @@ class SimpleConv2d(nn.Module): r"""Model with only Conv2d layers, all without bias, some in a Sequential and some following. Used to test pruned Conv2d-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=False), @@ -171,7 +171,7 @@ class Conv2dBias(nn.Module): r"""Model with only Conv2d layers, some with bias, some in a Sequential and some outside. Used to test pruned Conv2d-Bias-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=True), @@ -194,7 +194,7 @@ class Conv2dActivation(nn.Module): in-between each outside layer. Used to test pruned Conv2d-Bias-Activation-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, bias=True), @@ -222,7 +222,7 @@ class Conv2dPadBias(nn.Module): Used to test that bias is propagated correctly in the special case of pruned Conv2d-Bias-(Activation)Conv2d fusion, when the second Conv2d layer has padding > 0.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, 3, 1, padding=1, bias=True), @@ -255,7 +255,7 @@ class Conv2dPool(nn.Module): Activation function modules in between each layer, Pool2d modules in between each layer. Used to test pruned Conv2d-Pool2d-Conv2d fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=True), @@ -289,7 +289,7 @@ class Conv2dPoolFlattenFunctional(nn.Module): Activation functions and Pool2ds in between each layer also. Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True), @@ -323,7 +323,7 @@ class Conv2dPoolFlatten(nn.Module): Activation functions and Pool2ds in between each layer also. Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion.""" - def __init__(self): + def __init__(self) -> None: super().__init__() self.seq = nn.Sequential( nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True), diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py index 553d483ab0a8e1..2482629fe9937f 100644 --- a/torch/testing/_internal/common_quantization.py +++ b/torch/testing/_internal/common_quantization.py @@ -1326,7 +1326,7 @@ def _quantize(self, m, quantizer, example_inputs, is_qat: bool = False): def _get_pt2e_quantized_linear(self, is_per_channel=False) -> torch.fx.GraphModule: class M(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = torch.nn.Linear(2, 2) @@ -1343,7 +1343,7 @@ def forward(self, x): # Below are a series of toy models to use in testing quantization class SingleLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float) @@ -1381,7 +1381,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class LinearAddModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float) @@ -1436,7 +1436,7 @@ def forward(self, x, hid): return x, hid class ConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1448,7 +1448,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class ConvTransposeModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.ConvTranspose2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1494,7 +1494,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class ConvBnModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float) @@ -1508,7 +1508,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class AnnotatedConvBnModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = default_qconfig self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) @@ -1527,7 +1527,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class ConvBnReLUModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float) @@ -1571,7 +1571,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class TwoLayerConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float) self.conv2 = torch.nn.Conv2d(5, 5, 1, bias=False).to(dtype=torch.float) @@ -1585,7 +1585,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class TwoLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float) @@ -1599,7 +1599,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class LinearModelWithSubmodule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.subm = TwoLayerLinearModel() self.fc = nn.Linear(5, 5) @@ -1613,7 +1613,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.subm.get_example_inputs() class AnnotatedTwoLayerLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.fc2 = QuantWrapper(torch.nn.Linear(8, 5).to(dtype=torch.float)) @@ -1628,7 +1628,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class ActivationsTestModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm") self.quant = torch.ao.quantization.QuantStub() @@ -1644,7 +1644,7 @@ def forward(self, x): return x class LinearReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1658,7 +1658,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: class LinearReluLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1674,7 +1674,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class LinearReluAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1710,7 +1710,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class LinearTanhModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(5, 5) self.tanh = nn.Tanh() @@ -1785,7 +1785,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: # TODO: self.fc should be self.conv class ConvReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1799,7 +1799,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: # TODO: self.fc should be self.conv class ConvReluConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1816,7 +1816,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: # TODO: self.fc should be self.conv class ConvReluAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float) self.relu = torch.nn.ReLU() @@ -1834,7 +1834,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class NormalizationTestModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.quant = torch.ao.quantization.QuantStub() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) @@ -1855,7 +1855,7 @@ def forward(self, x): return x class NestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = TwoLayerLinearModel() @@ -1887,7 +1887,7 @@ def forward(self, x): return x class AnnotatedSubNestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = QuantWrapper(TwoLayerLinearModel()) @@ -1902,7 +1902,7 @@ def forward(self, x): return x class AnnotatedCustomConfigNestedModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = TwoLayerLinearModel() @@ -1928,7 +1928,7 @@ def forward(self, x): return x class QuantSubModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub1 = LinearReluModel() self.sub2 = QuantWrapper(TwoLayerLinearModel()) @@ -1943,7 +1943,7 @@ def forward(self, x): return x class InnerModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float) self.relu1 = torch.nn.ReLU() @@ -1970,7 +1970,7 @@ def fuse_modules(self): torch.ao.quantization.fuse_modules(self, fusable_layers, inplace=True) class FunctionalLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.weight = torch.rand((5, 5)) self.bias = torch.zeros(5) @@ -1982,7 +1982,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 5),) class SingleLayerFunctionalLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() @@ -1994,7 +1994,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.linear1.get_example_inputs() class TwoLayerFunctionalLinearModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.linear2 = FunctionalLinear() @@ -2008,7 +2008,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.linear1.get_example_inputs() class FunctionalLinearAddModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.linear2 = FunctionalLinear() @@ -2023,7 +2023,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.linear1.get_example_inputs() class FunctionalLinearReluModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = FunctionalLinear() @@ -2036,7 +2036,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.linear.get_example_inputs() class FunctionalLinearReluLinearModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = FunctionalLinear() self.relu = nn.ReLU() @@ -2052,7 +2052,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.linear1.get_example_inputs() class FunctionalConv2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.weight = torch.rand(3, 3, 3, 3) self.bias = torch.rand(3) @@ -2068,7 +2068,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return (torch.rand(1, 3, 5, 5),) class SingleLayerFunctionalConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() @@ -2080,7 +2080,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.conv1.get_example_inputs() class TwoLayerFunctionalConvModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() self.conv2 = FunctionalConv2d() @@ -2094,7 +2094,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.conv1.get_example_inputs() class FunctionalConvReluModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = FunctionalConv2d() @@ -2107,7 +2107,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]: return self.conv.get_example_inputs() class FunctionalConvReluConvModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = FunctionalConv2d() self.relu = nn.ReLU() @@ -2126,7 +2126,7 @@ class SkipQuantModel(torch.nn.Module): r"""We can skip quantization by explicitly setting qconfig of a submodule to None """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub = InnerModule() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) @@ -2158,7 +2158,7 @@ def fuse_modules(self): class QuantStubModel(torch.nn.Module): r"""A Module with manually inserted `QuantStub` and `DeQuantStub` """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.qconfig = torch.ao.quantization.get_default_qconfig("qnnpack") self.quant = QuantStub() @@ -2243,11 +2243,11 @@ class ManualConvLinearSymmQATModel(ManualConvLinearQATModel): r"""Same as ManualConvLinearQATModule but with Symmetric Quantization. Supported only with qnnpack. """ - def __init__(self): + def __init__(self) -> None: super().__init__(default_symmetric_qnnpack_qat_qconfig) class ManualEmbeddingBagLinear(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, mode='sum') self.emb.qconfig = default_embedding_qat_qconfig @@ -2287,7 +2287,7 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: return self.dequant(x) class SubModelForFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float) self.bn = nn.BatchNorm2d(2).to(dtype=torch.float) @@ -2299,7 +2299,7 @@ def forward(self, x): class SubModelWithoutFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float) self.relu = nn.ReLU(inplace=False).to(dtype=torch.float) @@ -2354,7 +2354,7 @@ def forward(self, x): return x class ConvBNReLU(nn.Sequential): - def __init__(self): + def __init__(self) -> None: super().__init__( nn.Conv2d(3, 3, 1, 1, bias=False), nn.BatchNorm2d(3), @@ -2362,7 +2362,7 @@ def __init__(self): ) class ModelWithSequentialFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(3, 3, 1) self.relu1 = nn.ReLU(inplace=False) @@ -2388,7 +2388,7 @@ def forward(self, x): return x class ModelForFusionWithBias(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.Conv2d(3, 2, 5, bias=True).to(dtype=torch.float) self.bn1 = nn.BatchNorm2d(2).to(dtype=torch.float) @@ -2409,7 +2409,7 @@ def forward(self, x): return x class ModelForLinearBNFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = nn.Linear(20, 10) self.bn = nn.BatchNorm1d(10) @@ -2428,7 +2428,7 @@ def forward(self, x): class ModelForConvTransposeBNFusion(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = nn.ConvTranspose1d(3, 3, 1) self.bn1 = nn.BatchNorm1d(3) @@ -2450,7 +2450,7 @@ def forward(self, x): class ModelWithFunctionals(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.mycat = nnq.FloatFunctional() self.myadd = nnq.FloatFunctional() @@ -2474,7 +2474,7 @@ def forward(self, x): class ResNetBase(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2507,7 +2507,7 @@ def fuse_model(self): torch.ao.quantization.fuse_modules(self, [['conv1', 'bn1', 'relu1']], inplace=True) class ModelMultipleOps(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2542,7 +2542,7 @@ def forward(self, x): # accurately with fake-quant so this model does not # contain those operations class ModelMultipleOpsNoAvgPool(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() norm_layer = nn.BatchNorm2d inplanes = 3 @@ -2572,7 +2572,7 @@ def forward(self, x): return out class EmbeddingBagModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, include_last_offset=True, scale_grad_by_freq=False, mode='sum') @@ -2581,7 +2581,7 @@ def forward(self, indices, offsets, per_sample_weights): return self.emb(indices, offsets, per_sample_weights) class EmbeddingModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12) @@ -2589,7 +2589,7 @@ def forward(self, indices): return self.emb(indices) class EmbeddingWithStaticLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12) self.fc = torch.nn.Linear(4, 2) @@ -2671,7 +2671,7 @@ def forward( class TestHelperModules: class Conv2dPropAnnotaton(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.linear = torch.nn.Linear(3, 3) @@ -2684,7 +2684,7 @@ def forward(self, x): return x class Conv2dWithObsSharingOps(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.hardtanh = torch.nn.Hardtanh() @@ -2698,7 +2698,7 @@ def forward(self, x): return x class Conv2dWithTwoLinearPermute(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 16, 3) self.linear1 = torch.nn.Linear(16, 8, bias=False) @@ -2710,7 +2710,7 @@ def forward(self, x): return self.linear2(self.linear1(permute_out)) class Conv2dWithTwoLinear(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 16, 3) self.linear1 = torch.nn.Linear(64, 8, bias=False) @@ -2722,7 +2722,7 @@ def forward(self, x): return self.linear2(self.linear1(reshape_out)) class ConvLinearWPermute(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 8, 3) self.linear1 = torch.nn.Linear(8, 8) @@ -2733,7 +2733,7 @@ def forward(self, x): return self.linear1(permute_out) class TwoLinearModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear1 = torch.nn.Linear(8, 16, bias=False) self.linear2 = torch.nn.Linear(16, 8) @@ -2742,7 +2742,7 @@ def forward(self, x): return self.linear2(self.linear1(x)) class ConvMaxPool2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(2, 2, 1) self.pool = torch.nn.MaxPool2d(1, 1) @@ -2753,7 +2753,7 @@ def forward(self, x): return x class ConvWithAdaptiveAvgPool2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(3, 3, 3) self.adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d((1, 1)) @@ -2806,7 +2806,7 @@ def forward(self, x): return self.relu(x) class Conv2dThenConv1d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1d = torch.nn.Conv1d(3, 3, 3) self.conv2d = torch.nn.Conv2d(3, 3, 3) @@ -2821,7 +2821,7 @@ def example_inputs(self): return (torch.randn(1, 3, 5, 5),) class Conv2dWithCat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 3, 3) self.conv2 = torch.nn.Conv2d(3, 3, 3) @@ -2833,7 +2833,7 @@ def forward(self, x, y): return z class Conv2dWithTwoCat(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv1 = torch.nn.Conv2d(3, 3, 3) self.conv2 = torch.nn.Conv2d(3, 3, 3) @@ -2854,7 +2854,7 @@ def forward(self, x1, x2, x3, x4): return w class EmbeddingModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12) @@ -2862,7 +2862,7 @@ def forward(self, indices): return self.emb(indices) class EmbeddingConvLinearModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=8) self.conv = torch.nn.Conv2d(8, 16, (1, 3)) @@ -2898,7 +2898,7 @@ def forward(self, x): return x class ConvBnReLU2dAndLinearReLU(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv_bn_relu = TestHelperModules.ConvWithBNRelu(relu=True) self.linear = torch.nn.Linear(3, 8, bias=False) @@ -2911,7 +2911,7 @@ def forward(self, x): return linear_out class GroupwiseConv2d(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.conv = torch.nn.Conv2d(4, 4, 3, groups=2) @@ -2922,7 +2922,7 @@ def example_inputs(self): return (torch.randn(2, 4, 10, 10),) class LinearReluModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float) self.relu = torch.nn.ReLU() diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 12723039358e09..8ec568c665c1fb 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -1092,7 +1092,7 @@ def sanitize_pytest_xml(xml_file: str): def get_pytest_test_cases(argv: List[str]) -> List[str]: class TestCollectorPlugin: - def __init__(self): + def __init__(self) -> None: self.tests = [] def pytest_collection_finish(self, session): diff --git a/torch/testing/_internal/data/network1.py b/torch/testing/_internal/data/network1.py index e6180f4f2d2ed1..8755643a78cca8 100644 --- a/torch/testing/_internal/data/network1.py +++ b/torch/testing/_internal/data/network1.py @@ -5,6 +5,6 @@ class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(10, 20) diff --git a/torch/testing/_internal/data/network2.py b/torch/testing/_internal/data/network2.py index fdb583d0af92fe..19b0b8ee53d3b5 100644 --- a/torch/testing/_internal/data/network2.py +++ b/torch/testing/_internal/data/network2.py @@ -5,7 +5,7 @@ class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.linear = nn.Linear(10, 20) self.relu = nn.ReLU() diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 276dc4fa6e702d..a8e1434ecdb474 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -107,7 +107,7 @@ class NetWithBuffers(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -260,7 +260,7 @@ class DDPUnevenTestInput(NamedTuple): class _FC2(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = nn.Linear(10, 50, bias=True) self.fc.bias.requires_grad = False @@ -271,7 +271,7 @@ def forward(self, x): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(2, 10, bias=False) self.fc2 = _FC2() @@ -289,7 +289,7 @@ def forward(self, x): class LargeNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(1000, 2000, bias=False) self.fc2 = nn.Linear(2000, 500, bias=False) @@ -301,7 +301,7 @@ def forward(self, x): class Task(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.p = nn.Parameter(torch.ones(2, 2)) @@ -325,7 +325,7 @@ def forward(self, x): class UnusedParamTwoLinLayerNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 10, bias=False) @@ -338,7 +338,7 @@ def forward(self, x): class DictOutputModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.module = UnusedParamTwoLinLayerNet() @@ -352,7 +352,7 @@ def forward(self, x): class TwoLinLayerNet(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -383,7 +383,7 @@ def forward(self, x): class ControlFlowToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin1 = nn.Linear(10, 10, bias=False) self.lin2 = nn.Linear(10, 10, bias=False) @@ -4408,7 +4408,7 @@ def test_DistributedDataParallel_requires_grad(self): @skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"])) def test_ddp_zero_output_features(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10) self.relu = nn.ReLU() @@ -4422,7 +4422,7 @@ def __init__(self): @skip_but_pass_in_sandcastle_if(BACKEND == "nccl", "Gloo-only test") def test_ddp_create_graph(self): class Model(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.p = nn.Parameter(torch.tensor(1.0)) @@ -4979,7 +4979,7 @@ def _test_ddp_native_mixed_precision( mp_config = self._get_fp16_config() class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.m = torch.nn.Linear(1, 5) self.register_buffer('buffer', torch.randn(1, 2)) @@ -7241,7 +7241,7 @@ def test_ddp_uneven_inputs_stop_iteration_sync_bn(self): # for models with SyncBN or general collective comm when # throw_on_early_termination=True. class ModelWithComm(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(2, 40, bias=False) @@ -7523,7 +7523,7 @@ def test_ddp_uneven_input_exception(self): error_str = "Intentional error" class ExceptionModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.param = nn.Parameter(torch.ones(1, requires_grad=True)) @@ -7731,7 +7731,7 @@ def test_ddp_ignore_params_arg(self): @skip_if_lt_x_gpu(2) def test_ddp_unused_params_rebuild_buckets_exception(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10, bias=False) self.net2 = nn.Linear(10, 10, bias=False) @@ -7785,7 +7785,7 @@ def test_ddp_shared_grad_acc_unused_params(self): # When find_unused_parameters=True, ensure we mark unused parameters # even if they share gradient accumulators. class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() # net1, bias, and net1.bias are all unused params. self.net1 = nn.Linear(10, 5, bias=False) @@ -8984,7 +8984,7 @@ def test_ddp_build_debug_param_to_name_mapping(self): @skip_if_lt_x_gpu(2) def test_ddp_build_debug_param_to_name_mapping_requires_grad(self): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10) # Is not tracked by DDP and should not show up in param to @@ -9009,7 +9009,7 @@ def _test_ddp_multiple_nested_unused_params_error(self, ignore_sparse): debug_mode_off = dist.get_debug_level() == dist.DebugLevel.OFF class SubModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.embedding_net = EmbeddingNetDifferentParams(0) self.lin = TwoLinLayerNet() @@ -9025,7 +9025,7 @@ def forward(self, x): return x class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.sub_module = SubModule() @@ -9261,7 +9261,7 @@ def test_ddp_static_graph_nested_types(self): torch.cuda.set_device(rank) class NestedOutputModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(100, 1, bias=False) @@ -9347,7 +9347,7 @@ def test_ddp_returns_tensor_with_no_grad(self): torch.cuda.set_device(self.rank) class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(10, 10, bias=False) self.fc2 = nn.Linear(10, 10, bias=False) @@ -9384,7 +9384,7 @@ def forward(self, x): ) def test_detect_ddp_is_actually_static(self): class ToyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.net1 = nn.Linear(10, 10, bias=False) self.net2 = nn.Linear(10, 10) @@ -9430,7 +9430,7 @@ def forward(self, x, find_unused, dynamic): def _test_ddp_new_tensor_in_fwd(self, static_graph): # Test from https://github.com/pytorch/pytorch/issues/60733 class MyModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc1 = nn.Linear(10, 10, bias=False) self.fc2 = nn.Linear(10, 10, bias=False) @@ -9965,7 +9965,7 @@ def test_ddp_broadcast_buffer(self): torch.cuda.manual_seed(rank) class NetWithBuffers(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.a = nn.Linear(10, 10, bias=False) self.b = nn.Linear(10, 1, bias=False) @@ -10002,7 +10002,7 @@ def forward(self, x): ) def test_static_graph_multi_forward(self): class Net(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = nn.Linear(10, 10) self.relu = nn.ReLU() @@ -10084,7 +10084,7 @@ def test_sync_bn_logged(self): ) def test_stateless_api_with_ddp(self): class MockModule(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.l1 = torch.nn.Linear(1, 1) buffer = torch.ones(1) @@ -10131,7 +10131,7 @@ def forward(self, x): @skip_if_lt_x_gpu(2) def test_ddp_forward_backward_hook(self): class DummyTestModel(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() torch.manual_seed(0) self.fc = nn.Linear(2, 2) @@ -10391,7 +10391,7 @@ def __torch_dispatch__(self, func, types, args=(), kwargs=None): return func(*args, **kwargs) class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.fc = torch.nn.Linear(10, 10) diff --git a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py index 1ea7eace8294cb..5d7e7b1244bcea 100644 --- a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py @@ -44,7 +44,7 @@ class Policy(nn.Module): Copying the code to make these two examples independent. See https://github.com/pytorch/examples/tree/master/reinforcement_learning """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.affine1 = nn.Linear(4, 128) self.dropout = nn.Dropout(p=0.6) @@ -97,7 +97,7 @@ class Observer: select an action. Then, the observer applies the action to its environment and reports the reward to the agent. """ - def __init__(self): + def __init__(self) -> None: self.id = rpc.get_worker_info().id self.env = DummyEnv() self.env.seed(SEED) diff --git a/torch/testing/_internal/distributed/rpc/rpc_test.py b/torch/testing/_internal/distributed/rpc/rpc_test.py index 3a3977d7b89604..413f97d94eb281 100644 --- a/torch/testing/_internal/distributed/rpc/rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/rpc_test.py @@ -144,7 +144,7 @@ def set_and_check_done(value): TensorClass = namedtuple("TensorClass", ["tensors"]) class MyPickleClass: - def __init__(self): + def __init__(self) -> None: self.t = None def __getstate__(self): @@ -1446,7 +1446,7 @@ def test_pg_init_no_rpc_init(self): world_size=self.world_size) class MyModel(torch.nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.lin = torch.nn.Linear(3, 4) diff --git a/torch/testing/_internal/jit_metaprogramming_utils.py b/torch/testing/_internal/jit_metaprogramming_utils.py index 8171a95918939f..02a9fcc5405e51 100644 --- a/torch/testing/_internal/jit_metaprogramming_utils.py +++ b/torch/testing/_internal/jit_metaprogramming_utils.py @@ -604,7 +604,7 @@ def script_module(*args, **kwargs): class TheModule(torch.jit.ScriptModule): __constants__ = submodule_constants - def __init__(self): + def __init__(self) -> None: super().__init__() self.submodule = nn_module(*constructor_args) diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py index c0109ecacf7f7d..a8c7fa261f9982 100644 --- a/torch/testing/_internal/jit_utils.py +++ b/torch/testing/_internal/jit_utils.py @@ -770,7 +770,7 @@ def _get_py3_code(code, fn_name): return fn class TensorExprTestOptions: - def __init__(self): + def __init__(self) -> None: self.old_profiling_executor = torch._C._jit_set_profiling_executor(True) self.old_profiling_mode = torch._C._get_graph_executor_optimize(True) diff --git a/torch/utils/_sympy/value_ranges.py b/torch/utils/_sympy/value_ranges.py index 4a01d8e53b915b..29ee1886261bdb 100644 --- a/torch/utils/_sympy/value_ranges.py +++ b/torch/utils/_sympy/value_ranges.py @@ -936,7 +936,7 @@ def trunc(x): class ValueRangeAnalysis(SymPyValueRangeAnalysis): - def __init__(self): + def __init__(self) -> None: self.name = "ValueRangeAnalysis" boolean_operators = ( "xor", diff --git a/torch/utils/data/_utils/worker.py b/torch/utils/data/_utils/worker.py index b07439526bf648..c61b78d42d8a56 100644 --- a/torch/utils/data/_utils/worker.py +++ b/torch/utils/data/_utils/worker.py @@ -28,7 +28,7 @@ # is gone, and the only way to check it through OS is to let the worker have a process handle # of the manager and ask if the process status has changed. class ManagerWatchdog: - def __init__(self): + def __init__(self) -> None: self.manager_pid = os.getppid() # mypy cannot detect this code is windows only @@ -60,7 +60,7 @@ def is_alive(self): else: class ManagerWatchdog: # type: ignore[no-redef] - def __init__(self): + def __init__(self) -> None: self.manager_pid = os.getppid() self.manager_dead = False diff --git a/torch/utils/module_tracker.py b/torch/utils/module_tracker.py index 9feef40ca4da88..01e966c712b595 100644 --- a/torch/utils/module_tracker.py +++ b/torch/utils/module_tracker.py @@ -52,7 +52,7 @@ def my_linear(m1, m2, bias): A Set containing the fqn for each module currently running their forward """ - def __init__(self): + def __init__(self) -> None: self.parents = {"Global"} self._known_modules: weakref.WeakKeyDictionary = weakref.WeakKeyDictionary() self._seen_modules: weakref.WeakSet = weakref.WeakSet()