diff --git a/torch/_classes.py b/torch/_classes.py
index 58b347453524a2..069f13dcb6793d 100644
--- a/torch/_classes.py
+++ b/torch/_classes.py
@@ -19,7 +19,7 @@ def __getattr__(self, attr):
class _Classes(types.ModuleType):
__file__ = "_classes.py"
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("torch.classes")
def __getattr__(self, name):
diff --git a/torch/_decomp/decompositions_for_rng.py b/torch/_decomp/decompositions_for_rng.py
index 66bd33075a5ed8..a62a28f783b713 100644
--- a/torch/_decomp/decompositions_for_rng.py
+++ b/torch/_decomp/decompositions_for_rng.py
@@ -71,7 +71,7 @@ class PhiloxState:
trace time.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.reset()
def reset(self):
diff --git a/torch/_dynamo/backends/distributed.py b/torch/_dynamo/backends/distributed.py
index a58571c77c9302..8d65f1670ae1f1 100644
--- a/torch/_dynamo/backends/distributed.py
+++ b/torch/_dynamo/backends/distributed.py
@@ -247,7 +247,7 @@ def run_node(self, n: Node) -> Any:
# This gives us the appropriately strided outputs here which will reflect runtime strides.
class FakeifyFirstAOTInvocationGuard:
- def __init__(self):
+ def __init__(self) -> None:
self.tc = torch._guards.TracingContext.try_get()
assert self.tc
torch._guards.TracingContext.try_get().fakify_first_call = True
diff --git a/torch/_dynamo/code_context.py b/torch/_dynamo/code_context.py
index 59c912bd30f771..727aad9349555f 100644
--- a/torch/_dynamo/code_context.py
+++ b/torch/_dynamo/code_context.py
@@ -5,7 +5,7 @@
class CodeContextDict:
- def __init__(self):
+ def __init__(self) -> None:
self.code_context = ExactWeakKeyDictionary()
def has_context(self, code: types.CodeType):
diff --git a/torch/_dynamo/debug_utils.py b/torch/_dynamo/debug_utils.py
index 5e9656f2068c36..49d9b302faebec 100644
--- a/torch/_dynamo/debug_utils.py
+++ b/torch/_dynamo/debug_utils.py
@@ -170,7 +170,7 @@ def convert(gm):
"""
from torch.nn import *
class Repro(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
"""
)
@@ -491,7 +491,7 @@ def _mk_defaulter(d: T) -> Callable[[Optional[T]], T]:
class NopInputReader:
- def __init__(self):
+ def __init__(self) -> None:
self.total = 0
def storage(self, storage_hash, nbytes, *, device=None, dtype_hint=None):
diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py
index 3630f0da6b247c..de4ef26fb1b284 100644
--- a/torch/_dynamo/eval_frame.py
+++ b/torch/_dynamo/eval_frame.py
@@ -496,7 +496,7 @@ def _fn(*args, **kwargs):
wrapper function.
>> class CallableClass:
- >> def __init__(self):
+ >> def __init__(self) -> None:
>> super().__init__()
>> self.relu = torch.nn.ReLU()
>>
@@ -577,7 +577,7 @@ def __reduce__(self):
class RunOnlyContext(_TorchDynamoContext):
- def __init__(self):
+ def __init__(self) -> None:
# cudagraph trees relies on generation increment
def on_enter():
torch._dynamo.mutation_guard.GenerationTracker.generation += 1
@@ -589,7 +589,7 @@ def __reduce__(self):
class DisableContext(_TorchDynamoContext):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(callback=None)
def __call__(self, fn):
diff --git a/torch/_dynamo/exc.py b/torch/_dynamo/exc.py
index 2ca862c008740a..5a0915a9727fd9 100644
--- a/torch/_dynamo/exc.py
+++ b/torch/_dynamo/exc.py
@@ -74,7 +74,7 @@ def __init__(self, name):
class ResetRequired(TorchDynamoException):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(
textwrap.dedent(
"""
diff --git a/torch/_dynamo/profiler.py b/torch/_dynamo/profiler.py
index b7e9553ce219f7..841ab87cdf68d9 100644
--- a/torch/_dynamo/profiler.py
+++ b/torch/_dynamo/profiler.py
@@ -92,7 +92,7 @@ def print_missing(stack):
class Profiler:
unique_graphs = 0
- def __init__(self):
+ def __init__(self) -> None:
self.prof = torch.profiler.profile(
activities=[torch.profiler.ProfilerActivity.CPU],
with_stack=should_print_missing(),
diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py
index 09752822dd8209..5353327d98fee4 100644
--- a/torch/_dynamo/variables/base.py
+++ b/torch/_dynamo/variables/base.py
@@ -70,7 +70,7 @@ class MutableLocal(MutableLocalBase):
state.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(MutableLocalSource.Local)
def __hash__(self):
diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py
index f2819fb2b1e1e3..383f9e8b4d04b6 100644
--- a/torch/_dynamo/variables/builder.py
+++ b/torch/_dynamo/variables/builder.py
@@ -270,7 +270,7 @@ def __eq__(self, other):
class BackwardStateGraphArg(GraphArg):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(
source=None,
_example=BackwardState(),
@@ -2638,7 +2638,7 @@ class SourcelessBuilder:
if/else type->VariableTracker trees that were cropping up all over dynamo.
"""
- def __init__(self):
+ def __init__(self) -> None:
raise AssertionError("Use SourcelessBuilder.create()")
@staticmethod
diff --git a/torch/_export/db/examples/class_method.py b/torch/_export/db/examples/class_method.py
index 5d7f8b5b705497..f701f54d4f4ea1 100644
--- a/torch/_export/db/examples/class_method.py
+++ b/torch/_export/db/examples/class_method.py
@@ -10,7 +10,7 @@ class ClassMethod(torch.nn.Module):
def method(cls, x):
return x + 1
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(4, 2)
diff --git a/torch/_export/db/examples/cond_branch_class_method.py b/torch/_export/db/examples/cond_branch_class_method.py
index 9ce4a9d6f34a56..22600cc504348d 100644
--- a/torch/_export/db/examples/cond_branch_class_method.py
+++ b/torch/_export/db/examples/cond_branch_class_method.py
@@ -26,7 +26,7 @@ class CondBranchClassMethod(torch.nn.Module):
NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.subm = MySubModule()
diff --git a/torch/_export/db/examples/model_attr_mutation.py b/torch/_export/db/examples/model_attr_mutation.py
index dfebbebd8b1b38..4aa623c7dc39ef 100644
--- a/torch/_export/db/examples/model_attr_mutation.py
+++ b/torch/_export/db/examples/model_attr_mutation.py
@@ -8,7 +8,7 @@ class ModelAttrMutation(torch.nn.Module):
Attribute mutation is not supported.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.attr_list = [torch.randn(3, 2), torch.randn(3, 2)]
diff --git a/torch/_export/db/examples/scalar_output.py b/torch/_export/db/examples/scalar_output.py
index 83dd36379677d8..86d3b4645330c4 100644
--- a/torch/_export/db/examples/scalar_output.py
+++ b/torch/_export/db/examples/scalar_output.py
@@ -11,7 +11,7 @@ class ScalarOutput(torch.nn.Module):
Returning scalar values from the graph is supported, in addition to Tensor
outputs. Symbolic shapes are captured and rank is specialized.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def forward(self, x):
diff --git a/torch/_export/db/examples/specialized_attribute.py b/torch/_export/db/examples/specialized_attribute.py
index 39f7314bec70ad..f17092f9afc681 100644
--- a/torch/_export/db/examples/specialized_attribute.py
+++ b/torch/_export/db/examples/specialized_attribute.py
@@ -11,7 +11,7 @@ class SpecializedAttribute(torch.nn.Module):
Model attributes are specialized.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.a = "moo"
self.b = 4
diff --git a/torch/_export/passes/lift_constants_pass.py b/torch/_export/passes/lift_constants_pass.py
index 823c66d2bc0942..08d93287d3205c 100644
--- a/torch/_export/passes/lift_constants_pass.py
+++ b/torch/_export/passes/lift_constants_pass.py
@@ -24,7 +24,7 @@ class ConstantAttrMap(collections.abc.MutableMapping):
if that's the case).
"""
- def __init__(self):
+ def __init__(self) -> None:
# Underlying dict that we use to implement this mapping.
self._constant_attrs: Dict[
Union[int, torch.Tensor, FakeScriptObject], List[Any]
diff --git a/torch/_export/serde/serialize.py b/torch/_export/serde/serialize.py
index ae0f6e39f23b90..28509b8341691f 100644
--- a/torch/_export/serde/serialize.py
+++ b/torch/_export/serde/serialize.py
@@ -1413,7 +1413,7 @@ class Result:
constants: Dict[str, Union[torch.Tensor, FakeScriptObject, torch.ScriptObject]]
example_inputs: Optional[Tuple[Tuple[torch.Tensor, ...], Dict[str, Any]]]
- def __init__(self):
+ def __init__(self) -> None:
self.serialized_name_to_node: Dict[str, torch.fx.Node] = {}
self.serialized_name_to_meta: Dict[str, MetaType] = {}
self.graph = torch.fx.Graph()
diff --git a/torch/_functorch/_aot_autograd/schemas.py b/torch/_functorch/_aot_autograd/schemas.py
index 77f1cf220140df..9b1b40b4830401 100644
--- a/torch/_functorch/_aot_autograd/schemas.py
+++ b/torch/_functorch/_aot_autograd/schemas.py
@@ -602,7 +602,7 @@ class SubclassMeta:
# Optional field because we don't compute for inference graphs
grad_input_metas: Optional[List[Union[int, SubclassCreationMeta]]] = None
- def __init__(self):
+ def __init__(self) -> None:
# The fields in this class get set after its construction.
pass
diff --git a/torch/_functorch/aot_autograd.py b/torch/_functorch/aot_autograd.py
index b7bd95a3ed4941..e9fedb3d53cc9e 100644
--- a/torch/_functorch/aot_autograd.py
+++ b/torch/_functorch/aot_autograd.py
@@ -878,7 +878,7 @@ def functional_call(named_params, named_buffers, *args, **kwargs):
)
class AOTModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.orig_module = mod
diff --git a/torch/_functorch/autograd_function.py b/torch/_functorch/autograd_function.py
index f80b7dee55be90..270c1895f6fd54 100644
--- a/torch/_functorch/autograd_function.py
+++ b/torch/_functorch/autograd_function.py
@@ -30,7 +30,7 @@
# We do this by using creating a custom HigherOrderOperator that only functorch
# dispatches specially.
class CustomFunctionHigherOrderOperator(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("custom_function_call")
def __call__(self, autograd_function, *args, **kwargs):
@@ -713,7 +713,7 @@ def new_forward(ctx, *args, **kwargs):
class AutogradFunctionApply(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("autograd_function_apply")
def __call__(self, fwd, bwd, *fwd_args, **fwd_kwargs):
diff --git a/torch/_guards.py b/torch/_guards.py
index c0633861a7b812..91c2c664d7544c 100644
--- a/torch/_guards.py
+++ b/torch/_guards.py
@@ -407,7 +407,7 @@ def __eq__(self, other):
class ModuleContext(Checkpointable[ModuleContextCheckpointState]):
- def __init__(self):
+ def __init__(self) -> None:
self.nn_modules: Dict[str, Any] = {}
def copy_graphstate(self):
@@ -456,7 +456,7 @@ class GlobalContext(Checkpointable[GlobalContextCheckpointState]):
"autocast_cache_enabled",
}
- def __init__(self):
+ def __init__(self) -> None:
self.global_state: Dict[str, Tuple[Callable, ...]] = {}
def copy_graphstate(self):
@@ -524,7 +524,7 @@ def remove_guards_with_source(self, source):
class GuardsContext(Checkpointable[GuardsCheckpointState]):
- def __init__(self):
+ def __init__(self) -> None:
self.dynamo_guards: GuardsSet = GuardsSet()
self.aotautograd_guards: List[GuardEnvExpr] = []
diff --git a/torch/_higher_order_ops/auto_functionalize.py b/torch/_higher_order_ops/auto_functionalize.py
index 40178fa750fc98..00f43e6acdeeeb 100644
--- a/torch/_higher_order_ops/auto_functionalize.py
+++ b/torch/_higher_order_ops/auto_functionalize.py
@@ -54,7 +54,7 @@ class AutoFunctionalized(HigherOrderOperator):
underscore is to prevent collisions with kwarg names in **kwargs.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("auto_functionalized")
def __call__(
diff --git a/torch/_higher_order_ops/effects.py b/torch/_higher_order_ops/effects.py
index f20c87c7e5876f..3bba77a5cfc80a 100644
--- a/torch/_higher_order_ops/effects.py
+++ b/torch/_higher_order_ops/effects.py
@@ -55,7 +55,7 @@ class WithEffects(HigherOrderOperator):
per "effect type", which are enumerated in the _EffectType enum.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("with_effects")
def __call__(
diff --git a/torch/_higher_order_ops/flex_attention.py b/torch/_higher_order_ops/flex_attention.py
index e3c9d718b2f4db..992c7398b2a93d 100644
--- a/torch/_higher_order_ops/flex_attention.py
+++ b/torch/_higher_order_ops/flex_attention.py
@@ -38,7 +38,7 @@ def __torch_function__(self, func, types, args, kwargs=None):
class FlexAttentionHOP(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("flex_attention")
def __call__(
@@ -74,7 +74,7 @@ def __call__(
class FlexAttentionBackwardHOP(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("flex_attention_backward")
def __call__(
diff --git a/torch/_higher_order_ops/out_dtype.py b/torch/_higher_order_ops/out_dtype.py
index 5c9ca4f3f16913..d1557909427645 100644
--- a/torch/_higher_order_ops/out_dtype.py
+++ b/torch/_higher_order_ops/out_dtype.py
@@ -45,7 +45,7 @@ class OutDtypeOperator(HigherOrderOperator):
3. Cast the output to `out_dtype`
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("out_dtype")
# TODO(ydwu4): Subclassing HigherOrderOperator causes __module__ to
# become different (torch._higher_order_ops.out_dtype) which will result
diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py
index 779ab2838b3c27..ff01b0c0124025 100644
--- a/torch/_higher_order_ops/triton_kernel_wrap.py
+++ b/torch/_higher_order_ops/triton_kernel_wrap.py
@@ -519,7 +519,7 @@ def identify_mutated_tensors(kernel, kwargs):
# Used for wrapping a Triton Kernel
class TritonKernelWrapperMutation(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("triton_kernel_wrapper_mutation")
@@ -528,7 +528,7 @@ def __init__(self):
# Used for wrapping a Triton Kernel in a functional manner
class TritonKernelWrapperFunctional(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("triton_kernel_wrapper_functional")
diff --git a/torch/_higher_order_ops/while_loop.py b/torch/_higher_order_ops/while_loop.py
index 4924e1f3d4499d..e19fa162105816 100644
--- a/torch/_higher_order_ops/while_loop.py
+++ b/torch/_higher_order_ops/while_loop.py
@@ -18,7 +18,7 @@
class WhileLoopOp(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("while_loop")
def __call__(
diff --git a/torch/_higher_order_ops/wrap.py b/torch/_higher_order_ops/wrap.py
index a26253405c430b..d6faef206619a9 100644
--- a/torch/_higher_order_ops/wrap.py
+++ b/torch/_higher_order_ops/wrap.py
@@ -15,7 +15,7 @@
# Used for testing the HigherOrderOperator mechanism
class Wrap(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("wrap")
def __call__(self, func, *args, **kwargs):
@@ -36,7 +36,7 @@ def wrapper():
class WrapWithSetGradEnabled(HigherOrderOperator):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("wrap_with_set_grad_enabled")
def __call__(self, enable_grad, wrapped_func, *args, **kwargs):
@@ -74,7 +74,7 @@ class WrapActivationCheckpoint(HigherOrderOperator):
partitioners. See TagActivationCheckpoint for more information.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("wrap_activation_checkpoint")
def __call__(self, function, *args, **kwargs):
@@ -113,7 +113,7 @@ class TagActivationCheckpoint(HigherOrderOperator):
the forward and recomputed forward in backward.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__("tag_activation_checkpoint")
@staticmethod
diff --git a/torch/_inductor/codegen/common.py b/torch/_inductor/codegen/common.py
index 05b7475785e572..2a192f1ab69a31 100644
--- a/torch/_inductor/codegen/common.py
+++ b/torch/_inductor/codegen/common.py
@@ -1560,7 +1560,7 @@ def newvar(self, bounds: ValueRanges[Any] = ValueRanges.unknown()) -> CSEVariabl
class CodeGen:
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.exit_stack = contextlib.ExitStack()
diff --git a/torch/_inductor/codegen/cpp_wrapper_cuda.py b/torch/_inductor/codegen/cpp_wrapper_cuda.py
index 8eed428de07a92..3def5af40affdd 100644
--- a/torch/_inductor/codegen/cpp_wrapper_cuda.py
+++ b/torch/_inductor/codegen/cpp_wrapper_cuda.py
@@ -29,7 +29,7 @@ class CppWrapperCuda(CppWrapperCpu):
Generates cpp wrapper for running on GPU and calls CUDA kernels
"""
- def __init__(self):
+ def __init__(self) -> None:
self.device = "cuda"
super().__init__()
self.grid_id = count()
diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py
index 8e23e877fe26f2..5c6626052ed224 100644
--- a/torch/_inductor/codegen/triton.py
+++ b/torch/_inductor/codegen/triton.py
@@ -1113,7 +1113,7 @@ class HelperFunctions:
_templates_seen: Dict[str, str] # Template code to function name
finalized_helpers: List[str]
- def __init__(self):
+ def __init__(self) -> None:
self._templates_seen = {}
self.finalized_helpers = []
diff --git a/torch/_inductor/dependencies.py b/torch/_inductor/dependencies.py
index 5339ad6c214ec1..31cee49a31984b 100644
--- a/torch/_inductor/dependencies.py
+++ b/torch/_inductor/dependencies.py
@@ -588,7 +588,7 @@ def canonicalization_prefix():
class FreeUnbackedSymbolsOpsHandler:
symbols: Set[sympy.Symbol]
- def __init__(self):
+ def __init__(self) -> None:
self.symbols = set()
def __getattr__(self, name: str) -> Callable[..., Any]:
diff --git a/torch/_inductor/exc.py b/torch/_inductor/exc.py
index 07c1eebf99b8db..2505c8a31196b8 100644
--- a/torch/_inductor/exc.py
+++ b/torch/_inductor/exc.py
@@ -65,7 +65,7 @@ class SubgraphLoweringException(RuntimeError):
class InvalidCxxCompiler(RuntimeError):
- def __init__(self):
+ def __init__(self) -> None:
from . import config
super().__init__(
diff --git a/torch/_inductor/fx_passes/misc_patterns.py b/torch/_inductor/fx_passes/misc_patterns.py
index d7873fede3c587..0f608952a2fb27 100644
--- a/torch/_inductor/fx_passes/misc_patterns.py
+++ b/torch/_inductor/fx_passes/misc_patterns.py
@@ -79,7 +79,7 @@ class NumpyCompatNormalization:
inverse_mapping: Dict[str, str]
cache: Dict["torch.fx.graph.Target", Set[str]]
- def __init__(self):
+ def __init__(self) -> None:
self.cache = {} # callable -> tuple of replaceable args e.g. ["axis"]
self.inverse_mapping = {}
for actual_kwarg, numpy_kwargs in self.numpy_compat.items():
diff --git a/torch/_inductor/fx_passes/mkldnn_fusion.py b/torch/_inductor/fx_passes/mkldnn_fusion.py
index 34ddbf90b7fd22..c930608c766134 100644
--- a/torch/_inductor/fx_passes/mkldnn_fusion.py
+++ b/torch/_inductor/fx_passes/mkldnn_fusion.py
@@ -1207,7 +1207,7 @@ def _eliminate_duplicate_packed_nodes(gm):
Combine packed weight nodes with the same inputs to reduce memory usage.
for example:
class Model(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = nn.Linear(32, 32, bias=True)
diff --git a/torch/_inductor/metrics.py b/torch/_inductor/metrics.py
index 18e00b090ce2bc..5c26e322f12633 100644
--- a/torch/_inductor/metrics.py
+++ b/torch/_inductor/metrics.py
@@ -99,7 +99,7 @@ class CachedMetricsHelper:
apply on a cache hit.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.cached_metrics = {}
for metric in get_metric_fields():
self.cached_metrics[metric] = globals()[metric]
diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py
index 380fbe515c3cd0..d5475b8e14ee33 100644
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@@ -940,7 +940,7 @@ def __add__(self, other):
class FakeIndentedBuffer(IndentedBuffer):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def __getattribute__(self, name):
@@ -1219,7 +1219,7 @@ class DebugDirManager:
counter = itertools.count(0)
prev_debug_name: str
- def __init__(self):
+ def __init__(self) -> None:
self.id = next(DebugDirManager.counter)
def __enter__(self):
@@ -1268,7 +1268,7 @@ def patched_compile_to_module(self: GraphLowering):
class DummyModule:
"""This is empty to replace the generated triton module"""
- def __init__(self):
+ def __init__(self) -> None:
pass
def call(self, *args, **kwargs):
diff --git a/torch/_lazy/closure.py b/torch/_lazy/closure.py
index 32b2c58ba2b8dc..94c12c075a092b 100644
--- a/torch/_lazy/closure.py
+++ b/torch/_lazy/closure.py
@@ -7,7 +7,7 @@
class ClosureHandler:
- def __init__(self):
+ def __init__(self) -> None:
pass
def run(self, closure):
diff --git a/torch/_library/fake_class_registry.py b/torch/_library/fake_class_registry.py
index a56f138f4b0879..213e88ac3e5513 100644
--- a/torch/_library/fake_class_registry.py
+++ b/torch/_library/fake_class_registry.py
@@ -42,7 +42,7 @@ def from_real(cls, real_obj: torch.ScriptObject):
class FakeClassRegistry:
- def __init__(self):
+ def __init__(self) -> None:
self._registered_class: Dict[str, Any] = {}
def has_impl(self, full_qualname: str) -> bool:
diff --git a/torch/_python_dispatcher.py b/torch/_python_dispatcher.py
index 644cf92fda2bdd..2dfdbb296a4b2d 100644
--- a/torch/_python_dispatcher.py
+++ b/torch/_python_dispatcher.py
@@ -70,7 +70,7 @@ class PythonDispatcher:
]
supported_keys = runtime_keys + alias_keys
- def __init__(self):
+ def __init__(self) -> None:
C._dispatch_check_invariants(self.name) # type: ignore[attr-defined]
self.ref = C._dispatch_library("FRAGMENT", self.namespace, "")
self.ref.def_("foo(Tensor x) -> Tensor")
diff --git a/torch/_subclasses/schema_check_mode.py b/torch/_subclasses/schema_check_mode.py
index d8843eec8100e3..d7ad9ebd28170c 100644
--- a/torch/_subclasses/schema_check_mode.py
+++ b/torch/_subclasses/schema_check_mode.py
@@ -60,7 +60,7 @@ def clone_inputs(args):
class SchemaCheckMode(TorchDispatchMode):
- def __init__(self):
+ def __init__(self) -> None:
# Information recorded for testing purposes. For example:
# - incorrect schemas
# - overly conservative schemas
diff --git a/torch/ao/nn/quantized/modules/functional_modules.py b/torch/ao/nn/quantized/modules/functional_modules.py
index b707a1f681cefc..45dc7fc0444c16 100644
--- a/torch/ao/nn/quantized/modules/functional_modules.py
+++ b/torch/ao/nn/quantized/modules/functional_modules.py
@@ -36,7 +36,7 @@ class FloatFunctional(torch.nn.Module):
- mul_scalar
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.activation_post_process = torch.nn.Identity()
@@ -190,7 +190,7 @@ class QFunctional(torch.nn.Module):
- mul_scalar
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.scale = 1.0
self.zero_point = 0
diff --git a/torch/ao/ns/fx/qconfig_multi_mapping.py b/torch/ao/ns/fx/qconfig_multi_mapping.py
index a7c0f0a27f6613..8cd4190110ffe3 100644
--- a/torch/ao/ns/fx/qconfig_multi_mapping.py
+++ b/torch/ao/ns/fx/qconfig_multi_mapping.py
@@ -72,7 +72,7 @@ class QConfigMultiMapping:
"""
- def __init__(self):
+ def __init__(self) -> None:
# initialize this with 1 QConfigMapping to avoid corner cases
self.qconfig_mappings_list: List[QConfigMapping] = [QConfigMapping()]
diff --git a/torch/ao/pruning/_experimental/pruner/README.md b/torch/ao/pruning/_experimental/pruner/README.md
index 026fd33b2876fb..2885dff04020d2 100644
--- a/torch/ao/pruning/_experimental/pruner/README.md
+++ b/torch/ao/pruning/_experimental/pruner/README.md
@@ -99,7 +99,7 @@ from torch.ao.pruning._experimental.pruner import SaliencyPruner
# Define model
class Model(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Linear(700, 500, bias=True),
diff --git a/torch/ao/quantization/fake_quantize.py b/torch/ao/quantization/fake_quantize.py
index 57cc1df04d54a5..8ef266ebe47f50 100644
--- a/torch/ao/quantization/fake_quantize.py
+++ b/torch/ao/quantization/fake_quantize.py
@@ -85,7 +85,7 @@ class FakeQuantizeBase(ABC, Module):
fake_quant_enabled: torch.Tensor
observer_enabled: torch.Tensor
- def __init__(self):
+ def __init__(self) -> None:
"""Set fake_quant_enabled and observer_enabled."""
super().__init__()
# fake_quant_enabled and observer_enabled are buffers to support their
diff --git a/torch/ao/quantization/fx/README.md b/torch/ao/quantization/fx/README.md
index a8bd154791b721..ca116b282e7ab5 100644
--- a/torch/ao/quantization/fx/README.md
+++ b/torch/ao/quantization/fx/README.md
@@ -70,7 +70,7 @@ In the following, I’ll first have a detailed description for each step, and th
```
class LinearReLUModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 10).float()
self.relu = torch.nn.ReLU()
diff --git a/torch/ao/quantization/fx/_model_report/detector.py b/torch/ao/quantization/fx/_model_report/detector.py
index 534e73bfb0a4cb..9db118a33652a0 100644
--- a/torch/ao/quantization/fx/_model_report/detector.py
+++ b/torch/ao/quantization/fx/_model_report/detector.py
@@ -137,7 +137,7 @@ class DetectorBase(ABC):
- Should return a str-based report and dict info in Tuple[str,Dict] format
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.detector_config_info = None
diff --git a/torch/ao/quantization/fx/custom_config.py b/torch/ao/quantization/fx/custom_config.py
index 7aa408f0cebd4b..cb00c95fdee1d8 100644
--- a/torch/ao/quantization/fx/custom_config.py
+++ b/torch/ao/quantization/fx/custom_config.py
@@ -63,7 +63,7 @@ class PrepareCustomConfig:
.set_preserved_attributes(["attr1", "attr2"])
"""
- def __init__(self):
+ def __init__(self) -> None:
self.standalone_module_names: Dict[str, StandaloneModuleConfigEntry] = {}
self.standalone_module_classes: Dict[Type, StandaloneModuleConfigEntry] = {}
self.float_to_observed_mapping: Dict[QuantType, Dict[Type, Type]] = {}
@@ -382,7 +382,7 @@ class ConvertCustomConfig:
.set_preserved_attributes(["attr1", "attr2"])
"""
- def __init__(self):
+ def __init__(self) -> None:
self.observed_to_quantized_mapping: Dict[QuantType, Dict[Type, Type]] = {}
self.preserved_attributes: List[str] = []
@@ -477,7 +477,7 @@ class FuseCustomConfig:
fuse_custom_config = FuseCustomConfig().set_preserved_attributes(["attr1", "attr2"])
"""
- def __init__(self):
+ def __init__(self) -> None:
self.preserved_attributes: List[str] = []
def __repr__(self):
diff --git a/torch/ao/quantization/observer.py b/torch/ao/quantization/observer.py
index 64b14b506143b3..e26f03027116e2 100644
--- a/torch/ao/quantization/observer.py
+++ b/torch/ao/quantization/observer.py
@@ -1568,7 +1568,7 @@ class ReuseInputObserver(ObserverBase):
Note: this is only enabled in FX Graph Mode Quantization
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(torch.quint8, is_dynamic=False)
def forward(self, x):
diff --git a/torch/ao/quantization/qconfig_mapping.py b/torch/ao/quantization/qconfig_mapping.py
index 1b4d9cecbf3fc0..2c12be74ce6815 100644
--- a/torch/ao/quantization/qconfig_mapping.py
+++ b/torch/ao/quantization/qconfig_mapping.py
@@ -229,7 +229,7 @@ class QConfigMapping:
"""
- def __init__(self):
+ def __init__(self) -> None:
# In increasing match priority:
self.global_qconfig: QConfigAny = None
self.object_type_qconfigs: OrderedDict[
diff --git a/torch/ao/quantization/quantize_fx.py b/torch/ao/quantization/quantize_fx.py
index f5949d985f99c8..dd8f3e811a39ea 100644
--- a/torch/ao/quantization/quantize_fx.py
+++ b/torch/ao/quantization/quantize_fx.py
@@ -289,7 +289,7 @@ def prepare_fx(
from torch.ao.quantization.quantize_fx import prepare_fx
class Submodule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 5)
def forward(self, x):
@@ -297,7 +297,7 @@ def forward(self, x):
return x
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 5)
self.sub = Submodule()
@@ -427,7 +427,7 @@ def prepare_qat_fx(
from torch.ao.quantization.quantize_fx import prepare_qat_fx
class Submodule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 5)
def forward(self, x):
@@ -435,7 +435,7 @@ def forward(self, x):
return x
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 5)
self.sub = Submodule()
diff --git a/torch/ao/quantization/quantize_pt2e.py b/torch/ao/quantization/quantize_pt2e.py
index 41676934440d47..1e1848a6ff0d4a 100644
--- a/torch/ao/quantization/quantize_pt2e.py
+++ b/torch/ao/quantization/quantize_pt2e.py
@@ -56,7 +56,7 @@ def prepare_pt2e(
)
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 10)
@@ -129,7 +129,7 @@ def prepare_qat_pt2e(
)
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(5, 10)
diff --git a/torch/ao/quantization/quantizer/embedding_quantizer.py b/torch/ao/quantization/quantizer/embedding_quantizer.py
index 6c93c0b88a194e..32ec3814637cba 100644
--- a/torch/ao/quantization/quantizer/embedding_quantizer.py
+++ b/torch/ao/quantization/quantizer/embedding_quantizer.py
@@ -42,7 +42,7 @@ def get_embedding_operators_config() -> OperatorConfig:
class EmbeddingQuantizer(Quantizer):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
@classmethod
diff --git a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py
index 09db71a191b79c..574af30a7159b9 100644
--- a/torch/ao/quantization/quantizer/x86_inductor_quantizer.py
+++ b/torch/ao/quantization/quantizer/x86_inductor_quantizer.py
@@ -436,7 +436,7 @@ class X86InductorQuantizer(Quantizer):
supported_config_and_operators = _get_supported_config_and_operators()
module_function_to_aten_operator_type = _map_module_function_to_aten_operator_type()
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.global_config: Optional[QuantizationConfig] = None
self.operator_type_qconfig: Dict[
diff --git a/torch/ao/quantization/quantizer/xnnpack_quantizer.py b/torch/ao/quantization/quantizer/xnnpack_quantizer.py
index 93712ded503c7a..cc17057c82a49a 100644
--- a/torch/ao/quantization/quantizer/xnnpack_quantizer.py
+++ b/torch/ao/quantization/quantizer/xnnpack_quantizer.py
@@ -268,7 +268,7 @@ class XNNPACKQuantizer(Quantizer):
"linear",
]
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.global_config: Optional[QuantizationConfig] = None
self.operator_type_config: Dict[
diff --git a/torch/ao/quantization/utils.py b/torch/ao/quantization/utils.py
index dad16df5b93b0d..ff22da04a222c8 100644
--- a/torch/ao/quantization/utils.py
+++ b/torch/ao/quantization/utils.py
@@ -513,7 +513,7 @@ def _get_path_of_module(
Example::
>> class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
self.linear = torch.nn.Linear(5, 5)
def forward(self, x):
return self.linear(x)
diff --git a/torch/autograd/profiler_util.py b/torch/autograd/profiler_util.py
index e3fc95580c5a62..67eb989f57c80c 100644
--- a/torch/autograd/profiler_util.py
+++ b/torch/autograd/profiler_util.py
@@ -645,7 +645,7 @@ def __repr__(self):
class FunctionEventAvg(FormattedTimesMixin):
"""Used to average stats over multiple FunctionEvent objects."""
- def __init__(self):
+ def __init__(self) -> None:
self.key: Optional[str] = None
self.count: int = 0
self.node_id: int = 0
diff --git a/torch/backends/xeon/run_cpu.py b/torch/backends/xeon/run_cpu.py
index bdf07e28617448..634c50da4dbc2c 100644
--- a/torch/backends/xeon/run_cpu.py
+++ b/torch/backends/xeon/run_cpu.py
@@ -266,7 +266,7 @@ class _Launcher:
or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or \
{expanduser('~')}/.local/lib/ so the LD_PRELOAD environment variable will not be set."
- def __init__(self):
+ def __init__(self) -> None:
self.cpuinfo = _CPUinfo()
def add_lib_preload(self, lib_type):
diff --git a/torch/csrc/jit/backends/backend_debug_handler.h b/torch/csrc/jit/backends/backend_debug_handler.h
index d25ce2f8cb0416..d4b00fe340f2b2 100644
--- a/torch/csrc/jit/backends/backend_debug_handler.h
+++ b/torch/csrc/jit/backends/backend_debug_handler.h
@@ -77,17 +77,17 @@ namespace jit {
*
* So why does debug handle map to DebugInfoTuple = {source range and inlined
* cs}? {debug_handle, source_range_tag, serialized_callstack} Take this
- * example: class L(nn.Module): def __init__(self):
+ * example: class L(nn.Module): def __init__(self) -> None:
* ...
* def forward(self, x):
* return x * 5
* class M(nn.Module):
- * def __init__(self):
+ * def __init__(self) -> None:
* ...
* def forward(self, x):
* return x - 2
* class N(nn.Module):
- * def __init__(self):
+ * def __init__(self) -> None:
* self.m = M()
* def forward(self, x):
* return self.m(x) + 3
diff --git a/torch/csrc/jit/docs/serialization.md b/torch/csrc/jit/docs/serialization.md
index 106cea55478af8..3fb463c7e7fe37 100644
--- a/torch/csrc/jit/docs/serialization.md
+++ b/torch/csrc/jit/docs/serialization.md
@@ -328,7 +328,7 @@ For example:
```
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
self.a = torch.rand(2, 3)
self.b = torch.nn.Linear(10, 10)
diff --git a/torch/csrc/jit/operator_upgraders/README.md b/torch/csrc/jit/operator_upgraders/README.md
index 61679972073059..ce995276d283a2 100644
--- a/torch/csrc/jit/operator_upgraders/README.md
+++ b/torch/csrc/jit/operator_upgraders/README.md
@@ -37,7 +37,7 @@ When making changes to the operators, the first thing to identify is if it's BC/
1. Add a test module in `test/jit/fixtures_srcs/fixtures_src.py`. In `test/jit/fixtures_srcs/generate_models.py`,
```
class TestVersionedLinspaceV7(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def forward(self, a: Union[int, float, complex], b: Union[int, float, complex]):
@@ -163,7 +163,7 @@ When making changes to the operators, the first thing to identify is if it's BC/
# Step 2. Write down how current module should look like
class MyModuleFloat(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def forward(self, a, b: float):
diff --git a/torch/csrc/jit/passes/onnx/function_extraction.h b/torch/csrc/jit/passes/onnx/function_extraction.h
index 3a90967e2f1f8c..40555f8e3561ca 100644
--- a/torch/csrc/jit/passes/onnx/function_extraction.h
+++ b/torch/csrc/jit/passes/onnx/function_extraction.h
@@ -25,7 +25,7 @@ namespace onnx {
//
// clang-format off
// class M(torch.nn.Module):
-// def __init__(self):
+// def __init__(self) -> None:
// super().__init__()
// self.lns = torch.nn.ModuleList([torch.nn.LayerNorm(3, eps = i) for i in range(2)])
// self.celu1 = torch.nn.CELU(1.0)
diff --git a/torch/csrc/lazy/test_mnist.py b/torch/csrc/lazy/test_mnist.py
index a3a03d9844d3e4..762620fcc62f11 100644
--- a/torch/csrc/lazy/test_mnist.py
+++ b/torch/csrc/lazy/test_mnist.py
@@ -17,7 +17,7 @@
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
diff --git a/torch/csrc/lazy/tutorial.md b/torch/csrc/lazy/tutorial.md
index 155e8adfdd85aa..b72ae13eca7dd8 100644
--- a/torch/csrc/lazy/tutorial.md
+++ b/torch/csrc/lazy/tutorial.md
@@ -135,7 +135,7 @@ Here's our model definition:
```python
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
diff --git a/torch/cuda/_sanitizer.py b/torch/cuda/_sanitizer.py
index f9ce311725e2f3..34cd7bacee060d 100644
--- a/torch/cuda/_sanitizer.py
+++ b/torch/cuda/_sanitizer.py
@@ -163,7 +163,7 @@ class TensorInfo:
class _TensorsAccessed:
- def __init__(self):
+ def __init__(self) -> None:
self.accesses: Dict[DataPtr, TensorInfo] = {}
def ensure_tensor_exists(self, data_ptr: DataPtr) -> None:
@@ -218,7 +218,7 @@ def set_write(self, data_ptr: DataPtr, access: Access) -> None:
class StreamSynchronizations:
- def __init__(self):
+ def __init__(self) -> None:
self.current_sync_states: Dict[StreamId, Dict[StreamId, SeqNum]] = {}
self.recorded_sync_states: Dict[EventId, Dict[StreamId, SeqNum]] = {}
self.host_sync_state: Dict[StreamId, SeqNum] = {}
@@ -338,7 +338,7 @@ class EventHandler:
data race.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.tensors_accessed = _TensorsAccessed()
self.syncs = StreamSynchronizations()
self.seq_num: SeqNum = 0
@@ -478,7 +478,7 @@ def zip_arguments(
class ArgumentHandler:
- def __init__(self):
+ def __init__(self) -> None:
self.dataptrs_read: Set[DataPtr] = set()
self.dataptrs_written: Set[DataPtr] = set()
self.tensor_aliases: Dict[DataPtr, List[str]] = {}
@@ -527,7 +527,7 @@ def parse_outputs(self, outputs: Any) -> None:
class CUDASanitizerDispatchMode(TorchDispatchMode):
- def __init__(self):
+ def __init__(self) -> None:
self.event_handler = EventHandler()
torch._C._activate_gpu_trace()
gpu_trace.register_callback_for_event_creation(
@@ -596,7 +596,7 @@ class CUDASanitizer:
This approach was deemed more elegant than using the atexit module.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.dispatch = CUDASanitizerDispatchMode()
self.enabled = False
diff --git a/torch/distributed/_composable/checkpoint_activation.py b/torch/distributed/_composable/checkpoint_activation.py
index fcee2a57a07e7b..88253abb4b9cbb 100644
--- a/torch/distributed/_composable/checkpoint_activation.py
+++ b/torch/distributed/_composable/checkpoint_activation.py
@@ -49,7 +49,7 @@ def checkpoint(module: nn.Module, **kwargs) -> nn.Module:
>>> import torch.nn as nn
>>>
>>> class MyModel(nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.l1 = nn.Linear(10, 10)
>>> self.l2 = nn.Linear(10, 10)
diff --git a/torch/distributed/_composable/contract.py b/torch/distributed/_composable/contract.py
index 850659fc2c0171..e7cd1713fae4c2 100644
--- a/torch/distributed/_composable/contract.py
+++ b/torch/distributed/_composable/contract.py
@@ -47,7 +47,7 @@ def contract(state_cls: Type[_State] = _State):
>>> import torch.nn as nn
>>>
>>> class MyModel(nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.l1 = nn.Linear(10, 10)
>>> self.l2 = nn.Linear(10, 10)
diff --git a/torch/distributed/_composable/fsdp/_fsdp_state.py b/torch/distributed/_composable/fsdp/_fsdp_state.py
index 3aad8e2226c387..be587d9b5b0bd3 100644
--- a/torch/distributed/_composable/fsdp/_fsdp_state.py
+++ b/torch/distributed/_composable/fsdp/_fsdp_state.py
@@ -44,7 +44,7 @@
class FSDPStateContext:
"""This has state shared across FSDP states."""
- def __init__(self):
+ def __init__(self) -> None:
# All FSDP states in the root state's module tree
self.all_states: List[FSDPState] = []
# Iteration's forward root runs the once-per-forward logic; this root
@@ -72,7 +72,7 @@ def fsdp_hook_wrapper(*args, **kwargs):
class FSDPState(_State):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._fsdp_param_group: Optional[FSDPParamGroup] = None
self._is_root: Optional[bool] = None # root set during lazy init
diff --git a/torch/distributed/_shard/sharding_plan/api.py b/torch/distributed/_shard/sharding_plan/api.py
index a7552c5a68f88e..d141df1a5214fc 100644
--- a/torch/distributed/_shard/sharding_plan/api.py
+++ b/torch/distributed/_shard/sharding_plan/api.py
@@ -38,7 +38,7 @@ class ShardingPlan:
>>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
>>> class MyModule(nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.fc1 = nn.Linear()
>>> self.gelu = nn.GELU()
diff --git a/torch/distributed/_tensor/README.md b/torch/distributed/_tensor/README.md
index 80fcc2eb41f167..2fedb7cc3b426a 100644
--- a/torch/distributed/_tensor/README.md
+++ b/torch/distributed/_tensor/README.md
@@ -117,7 +117,7 @@ import torch.nn as nn
from torch.distributed._tensor import Shard, distribute_tensor, distribute_module, init_device_mesh
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = nn.Linear(8, 8)
self.fc2 = nn.Linear(8, 8)
diff --git a/torch/distributed/_tensor/examples/checkpoint_example.py b/torch/distributed/_tensor/examples/checkpoint_example.py
index 1701e28ac2ca76..fe8585c2a239c3 100644
--- a/torch/distributed/_tensor/examples/checkpoint_example.py
+++ b/torch/distributed/_tensor/examples/checkpoint_example.py
@@ -25,7 +25,7 @@
class SimpleMLP(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.net1 = torch.nn.Linear(5, 128)
self.relu = torch.nn.ReLU()
diff --git a/torch/distributed/algorithms/join.py b/torch/distributed/algorithms/join.py
index 140844851938b7..f7c95100b1b202 100644
--- a/torch/distributed/algorithms/join.py
+++ b/torch/distributed/algorithms/join.py
@@ -55,7 +55,7 @@ class Joinable(ABC):
"""
@abstractmethod
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._join_config = _JoinConfig.construct_disabled_join_config()
diff --git a/torch/distributed/checkpoint/examples/async_checkpointing_example.py b/torch/distributed/checkpoint/examples/async_checkpointing_example.py
index 5eaba9a67227d9..589f9b93544289 100644
--- a/torch/distributed/checkpoint/examples/async_checkpointing_example.py
+++ b/torch/distributed/checkpoint/examples/async_checkpointing_example.py
@@ -31,7 +31,7 @@ class InjectedException(Exception):
class Model(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.net1 = nn.Linear(8, 32)
self.net2 = nn.Linear(32, 128)
diff --git a/torch/distributed/checkpoint/examples/stateful_example.py b/torch/distributed/checkpoint/examples/stateful_example.py
index 6c76ec436364bf..f6e0d11801def0 100644
--- a/torch/distributed/checkpoint/examples/stateful_example.py
+++ b/torch/distributed/checkpoint/examples/stateful_example.py
@@ -22,7 +22,7 @@
class Model(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
torch.manual_seed(0)
self.net1 = nn.Sequential(nn.Linear(8, 16), nn.ReLU())
diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py
index 161eade7af6655..a78ab550575458 100644
--- a/torch/distributed/distributed_c10d.py
+++ b/torch/distributed/distributed_c10d.py
@@ -434,7 +434,7 @@ class _reduce_op:
:class:`~torch.distributed.ReduceOp` is recommended to use instead.
"""
- def __init__(self):
+ def __init__(self) -> None:
# __members__ is a dict storing key-value pairs for enum classes
for k, v in ReduceOp.RedOpType.__members__.items():
setattr(self, k, v)
@@ -568,7 +568,7 @@ class _World:
of c10d and is subject to change..
"""
- def __init__(self):
+ def __init__(self) -> None:
self._default_pg = None
self._pg_coalesce_state: Dict[ProcessGroup, List[_CollOp]] = {}
self._pg_default_device: Dict[ProcessGroup, torch.device] = {}
@@ -2194,7 +2194,7 @@ def __getattribute__(self, name):
class _CoalescingManager:
- def __init__(self):
+ def __init__(self) -> None:
self.works: List[Work] = []
def append(self, work: Work):
diff --git a/torch/distributed/fsdp/_common_utils.py b/torch/distributed/fsdp/_common_utils.py
index 10d0f821265119..d722d5b9825999 100644
--- a/torch/distributed/fsdp/_common_utils.py
+++ b/torch/distributed/fsdp/_common_utils.py
@@ -106,7 +106,7 @@ def __getattr__(self, __name: str) -> Any:
class _UninitializedDeviceHandle(_FSDPDeviceHandle):
- def __init__(self):
+ def __init__(self) -> None:
pass
def __getattribute__(self, __name: str) -> Any:
diff --git a/torch/distributed/nn/api/remote_module.py b/torch/distributed/nn/api/remote_module.py
index 5583da8c3e8d4e..4e18fe3245e183 100644
--- a/torch/distributed/nn/api/remote_module.py
+++ b/torch/distributed/nn/api/remote_module.py
@@ -156,7 +156,7 @@ def __init__(
created outside of remote modules, rather than as submodules of any remote module (by calling ``add_module``).
Hybrid Example:
>>> class HybridModel(nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> nn.Module.__init__(self)
>>> self.remote_embedding = RemoteModule(...)
>>> self.local_linear = nn.Linear(...)
diff --git a/torch/export/graph_signature.py b/torch/export/graph_signature.py
index c36941ee02e195..0d93957d77cd28 100644
--- a/torch/export/graph_signature.py
+++ b/torch/export/graph_signature.py
@@ -248,7 +248,7 @@ class ExportGraphSignature:
e.g. If following module is exported::
class CustomModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super(CustomModule, self).__init__()
# Define a parameter
diff --git a/torch/fx/README.md b/torch/fx/README.md
index a69a6ed1f65a76..4c799da7bc4022 100644
--- a/torch/fx/README.md
+++ b/torch/fx/README.md
@@ -45,7 +45,7 @@ FX’s front-end makes use of the dynamic nature of Python to intercept call-sit
import torch
class MyModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.param = torch.nn.Parameter(
torch.rand(3, 4))
diff --git a/torch/fx/__init__.py b/torch/fx/__init__.py
index b9896390f12434..dd04cdd09d7fa1 100644
--- a/torch/fx/__init__.py
+++ b/torch/fx/__init__.py
@@ -9,7 +9,7 @@
import torch
# Simple module for demonstration
class MyModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.param = torch.nn.Parameter(torch.rand(3, 4))
self.linear = torch.nn.Linear(4, 5)
diff --git a/torch/fx/_symbolic_trace.py b/torch/fx/_symbolic_trace.py
index bd8d4f4266962b..92fb7b9494891b 100644
--- a/torch/fx/_symbolic_trace.py
+++ b/torch/fx/_symbolic_trace.py
@@ -1012,7 +1012,7 @@ def revert(self):
class _Patcher:
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.patches_made: List[_PatchedFn] = []
self.visited: Set[int] = set()
diff --git a/torch/fx/experimental/migrate_gradual_types/constraint.py b/torch/fx/experimental/migrate_gradual_types/constraint.py
index 45038837cae608..4693a62de24025 100644
--- a/torch/fx/experimental/migrate_gradual_types/constraint.py
+++ b/torch/fx/experimental/migrate_gradual_types/constraint.py
@@ -63,7 +63,7 @@ class T(Constraint):
"""
True
"""
- def __init__(self):
+ def __init__(self) -> None:
pass
def __eq__(self, other):
@@ -76,7 +76,7 @@ class F(Constraint):
"""
False
"""
- def __init__(self):
+ def __init__(self) -> None:
pass
def __eq__(self, other):
diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py
index 726ab04539d9d3..a577cf8736e098 100644
--- a/torch/fx/passes/graph_drawer.py
+++ b/torch/fx/passes/graph_drawer.py
@@ -117,7 +117,7 @@ def get_dot_graph(self, submod_name=None) -> pydot.Dot:
>>> # xdoctest: +REQUIRES(module:ubelt)
>>> # define module
>>> class MyModule(torch.nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.linear = torch.nn.Linear(4, 5)
>>> def forward(self, x):
diff --git a/torch/fx/passes/split_module.py b/torch/fx/passes/split_module.py
index fba516d74beca5..5984587f17c521 100644
--- a/torch/fx/passes/split_module.py
+++ b/torch/fx/passes/split_module.py
@@ -83,7 +83,7 @@ def split_module(
from torch.fx.passes.split_module import split_module
class MyModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.param = torch.nn.Parameter(torch.rand(3, 4))
self.linear = torch.nn.Linear(4, 5)
diff --git a/torch/fx/passes/split_utils.py b/torch/fx/passes/split_utils.py
index d8254bd474b552..44b97471332f2c 100644
--- a/torch/fx/passes/split_utils.py
+++ b/torch/fx/passes/split_utils.py
@@ -83,7 +83,7 @@ def split_by_tags(
Given the following module def:
class SimpleModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = torch.nn.Linear(...)
self.linear2 = torch.nn.Linear(...)
diff --git a/torch/fx/proxy.py b/torch/fx/proxy.py
index 874ac51afff1e4..05157f9ddb19f0 100644
--- a/torch/fx/proxy.py
+++ b/torch/fx/proxy.py
@@ -38,7 +38,7 @@ def forward(self, x):
return x.transpose(1, 2)
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
self.sub = Sub()
def forward(self, x):
diff --git a/torch/fx/subgraph_rewriter.py b/torch/fx/subgraph_rewriter.py
index 419337a17683cf..8a9e78c0af4f8f 100644
--- a/torch/fx/subgraph_rewriter.py
+++ b/torch/fx/subgraph_rewriter.py
@@ -118,7 +118,7 @@ class Match(NamedTuple):
from torch.fx import symbolic_trace, subgraph_rewriter
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def forward(self, x, w1, w2):
diff --git a/torch/fx/tensor_type.py b/torch/fx/tensor_type.py
index f59ed2d45baa4d..83b5a9f8faf65e 100644
--- a/torch/fx/tensor_type.py
+++ b/torch/fx/tensor_type.py
@@ -38,7 +38,7 @@ class _DynType:
"""
_DynType defines a type which stands for the absence of type information.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.__name__ = '_DynType'
def __eq__(self, other):
diff --git a/torch/jit/__init__.py b/torch/jit/__init__.py
index 6d1760fb9f4fcb..e80fa2932fcbe2 100644
--- a/torch/jit/__init__.py
+++ b/torch/jit/__init__.py
@@ -219,7 +219,7 @@ def isinstance(obj, target_type):
from typing import Any, Dict, List
class MyModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
def forward(self, input: Any): # note the Any type
@@ -255,7 +255,7 @@ def foo(x):
"""
- def __init__(self):
+ def __init__(self) -> None:
if not torch._jit_internal.is_scripting():
warnings.warn("Only works in script mode")
pass
diff --git a/torch/jit/_async.py b/torch/jit/_async.py
index bdde55adf14fc4..ceaef70b1fe3b0 100644
--- a/torch/jit/_async.py
+++ b/torch/jit/_async.py
@@ -73,7 +73,7 @@ class AddMod(torch.nn.Module):
def forward(self, a: Tensor, b : int):
return a + b
class Mod(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.mod = AddMod()
def forward(self, input):
diff --git a/torch/jit/_check.py b/torch/jit/_check.py
index 8db5bb82ce3d63..f708ee87f3089a 100644
--- a/torch/jit/_check.py
+++ b/torch/jit/_check.py
@@ -39,7 +39,7 @@ class M(torch.nn.Module):
def fn(self):
return []
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.x: List[int] = []
diff --git a/torch/jit/_freeze.py b/torch/jit/_freeze.py
index 8f35fc471e6841..e496bd74762554 100644
--- a/torch/jit/_freeze.py
+++ b/torch/jit/_freeze.py
@@ -65,7 +65,7 @@ def forward(self, input):
.. testcode::
import torch
class MyModule2(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.modified_tensor = torch.tensor(10.)
self.version = 1
diff --git a/torch/jit/_monkeytype_config.py b/torch/jit/_monkeytype_config.py
index ecf7cd865fdeb7..366a58ac6afd30 100644
--- a/torch/jit/_monkeytype_config.py
+++ b/torch/jit/_monkeytype_config.py
@@ -89,7 +89,7 @@ def log(self, trace: CallTrace) -> None:
self.traces.append(trace)
class JitTypeTraceStore(CallTraceStore):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
# A dictionary keeping all collected CallTrace
# key is fully qualified name of called function
@@ -159,15 +159,15 @@ def code_filter(self) -> Optional[CodeFilter]:
# When MonkeyType is not installed, we provide dummy class definitions
# for the below classes.
class JitTypeTraceStoreLogger: # type: ignore[no-redef]
- def __init__(self):
+ def __init__(self) -> None:
pass
class JitTypeTraceStore: # type: ignore[no-redef]
- def __init__(self):
+ def __init__(self) -> None:
self.trace_records = None
class JitTypeTraceConfig: # type: ignore[no-redef]
- def __init__(self):
+ def __init__(self) -> None:
pass
monkeytype_trace = None # type: ignore[assignment] # noqa: F811
diff --git a/torch/jit/_recursive.py b/torch/jit/_recursive.py
index b8dc0ecf2cd6b9..e03540a7c75b34 100644
--- a/torch/jit/_recursive.py
+++ b/torch/jit/_recursive.py
@@ -426,7 +426,7 @@ class ConcreteTypeStore:
type_store: Dict[Type[Module], List[torch._C.ConcreteModuleType]]
methods_compiled: Set[torch._C.ConcreteModuleType]
- def __init__(self):
+ def __init__(self) -> None:
# Python module type => List[ConcreteModuleType)]
self.type_store = {}
# ConcreteTypes that have had their methods already compiled
diff --git a/torch/jit/_script.py b/torch/jit/_script.py
index 490e9e644e2c6e..a7bc45fa5fc46e 100644
--- a/torch/jit/_script.py
+++ b/torch/jit/_script.py
@@ -107,7 +107,7 @@ def Attribute(value, type): # type: ignore[no-redef]
from typing import Dict
class AttributeModule(torch.jit.ScriptModule):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.foo = torch.jit.Attribute(0.1, float)
@@ -138,7 +138,7 @@ def __init__(self):
class AttributeModule(torch.nn.Module):
names: Dict[str, int]
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.names = {}
@@ -522,7 +522,7 @@ class ScriptModule(Module, metaclass=ScriptMeta):
"original_name",
]
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
forward: Callable[..., Any] = _CachedForward() # type: ignore[assignment]
@@ -1351,7 +1351,7 @@ def forward(self, input):
import torch.nn.functional as F
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
# torch.jit.trace produces a ScriptModule's conv1 and conv2
self.conv1 = torch.jit.trace(nn.Conv2d(1, 20, 5), torch.rand(1, 1, 16, 16))
@@ -1374,7 +1374,7 @@ def forward(self, input):
import torch.nn as nn
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
@torch.jit.export
@@ -1547,7 +1547,7 @@ def run(self, x: torch.Tensor) -> torch.Tensor:
return x.relu()
class Impl2(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.val = torch.rand(())
@@ -1671,7 +1671,7 @@ def dump_string(self):
class _ScriptProfile:
- def __init__(self):
+ def __init__(self) -> None:
self.profile = classes.profiling._ScriptProfile()
def enable(self):
diff --git a/torch/jit/_state.py b/torch/jit/_state.py
index 63df2acfdf09b6..18456ebd38687c 100644
--- a/torch/jit/_state.py
+++ b/torch/jit/_state.py
@@ -19,7 +19,7 @@ class EnabledProxy:
This is just a wrapper for a bool, so that we get reference semantics
"""
- def __init__(self):
+ def __init__(self) -> None:
self.enabled = self.parse_env(
"PYTORCH_JIT", True, "> Using PyTorch JIT", "> PyTorch JIT DISABLED"
)
diff --git a/torch/jit/_trace.py b/torch/jit/_trace.py
index 1c0372c7281bdb..372fd72ddb28b7 100644
--- a/torch/jit/_trace.py
+++ b/torch/jit/_trace.py
@@ -966,7 +966,7 @@ def foo(x, y):
import torch.nn as nn
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = nn.Conv2d(1, 1, 3)
@@ -1182,7 +1182,7 @@ def trace_module(
import torch.nn as nn
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = nn.Conv2d(1, 1, 3)
diff --git a/torch/multiprocessing/reductions.py b/torch/multiprocessing/reductions.py
index 0a6d3c8a444206..fa0818571a93c0 100644
--- a/torch/multiprocessing/reductions.py
+++ b/torch/multiprocessing/reductions.py
@@ -61,7 +61,7 @@ def __eq__(self, other):
class SharedCache(dict):
"""Dictionary from multiprocessing handles to StorageWeakRef."""
- def __init__(self):
+ def __init__(self) -> None:
# free_dead_references() is called if the len exceeds the current
# limit. The limit scales with the number of remaining live objects.
self.limit = 128
diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py
index 30992e394b0fd4..585f4ef1658afd 100644
--- a/torch/nn/modules/container.py
+++ b/torch/nn/modules/container.py
@@ -291,7 +291,7 @@ class ModuleList(Module):
Example::
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])
@@ -465,7 +465,7 @@ class ModuleDict(Module):
Example::
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.choices = nn.ModuleDict({
'conv': nn.Conv2d(10, 10, 3),
@@ -597,7 +597,7 @@ class ParameterList(Module):
Example::
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.params = nn.ParameterList([nn.Parameter(torch.randn(10, 10)) for i in range(10)])
@@ -749,7 +749,7 @@ class ParameterDict(Module):
Example::
class MyModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.params = nn.ParameterDict({
'left': nn.Parameter(torch.randn(5, 10)),
diff --git a/torch/nn/modules/lazy.py b/torch/nn/modules/lazy.py
index 7a9a0161006f14..61cabd061ae922 100644
--- a/torch/nn/modules/lazy.py
+++ b/torch/nn/modules/lazy.py
@@ -86,7 +86,7 @@ class LazyModuleMixin:
>>> # xdoctest: +SKIP
>>> class LazyMLP(torch.nn.Module):
- ... def __init__(self):
+ ... def __init__(self) -> None:
... super().__init__()
... self.fc1 = torch.nn.LazyLinear(10)
... self.relu1 = torch.nn.ReLU()
diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py
index a6592655fd4486..a15850553f1f56 100644
--- a/torch/nn/modules/module.py
+++ b/torch/nn/modules/module.py
@@ -408,7 +408,7 @@ class Module:
import torch.nn.functional as F
class Model(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.Conv2d(1, 20, 5)
self.conv2 = nn.Conv2d(20, 20, 5)
diff --git a/torch/onnx/_globals.py b/torch/onnx/_globals.py
index 22c05075dba8d2..ebef6c331b2d1c 100644
--- a/torch/onnx/_globals.py
+++ b/torch/onnx/_globals.py
@@ -20,7 +20,7 @@ class _InternalGlobals:
global variables unless they are absolutely necessary.
"""
- def __init__(self):
+ def __init__(self) -> None:
self._export_onnx_opset_version = _constants.ONNX_DEFAULT_OPSET
self._training_mode: _C_onnx.TrainingMode = _C_onnx.TrainingMode.EVAL
self._in_onnx_export: bool = False
diff --git a/torch/onnx/_internal/exporter.py b/torch/onnx/_internal/exporter.py
index e53f906cd84c0b..7c7203c80851d8 100644
--- a/torch/onnx/_internal/exporter.py
+++ b/torch/onnx/_internal/exporter.py
@@ -760,7 +760,7 @@ def model_signature(self) -> torch.export.ExportGraphSignature | None:
>>> import pprint
>>> class CustomModule(torch.nn.Module):
- ... def __init__(self):
+ ... def __init__(self) -> None:
... super().__init__()
... self.my_parameter = torch.nn.Parameter(torch.tensor(2.0))
... self.register_buffer("my_buffer1", torch.tensor(3.0))
diff --git a/torch/onnx/_internal/fx/dynamo_graph_extractor.py b/torch/onnx/_internal/fx/dynamo_graph_extractor.py
index a3b8a69f60dcb6..5abf2bf2c6373f 100644
--- a/torch/onnx/_internal/fx/dynamo_graph_extractor.py
+++ b/torch/onnx/_internal/fx/dynamo_graph_extractor.py
@@ -24,7 +24,7 @@ class _PyTreeExtensionContext:
_extensions: dict[type, tuple[pytree.FlattenFunc, pytree.UnflattenFunc]]
- def __init__(self):
+ def __init__(self) -> None:
self._extensions = {}
# Register PyTree extension for HuggingFace model output.
self._register_huggingface_model_output_extension()
diff --git a/torch/onnx/_internal/fx/passes/modularization.py b/torch/onnx/_internal/fx/passes/modularization.py
index 4f4d347401ea7a..db74d52dda4799 100644
--- a/torch/onnx/_internal/fx/passes/modularization.py
+++ b/torch/onnx/_internal/fx/passes/modularization.py
@@ -795,7 +795,7 @@ class Modularize(_pass.Transform):
>>> from torch.onnx._internal.diagnostics import infra
>>>
>>> class CustomModule(torch.nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.embedding = torch.nn.Embedding(10, 32)
>>> self.relu = torch.nn.ReLU()
@@ -806,7 +806,7 @@ class Modularize(_pass.Transform):
>>> return out
>>>
>>> class TestModule(torch.nn.Module):
- >>> def __init__(self):
+ >>> def __init__(self) -> None:
>>> super().__init__()
>>> self.layer = CustomModule()
>>> self.linear = torch.nn.Linear(32, 10)
diff --git a/torch/onnx/_internal/fx/patcher.py b/torch/onnx/_internal/fx/patcher.py
index 3c0ee6c071436c..239edb6dde6342 100644
--- a/torch/onnx/_internal/fx/patcher.py
+++ b/torch/onnx/_internal/fx/patcher.py
@@ -53,7 +53,7 @@ class ONNXTorchPatcher:
`torch.fx._symbolic_trace._wrapped_methods_to_patch`
"""
- def __init__(self):
+ def __init__(self) -> None:
# List of file paths processed by torch.load.
self.paths: List[Union[str, io.BufferedIOBase]] = []
diff --git a/torch/onnx/_internal/onnxruntime.py b/torch/onnx/_internal/onnxruntime.py
index b9d6bce165169f..59609866bae83f 100644
--- a/torch/onnx/_internal/onnxruntime.py
+++ b/torch/onnx/_internal/onnxruntime.py
@@ -602,7 +602,7 @@ def is_supported(self, *args):
@dataclasses.dataclass
class OrtExecutionInfoForAllGraphModules:
- def __init__(self):
+ def __init__(self) -> None:
# All sessions (and their related information) created by exporting the same GraphModule
# with different inputs.
self.execution_info_per_graph_module: Dict[
diff --git a/torch/onnx/_internal/registration.py b/torch/onnx/_internal/registration.py
index c59ab11d4fa77b..95de41b3f03aee 100644
--- a/torch/onnx/_internal/registration.py
+++ b/torch/onnx/_internal/registration.py
@@ -69,7 +69,7 @@ class OverrideDict(Collection[_K], Generic[_K, _V]):
ones.
"""
- def __init__(self):
+ def __init__(self) -> None:
self._base: Dict[_K, _V] = {}
self._overrides: Dict[_K, _V] = {}
self._merged: Dict[_K, _V] = {}
diff --git a/torch/onnx/verification.py b/torch/onnx/verification.py
index e8bcfe4ca9ef9a..bcf1de6b6437e4 100644
--- a/torch/onnx/verification.py
+++ b/torch/onnx/verification.py
@@ -1722,7 +1722,7 @@ def find_mismatch(
... opset_version=opset_version,
... )
>>> class Model(torch.nn.Module):
- ... def __init__(self):
+ ... def __init__(self) -> None:
... super().__init__()
... self.layers = torch.nn.Sequential(
... torch.nn.Linear(3, 4),
diff --git a/torch/overrides.py b/torch/overrides.py
index bbd055de447e1f..ecb5613f80f536 100644
--- a/torch/overrides.py
+++ b/torch/overrides.py
@@ -2025,7 +2025,7 @@ class TorchFunctionMode:
inner: "TorchFunctionMode"
# Force metaclass to generate constructor at the base of the hierarchy
- def __init__(self):
+ def __init__(self) -> None:
pass
def __torch_function__(self, func, types, args=(), kwargs=None):
diff --git a/torch/package/_mangling.py b/torch/package/_mangling.py
index 7dcf3538631f92..700a9ad6a04a16 100644
--- a/torch/package/_mangling.py
+++ b/torch/package/_mangling.py
@@ -12,7 +12,7 @@ class PackageMangler:
Used on import, to ensure that all modules imported have a shared mangle parent.
"""
- def __init__(self):
+ def __init__(self) -> None:
global _mangle_index
self._mangle_index = _mangle_index
# Increment the global index
diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py
index 47faac8c176d45..98f1c1b6735147 100644
--- a/torch/profiler/profiler.py
+++ b/torch/profiler/profiler.py
@@ -772,7 +772,7 @@ class ExecutionTraceObserver(_ITraceObserver):
incurring any overheads.
"""
- def __init__(self):
+ def __init__(self) -> None:
"""
Initializes the default states.
"""
diff --git a/torch/testing/_internal/common_fsdp.py b/torch/testing/_internal/common_fsdp.py
index 51a3deac9c1bfe..fe02eeeabb1baf 100644
--- a/torch/testing/_internal/common_fsdp.py
+++ b/torch/testing/_internal/common_fsdp.py
@@ -1498,7 +1498,7 @@ def wrapper(*args, **kwargs):
class SkipModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = nn.Linear(10, 10, bias=False)
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index f1e4dae34e1af5..d0413b17e50bae 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -3591,7 +3591,7 @@ def error_inputs_adaptive_max_pool3d(opinfo, device, **kwargs):
class _TestParamsMaxPoolBase:
- def __init__(self):
+ def __init__(self) -> None:
self.kwargs = {
'kernel_size': [3],
'stride': [2, None],
@@ -3628,7 +3628,7 @@ def gen_input_params(self):
class _TestParamsMaxPool1d(_TestParamsMaxPoolBase):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.kwargs['kernel_size'] += [(3,)]
self.kwargs['stride'] += [(2,)]
@@ -3637,7 +3637,7 @@ def __init__(self):
class _TestParamsMaxPool2d(_TestParamsMaxPoolBase):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.kwargs['kernel_size'] += [(3, 2)]
self.kwargs['stride'] += [(2, 1)]
@@ -3648,7 +3648,7 @@ def __init__(self):
class _TestParamsMaxPool3d(_TestParamsMaxPoolBase):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.kwargs['kernel_size'] += [(3, 2, 3)]
self.kwargs['stride'] += [(2, 1, 2)]
diff --git a/torch/testing/_internal/common_nn.py b/torch/testing/_internal/common_nn.py
index 0dd11312c04359..7f53a72a7ef682 100644
--- a/torch/testing/_internal/common_nn.py
+++ b/torch/testing/_internal/common_nn.py
@@ -3967,13 +3967,13 @@ def _test_module_empty_input(test_case, module, inp, check_size=True, inference=
def _create_basic_net():
class Layer(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.layer_dummy_param = nn.Parameter(torch.empty(3, 5))
self.layer_dummy_buf = nn.Buffer(torch.zeros(1, 3, 3, 7))
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.l1 = Layer()
self.dummy_param = nn.Parameter(torch.empty(3, 5))
diff --git a/torch/testing/_internal/common_pruning.py b/torch/testing/_internal/common_pruning.py
index 031e4ad9efbd40..43dd716c288e0b 100644
--- a/torch/testing/_internal/common_pruning.py
+++ b/torch/testing/_internal/common_pruning.py
@@ -52,7 +52,7 @@ class SimpleLinear(nn.Module):
r"""Model with only Linear layers without biases, some wrapped in a Sequential,
some following the Sequential. Used to test basic pruned Linear-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Linear(7, 5, bias=False),
@@ -73,7 +73,7 @@ class LinearBias(nn.Module):
r"""Model with only Linear layers, alternating layers with biases,
wrapped in a Sequential. Used to test pruned Linear-Bias-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Linear(7, 5, bias=True),
@@ -93,7 +93,7 @@ class LinearActivation(nn.Module):
Activation functions modules in between each Linear in the Sequential, and each outside layer.
Used to test pruned Linear(Bias)-Activation-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Linear(7, 5, bias=True),
@@ -122,7 +122,7 @@ class LinearActivationFunctional(nn.Module):
activationals are called in between each outside layer.
Used to test pruned Linear(Bias)-Activation-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Linear(7, 5, bias=True),
@@ -151,7 +151,7 @@ class SimpleConv2d(nn.Module):
r"""Model with only Conv2d layers, all without bias, some in a Sequential and some following.
Used to test pruned Conv2d-Conv2d fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 32, 3, 1, bias=False),
@@ -171,7 +171,7 @@ class Conv2dBias(nn.Module):
r"""Model with only Conv2d layers, some with bias, some in a Sequential and some outside.
Used to test pruned Conv2d-Bias-Conv2d fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 32, 3, 1, bias=True),
@@ -194,7 +194,7 @@ class Conv2dActivation(nn.Module):
in-between each outside layer.
Used to test pruned Conv2d-Bias-Activation-Conv2d fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 32, 3, 1, bias=True),
@@ -222,7 +222,7 @@ class Conv2dPadBias(nn.Module):
Used to test that bias is propagated correctly in the special case of
pruned Conv2d-Bias-(Activation)Conv2d fusion, when the second Conv2d layer has padding > 0."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 32, 3, 1, padding=1, bias=True),
@@ -255,7 +255,7 @@ class Conv2dPool(nn.Module):
Activation function modules in between each layer, Pool2d modules in between each layer.
Used to test pruned Conv2d-Pool2d-Conv2d fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=True),
@@ -289,7 +289,7 @@ class Conv2dPoolFlattenFunctional(nn.Module):
Activation functions and Pool2ds in between each layer also.
Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True),
@@ -323,7 +323,7 @@ class Conv2dPoolFlatten(nn.Module):
Activation functions and Pool2ds in between each layer also.
Used to test pruned Conv2d-Pool2d-Flatten-Linear fusion."""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.seq = nn.Sequential(
nn.Conv2d(1, 3, kernel_size=3, padding=1, bias=True),
diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py
index 553d483ab0a8e1..2482629fe9937f 100644
--- a/torch/testing/_internal/common_quantization.py
+++ b/torch/testing/_internal/common_quantization.py
@@ -1326,7 +1326,7 @@ def _quantize(self, m, quantizer, example_inputs, is_qat: bool = False):
def _get_pt2e_quantized_linear(self, is_per_channel=False) -> torch.fx.GraphModule:
class M(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = torch.nn.Linear(2, 2)
@@ -1343,7 +1343,7 @@ def forward(self, x):
# Below are a series of toy models to use in testing quantization
class SingleLayerLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float)
@@ -1381,7 +1381,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class LinearAddModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float)
@@ -1436,7 +1436,7 @@ def forward(self, x, hid):
return x, hid
class ConvModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
@@ -1448,7 +1448,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class ConvTransposeModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.ConvTranspose2d(3, 5, 3, bias=False).to(dtype=torch.float)
@@ -1494,7 +1494,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class ConvBnModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float)
@@ -1508,7 +1508,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class AnnotatedConvBnModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.qconfig = default_qconfig
self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
@@ -1527,7 +1527,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class ConvBnReLUModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
self.bn = torch.nn.BatchNorm2d(5).to(dtype=torch.float)
@@ -1571,7 +1571,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class TwoLayerConvModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = torch.nn.Conv2d(3, 5, 3, bias=False).to(dtype=torch.float)
self.conv2 = torch.nn.Conv2d(5, 5, 1, bias=False).to(dtype=torch.float)
@@ -1585,7 +1585,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class TwoLayerLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
self.fc2 = torch.nn.Linear(8, 5).to(dtype=torch.float)
@@ -1599,7 +1599,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class LinearModelWithSubmodule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.subm = TwoLayerLinearModel()
self.fc = nn.Linear(5, 5)
@@ -1613,7 +1613,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.subm.get_example_inputs()
class AnnotatedTwoLayerLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
self.fc2 = QuantWrapper(torch.nn.Linear(8, 5).to(dtype=torch.float))
@@ -1628,7 +1628,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class ActivationsTestModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
self.quant = torch.ao.quantization.QuantStub()
@@ -1644,7 +1644,7 @@ def forward(self, x):
return x
class LinearReluModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1658,7 +1658,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
class LinearReluLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1674,7 +1674,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class LinearReluAddModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 5).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1710,7 +1710,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class LinearTanhModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = nn.Linear(5, 5)
self.tanh = nn.Tanh()
@@ -1785,7 +1785,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
# TODO: self.fc should be self.conv
class ConvReluModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1799,7 +1799,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
# TODO: self.fc should be self.conv
class ConvReluConvModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1816,7 +1816,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
# TODO: self.fc should be self.conv
class ConvReluAddModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Conv2d(3, 5, 3).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
@@ -1834,7 +1834,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class NormalizationTestModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.quant = torch.ao.quantization.QuantStub()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
@@ -1855,7 +1855,7 @@ def forward(self, x):
return x
class NestedModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub1 = LinearReluModel()
self.sub2 = TwoLayerLinearModel()
@@ -1887,7 +1887,7 @@ def forward(self, x):
return x
class AnnotatedSubNestedModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub1 = LinearReluModel()
self.sub2 = QuantWrapper(TwoLayerLinearModel())
@@ -1902,7 +1902,7 @@ def forward(self, x):
return x
class AnnotatedCustomConfigNestedModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub1 = LinearReluModel()
self.sub2 = TwoLayerLinearModel()
@@ -1928,7 +1928,7 @@ def forward(self, x):
return x
class QuantSubModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub1 = LinearReluModel()
self.sub2 = QuantWrapper(TwoLayerLinearModel())
@@ -1943,7 +1943,7 @@ def forward(self, x):
return x
class InnerModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = torch.nn.Linear(5, 8).to(dtype=torch.float)
self.relu1 = torch.nn.ReLU()
@@ -1970,7 +1970,7 @@ def fuse_modules(self):
torch.ao.quantization.fuse_modules(self, fusable_layers, inplace=True)
class FunctionalLinear(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.weight = torch.rand((5, 5))
self.bias = torch.zeros(5)
@@ -1982,7 +1982,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 5),)
class SingleLayerFunctionalLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = FunctionalLinear()
@@ -1994,7 +1994,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.linear1.get_example_inputs()
class TwoLayerFunctionalLinearModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = FunctionalLinear()
self.linear2 = FunctionalLinear()
@@ -2008,7 +2008,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.linear1.get_example_inputs()
class FunctionalLinearAddModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = FunctionalLinear()
self.linear2 = FunctionalLinear()
@@ -2023,7 +2023,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.linear1.get_example_inputs()
class FunctionalLinearReluModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = FunctionalLinear()
@@ -2036,7 +2036,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.linear.get_example_inputs()
class FunctionalLinearReluLinearModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = FunctionalLinear()
self.relu = nn.ReLU()
@@ -2052,7 +2052,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.linear1.get_example_inputs()
class FunctionalConv2d(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.weight = torch.rand(3, 3, 3, 3)
self.bias = torch.rand(3)
@@ -2068,7 +2068,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return (torch.rand(1, 3, 5, 5),)
class SingleLayerFunctionalConvModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = FunctionalConv2d()
@@ -2080,7 +2080,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.conv1.get_example_inputs()
class TwoLayerFunctionalConvModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = FunctionalConv2d()
self.conv2 = FunctionalConv2d()
@@ -2094,7 +2094,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.conv1.get_example_inputs()
class FunctionalConvReluModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = FunctionalConv2d()
@@ -2107,7 +2107,7 @@ def get_example_inputs(self) -> Tuple[Any, ...]:
return self.conv.get_example_inputs()
class FunctionalConvReluConvModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = FunctionalConv2d()
self.relu = nn.ReLU()
@@ -2126,7 +2126,7 @@ class SkipQuantModel(torch.nn.Module):
r"""We can skip quantization by explicitly
setting qconfig of a submodule to None
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub = InnerModule()
self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float)
@@ -2158,7 +2158,7 @@ def fuse_modules(self):
class QuantStubModel(torch.nn.Module):
r"""A Module with manually inserted `QuantStub` and `DeQuantStub`
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.qconfig = torch.ao.quantization.get_default_qconfig("qnnpack")
self.quant = QuantStub()
@@ -2243,11 +2243,11 @@ class ManualConvLinearSymmQATModel(ManualConvLinearQATModel):
r"""Same as ManualConvLinearQATModule but with Symmetric Quantization.
Supported only with qnnpack.
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(default_symmetric_qnnpack_qat_qconfig)
class ManualEmbeddingBagLinear(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = nn.EmbeddingBag(num_embeddings=10, embedding_dim=12, mode='sum')
self.emb.qconfig = default_embedding_qat_qconfig
@@ -2287,7 +2287,7 @@ def forward(self, input: torch.Tensor) -> torch.Tensor:
return self.dequant(x)
class SubModelForFusion(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float)
self.bn = nn.BatchNorm2d(2).to(dtype=torch.float)
@@ -2299,7 +2299,7 @@ def forward(self, x):
class SubModelWithoutFusion(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = nn.Conv2d(2, 2, 1, bias=None).to(dtype=torch.float)
self.relu = nn.ReLU(inplace=False).to(dtype=torch.float)
@@ -2354,7 +2354,7 @@ def forward(self, x):
return x
class ConvBNReLU(nn.Sequential):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__(
nn.Conv2d(3, 3, 1, 1, bias=False),
nn.BatchNorm2d(3),
@@ -2362,7 +2362,7 @@ def __init__(self):
)
class ModelWithSequentialFusion(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.Conv2d(3, 3, 1)
self.relu1 = nn.ReLU(inplace=False)
@@ -2388,7 +2388,7 @@ def forward(self, x):
return x
class ModelForFusionWithBias(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.Conv2d(3, 2, 5, bias=True).to(dtype=torch.float)
self.bn1 = nn.BatchNorm2d(2).to(dtype=torch.float)
@@ -2409,7 +2409,7 @@ def forward(self, x):
return x
class ModelForLinearBNFusion(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = nn.Linear(20, 10)
self.bn = nn.BatchNorm1d(10)
@@ -2428,7 +2428,7 @@ def forward(self, x):
class ModelForConvTransposeBNFusion(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = nn.ConvTranspose1d(3, 3, 1)
self.bn1 = nn.BatchNorm1d(3)
@@ -2450,7 +2450,7 @@ def forward(self, x):
class ModelWithFunctionals(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.mycat = nnq.FloatFunctional()
self.myadd = nnq.FloatFunctional()
@@ -2474,7 +2474,7 @@ def forward(self, x):
class ResNetBase(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
norm_layer = nn.BatchNorm2d
inplanes = 3
@@ -2507,7 +2507,7 @@ def fuse_model(self):
torch.ao.quantization.fuse_modules(self, [['conv1', 'bn1', 'relu1']], inplace=True)
class ModelMultipleOps(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
norm_layer = nn.BatchNorm2d
inplanes = 3
@@ -2542,7 +2542,7 @@ def forward(self, x):
# accurately with fake-quant so this model does not
# contain those operations
class ModelMultipleOpsNoAvgPool(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
norm_layer = nn.BatchNorm2d
inplanes = 3
@@ -2572,7 +2572,7 @@ def forward(self, x):
return out
class EmbeddingBagModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12,
include_last_offset=True, scale_grad_by_freq=False, mode='sum')
@@ -2581,7 +2581,7 @@ def forward(self, indices, offsets, per_sample_weights):
return self.emb(indices, offsets, per_sample_weights)
class EmbeddingModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12)
@@ -2589,7 +2589,7 @@ def forward(self, indices):
return self.emb(indices)
class EmbeddingWithStaticLinear(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=12)
self.fc = torch.nn.Linear(4, 2)
@@ -2671,7 +2671,7 @@ def forward(
class TestHelperModules:
class Conv2dPropAnnotaton(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 3, 3)
self.linear = torch.nn.Linear(3, 3)
@@ -2684,7 +2684,7 @@ def forward(self, x):
return x
class Conv2dWithObsSharingOps(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 3, 3)
self.hardtanh = torch.nn.Hardtanh()
@@ -2698,7 +2698,7 @@ def forward(self, x):
return x
class Conv2dWithTwoLinearPermute(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 16, 3)
self.linear1 = torch.nn.Linear(16, 8, bias=False)
@@ -2710,7 +2710,7 @@ def forward(self, x):
return self.linear2(self.linear1(permute_out))
class Conv2dWithTwoLinear(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 16, 3)
self.linear1 = torch.nn.Linear(64, 8, bias=False)
@@ -2722,7 +2722,7 @@ def forward(self, x):
return self.linear2(self.linear1(reshape_out))
class ConvLinearWPermute(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 8, 3)
self.linear1 = torch.nn.Linear(8, 8)
@@ -2733,7 +2733,7 @@ def forward(self, x):
return self.linear1(permute_out)
class TwoLinearModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear1 = torch.nn.Linear(8, 16, bias=False)
self.linear2 = torch.nn.Linear(16, 8)
@@ -2742,7 +2742,7 @@ def forward(self, x):
return self.linear2(self.linear1(x))
class ConvMaxPool2d(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(2, 2, 1)
self.pool = torch.nn.MaxPool2d(1, 1)
@@ -2753,7 +2753,7 @@ def forward(self, x):
return x
class ConvWithAdaptiveAvgPool2d(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(3, 3, 3)
self.adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d((1, 1))
@@ -2806,7 +2806,7 @@ def forward(self, x):
return self.relu(x)
class Conv2dThenConv1d(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1d = torch.nn.Conv1d(3, 3, 3)
self.conv2d = torch.nn.Conv2d(3, 3, 3)
@@ -2821,7 +2821,7 @@ def example_inputs(self):
return (torch.randn(1, 3, 5, 5),)
class Conv2dWithCat(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = torch.nn.Conv2d(3, 3, 3)
self.conv2 = torch.nn.Conv2d(3, 3, 3)
@@ -2833,7 +2833,7 @@ def forward(self, x, y):
return z
class Conv2dWithTwoCat(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv1 = torch.nn.Conv2d(3, 3, 3)
self.conv2 = torch.nn.Conv2d(3, 3, 3)
@@ -2854,7 +2854,7 @@ def forward(self, x1, x2, x3, x4):
return w
class EmbeddingModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12)
@@ -2862,7 +2862,7 @@ def forward(self, indices):
return self.emb(indices)
class EmbeddingConvLinearModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=8)
self.conv = torch.nn.Conv2d(8, 16, (1, 3))
@@ -2898,7 +2898,7 @@ def forward(self, x):
return x
class ConvBnReLU2dAndLinearReLU(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv_bn_relu = TestHelperModules.ConvWithBNRelu(relu=True)
self.linear = torch.nn.Linear(3, 8, bias=False)
@@ -2911,7 +2911,7 @@ def forward(self, x):
return linear_out
class GroupwiseConv2d(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.conv = torch.nn.Conv2d(4, 4, 3, groups=2)
@@ -2922,7 +2922,7 @@ def example_inputs(self):
return (torch.randn(2, 4, 10, 10),)
class LinearReluModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = torch.nn.Linear(5, 5).to(dtype=torch.float)
self.relu = torch.nn.ReLU()
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index 12723039358e09..8ec568c665c1fb 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -1092,7 +1092,7 @@ def sanitize_pytest_xml(xml_file: str):
def get_pytest_test_cases(argv: List[str]) -> List[str]:
class TestCollectorPlugin:
- def __init__(self):
+ def __init__(self) -> None:
self.tests = []
def pytest_collection_finish(self, session):
diff --git a/torch/testing/_internal/data/network1.py b/torch/testing/_internal/data/network1.py
index e6180f4f2d2ed1..8755643a78cca8 100644
--- a/torch/testing/_internal/data/network1.py
+++ b/torch/testing/_internal/data/network1.py
@@ -5,6 +5,6 @@
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = nn.Linear(10, 20)
diff --git a/torch/testing/_internal/data/network2.py b/torch/testing/_internal/data/network2.py
index fdb583d0af92fe..19b0b8ee53d3b5 100644
--- a/torch/testing/_internal/data/network2.py
+++ b/torch/testing/_internal/data/network2.py
@@ -5,7 +5,7 @@
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.linear = nn.Linear(10, 20)
self.relu = nn.ReLU()
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index 276dc4fa6e702d..a8e1434ecdb474 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -107,7 +107,7 @@
class NetWithBuffers(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.a = nn.Linear(10, 10, bias=False)
self.b = nn.Linear(10, 1, bias=False)
@@ -260,7 +260,7 @@ class DDPUnevenTestInput(NamedTuple):
class _FC2(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = nn.Linear(10, 50, bias=True)
self.fc.bias.requires_grad = False
@@ -271,7 +271,7 @@ def forward(self, x):
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = nn.Linear(2, 10, bias=False)
self.fc2 = _FC2()
@@ -289,7 +289,7 @@ def forward(self, x):
class LargeNet(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = nn.Linear(1000, 2000, bias=False)
self.fc2 = nn.Linear(2000, 500, bias=False)
@@ -301,7 +301,7 @@ def forward(self, x):
class Task(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.p = nn.Parameter(torch.ones(2, 2))
@@ -325,7 +325,7 @@ def forward(self, x):
class UnusedParamTwoLinLayerNet(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.a = nn.Linear(10, 10, bias=False)
self.b = nn.Linear(10, 10, bias=False)
@@ -338,7 +338,7 @@ def forward(self, x):
class DictOutputModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.module = UnusedParamTwoLinLayerNet()
@@ -352,7 +352,7 @@ def forward(self, x):
class TwoLinLayerNet(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.a = nn.Linear(10, 10, bias=False)
self.b = nn.Linear(10, 1, bias=False)
@@ -383,7 +383,7 @@ def forward(self, x):
class ControlFlowToyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin1 = nn.Linear(10, 10, bias=False)
self.lin2 = nn.Linear(10, 10, bias=False)
@@ -4408,7 +4408,7 @@ def test_DistributedDataParallel_requires_grad(self):
@skip_if_lt_x_gpu(int(os.environ["WORLD_SIZE"]))
def test_ddp_zero_output_features(self):
class ToyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.net1 = nn.Linear(10, 10)
self.relu = nn.ReLU()
@@ -4422,7 +4422,7 @@ def __init__(self):
@skip_but_pass_in_sandcastle_if(BACKEND == "nccl", "Gloo-only test")
def test_ddp_create_graph(self):
class Model(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.p = nn.Parameter(torch.tensor(1.0))
@@ -4979,7 +4979,7 @@ def _test_ddp_native_mixed_precision(
mp_config = self._get_fp16_config()
class MyModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.m = torch.nn.Linear(1, 5)
self.register_buffer('buffer', torch.randn(1, 2))
@@ -7241,7 +7241,7 @@ def test_ddp_uneven_inputs_stop_iteration_sync_bn(self):
# for models with SyncBN or general collective comm when
# throw_on_early_termination=True.
class ModelWithComm(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = nn.Linear(2, 40, bias=False)
@@ -7523,7 +7523,7 @@ def test_ddp_uneven_input_exception(self):
error_str = "Intentional error"
class ExceptionModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.param = nn.Parameter(torch.ones(1, requires_grad=True))
@@ -7731,7 +7731,7 @@ def test_ddp_ignore_params_arg(self):
@skip_if_lt_x_gpu(2)
def test_ddp_unused_params_rebuild_buckets_exception(self):
class ToyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.net1 = nn.Linear(10, 10, bias=False)
self.net2 = nn.Linear(10, 10, bias=False)
@@ -7785,7 +7785,7 @@ def test_ddp_shared_grad_acc_unused_params(self):
# When find_unused_parameters=True, ensure we mark unused parameters
# even if they share gradient accumulators.
class ToyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
# net1, bias, and net1.bias are all unused params.
self.net1 = nn.Linear(10, 5, bias=False)
@@ -8984,7 +8984,7 @@ def test_ddp_build_debug_param_to_name_mapping(self):
@skip_if_lt_x_gpu(2)
def test_ddp_build_debug_param_to_name_mapping_requires_grad(self):
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = nn.Linear(10, 10)
# Is not tracked by DDP and should not show up in param to
@@ -9009,7 +9009,7 @@ def _test_ddp_multiple_nested_unused_params_error(self, ignore_sparse):
debug_mode_off = dist.get_debug_level() == dist.DebugLevel.OFF
class SubModule(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.embedding_net = EmbeddingNetDifferentParams(0)
self.lin = TwoLinLayerNet()
@@ -9025,7 +9025,7 @@ def forward(self, x):
return x
class MyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.sub_module = SubModule()
@@ -9261,7 +9261,7 @@ def test_ddp_static_graph_nested_types(self):
torch.cuda.set_device(rank)
class NestedOutputModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = nn.Linear(100, 1, bias=False)
@@ -9347,7 +9347,7 @@ def test_ddp_returns_tensor_with_no_grad(self):
torch.cuda.set_device(self.rank)
class MyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = nn.Linear(10, 10, bias=False)
self.fc2 = nn.Linear(10, 10, bias=False)
@@ -9384,7 +9384,7 @@ def forward(self, x):
)
def test_detect_ddp_is_actually_static(self):
class ToyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.net1 = nn.Linear(10, 10, bias=False)
self.net2 = nn.Linear(10, 10)
@@ -9430,7 +9430,7 @@ def forward(self, x, find_unused, dynamic):
def _test_ddp_new_tensor_in_fwd(self, static_graph):
# Test from https://github.com/pytorch/pytorch/issues/60733
class MyModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc1 = nn.Linear(10, 10, bias=False)
self.fc2 = nn.Linear(10, 10, bias=False)
@@ -9965,7 +9965,7 @@ def test_ddp_broadcast_buffer(self):
torch.cuda.manual_seed(rank)
class NetWithBuffers(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.a = nn.Linear(10, 10, bias=False)
self.b = nn.Linear(10, 1, bias=False)
@@ -10002,7 +10002,7 @@ def forward(self, x):
)
def test_static_graph_multi_forward(self):
class Net(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = nn.Linear(10, 10)
self.relu = nn.ReLU()
@@ -10084,7 +10084,7 @@ def test_sync_bn_logged(self):
)
def test_stateless_api_with_ddp(self):
class MockModule(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.l1 = torch.nn.Linear(1, 1)
buffer = torch.ones(1)
@@ -10131,7 +10131,7 @@ def forward(self, x):
@skip_if_lt_x_gpu(2)
def test_ddp_forward_backward_hook(self):
class DummyTestModel(nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
torch.manual_seed(0)
self.fc = nn.Linear(2, 2)
@@ -10391,7 +10391,7 @@ def __torch_dispatch__(self, func, types, args=(), kwargs=None):
return func(*args, **kwargs)
class MyModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.fc = torch.nn.Linear(10, 10)
diff --git a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py
index 1ea7eace8294cb..5d7e7b1244bcea 100644
--- a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py
+++ b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py
@@ -44,7 +44,7 @@ class Policy(nn.Module):
Copying the code to make these two examples independent.
See https://github.com/pytorch/examples/tree/master/reinforcement_learning
"""
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.affine1 = nn.Linear(4, 128)
self.dropout = nn.Dropout(p=0.6)
@@ -97,7 +97,7 @@ class Observer:
select an action. Then, the observer applies the action to its environment
and reports the reward to the agent.
"""
- def __init__(self):
+ def __init__(self) -> None:
self.id = rpc.get_worker_info().id
self.env = DummyEnv()
self.env.seed(SEED)
diff --git a/torch/testing/_internal/distributed/rpc/rpc_test.py b/torch/testing/_internal/distributed/rpc/rpc_test.py
index 3a3977d7b89604..413f97d94eb281 100644
--- a/torch/testing/_internal/distributed/rpc/rpc_test.py
+++ b/torch/testing/_internal/distributed/rpc/rpc_test.py
@@ -144,7 +144,7 @@ def set_and_check_done(value):
TensorClass = namedtuple("TensorClass", ["tensors"])
class MyPickleClass:
- def __init__(self):
+ def __init__(self) -> None:
self.t = None
def __getstate__(self):
@@ -1446,7 +1446,7 @@ def test_pg_init_no_rpc_init(self):
world_size=self.world_size)
class MyModel(torch.nn.Module):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.lin = torch.nn.Linear(3, 4)
diff --git a/torch/testing/_internal/jit_metaprogramming_utils.py b/torch/testing/_internal/jit_metaprogramming_utils.py
index 8171a95918939f..02a9fcc5405e51 100644
--- a/torch/testing/_internal/jit_metaprogramming_utils.py
+++ b/torch/testing/_internal/jit_metaprogramming_utils.py
@@ -604,7 +604,7 @@ def script_module(*args, **kwargs):
class TheModule(torch.jit.ScriptModule):
__constants__ = submodule_constants
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.submodule = nn_module(*constructor_args)
diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py
index c0109ecacf7f7d..a8c7fa261f9982 100644
--- a/torch/testing/_internal/jit_utils.py
+++ b/torch/testing/_internal/jit_utils.py
@@ -770,7 +770,7 @@ def _get_py3_code(code, fn_name):
return fn
class TensorExprTestOptions:
- def __init__(self):
+ def __init__(self) -> None:
self.old_profiling_executor = torch._C._jit_set_profiling_executor(True)
self.old_profiling_mode = torch._C._get_graph_executor_optimize(True)
diff --git a/torch/utils/_sympy/value_ranges.py b/torch/utils/_sympy/value_ranges.py
index 4a01d8e53b915b..29ee1886261bdb 100644
--- a/torch/utils/_sympy/value_ranges.py
+++ b/torch/utils/_sympy/value_ranges.py
@@ -936,7 +936,7 @@ def trunc(x):
class ValueRangeAnalysis(SymPyValueRangeAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
self.name = "ValueRangeAnalysis"
boolean_operators = (
"xor",
diff --git a/torch/utils/data/_utils/worker.py b/torch/utils/data/_utils/worker.py
index b07439526bf648..c61b78d42d8a56 100644
--- a/torch/utils/data/_utils/worker.py
+++ b/torch/utils/data/_utils/worker.py
@@ -28,7 +28,7 @@
# is gone, and the only way to check it through OS is to let the worker have a process handle
# of the manager and ask if the process status has changed.
class ManagerWatchdog:
- def __init__(self):
+ def __init__(self) -> None:
self.manager_pid = os.getppid()
# mypy cannot detect this code is windows only
@@ -60,7 +60,7 @@ def is_alive(self):
else:
class ManagerWatchdog: # type: ignore[no-redef]
- def __init__(self):
+ def __init__(self) -> None:
self.manager_pid = os.getppid()
self.manager_dead = False
diff --git a/torch/utils/module_tracker.py b/torch/utils/module_tracker.py
index 9feef40ca4da88..01e966c712b595 100644
--- a/torch/utils/module_tracker.py
+++ b/torch/utils/module_tracker.py
@@ -52,7 +52,7 @@ def my_linear(m1, m2, bias):
A Set containing the fqn for each module currently running their forward
"""
- def __init__(self):
+ def __init__(self) -> None:
self.parents = {"Global"}
self._known_modules: weakref.WeakKeyDictionary = weakref.WeakKeyDictionary()
self._seen_modules: weakref.WeakSet = weakref.WeakSet()