forked from python/mypy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats.py
489 lines (410 loc) · 16.2 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
"""Utilities for calculating and reporting statistics about types."""
from __future__ import annotations
import os
from collections import Counter
from contextlib import contextmanager
from typing import Final, Iterator
from mypy import nodes
from mypy.argmap import map_formals_to_actuals
from mypy.nodes import (
AssignmentExpr,
AssignmentStmt,
BreakStmt,
BytesExpr,
CallExpr,
ClassDef,
ComparisonExpr,
ComplexExpr,
ContinueStmt,
EllipsisExpr,
Expression,
ExpressionStmt,
FloatExpr,
FuncDef,
Import,
ImportAll,
ImportFrom,
IndexExpr,
IntExpr,
MemberExpr,
MypyFile,
NameExpr,
Node,
OpExpr,
PassStmt,
RefExpr,
StrExpr,
TypeApplication,
UnaryExpr,
YieldFromExpr,
)
from mypy.traverser import TraverserVisitor
from mypy.typeanal import collect_all_inner_types
from mypy.types import (
AnyType,
CallableType,
FunctionLike,
Instance,
TupleType,
Type,
TypeOfAny,
TypeQuery,
TypeVarType,
get_proper_type,
get_proper_types,
)
from mypy.util import correct_relative_import
# Precision levels recorded per source line. The numeric order is significant:
# StatisticsVisitor.record_line() keeps the maximum value seen for a line, so a
# larger constant always wins over a smaller one on the same line.
TYPE_EMPTY: Final = 0
TYPE_UNANALYZED: Final = 1 # type of non-typechecked code
TYPE_PRECISE: Final = 2
TYPE_IMPRECISE: Final = 3
TYPE_ANY: Final = 4
# Display names for the precision levels above; the list index is the TYPE_* value.
precision_names: Final = ["empty", "unanalyzed", "precise", "imprecise", "any"]
class StatisticsVisitor(TraverserVisitor):
    """AST visitor that gathers type-precision statistics for one module.

    While traversing, the visitor maintains:

    * ``line_map``: for each line number, the largest TYPE_* precision value
      recorded on that line (see ``record_line``);
    * ``num_*_exprs`` / ``num_*_types``: running counters updated by ``type``;
    * ``type_of_any_counter`` and ``any_line_map``: which kinds of Any were
      seen, and on which lines;
    * ``output``: log messages accumulated through ``log``.
    """

    def __init__(
        self,
        inferred: bool,
        filename: str,
        modules: dict[str, MypyFile],
        typemap: dict[Expression, Type] | None = None,
        all_nodes: bool = False,
        visit_untyped_defs: bool = True,
    ) -> None:
        """Store the configuration and reset every counter.

        Args:
            inferred: If true, types of inferred definitions are counted in
                assignments and a leading "self" argument is not skipped in
                function signatures.
            filename: Name used as the prefix of diagnostic messages.
            modules: Known modules; imports of ids found here count as precise.
            typemap: Optional mapping from expressions to their types.
            all_nodes: If true, the type of every processed expression node is
                looked up in ``typemap`` and counted.
            visit_untyped_defs: If false, bodies of dynamic (unannotated)
                functions are not traversed.
        """
        self.inferred = inferred
        self.filename = filename
        self.modules = modules
        self.typemap = typemap
        self.all_nodes = all_nodes
        self.visit_untyped_defs = visit_untyped_defs

        self.num_precise_exprs = 0
        self.num_imprecise_exprs = 0
        self.num_any_exprs = 0

        self.num_simple_types = 0
        self.num_generic_types = 0
        self.num_tuple_types = 0
        self.num_function_types = 0
        self.num_typevar_types = 0
        self.num_complex_types = 0
        self.num_any_types = 0

        # Line that subsequent type() calls are attributed to.
        self.line = -1

        # Line number -> highest TYPE_* precision value recorded for it.
        self.line_map: dict[int, int] = {}

        self.type_of_any_counter: Counter[int] = Counter()
        # Line number -> Any types counted on that line.
        self.any_line_map: dict[int, list[AnyType]] = {}

        # For each scope (top level/function), whether the scope was type checked
        # (annotated function).
        #
        # TODO: Handle --check-untyped-defs
        self.checked_scopes = [True]

        self.output: list[str] = []

        TraverserVisitor.__init__(self)

    def visit_mypy_file(self, o: MypyFile) -> None:
        """Remember the module node and its full name, then traverse it."""
        self.cur_mod_node = o
        self.cur_mod_id = o.fullname
        super().visit_mypy_file(o)

    def visit_import_from(self, imp: ImportFrom) -> None:
        """Record the precision of a ``from ... import ...`` line."""
        self.process_import(imp)

    def visit_import_all(self, imp: ImportAll) -> None:
        """Record the precision of a ``from ... import *`` line."""
        self.process_import(imp)

    def process_import(self, imp: ImportFrom | ImportAll) -> None:
        """Mark the import line precise if its target is a known module, else Any."""
        import_id, ok = correct_relative_import(
            self.cur_mod_id, imp.relative, imp.id, self.cur_mod_node.is_package_init_file()
        )
        if ok and import_id in self.modules:
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(imp.line, kind)

    def visit_import(self, imp: Import) -> None:
        """Mark an ``import`` line precise only if every imported id is known."""
        if all(mod_id in self.modules for mod_id, _ in imp.ids):
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(imp.line, kind)

    def visit_func_def(self, o: FuncDef) -> None:
        """Count the types in a function signature and traverse its body.

        When the function has several distinct expansions, each expansion is
        visited instead; if the function is among its own expansions an error
        line is printed and the function is skipped.
        """
        with self.enter_scope(o):
            self.line = o.line
            if len(o.expanded) > 1 and o.expanded != [o] * len(o.expanded):
                if o in o.expanded:
                    print(
                        f"{self.filename}:{o.line}: "
                        "ERROR: cycle in function expansion; skipping"
                    )
                    return
                for defn in o.expanded:
                    assert isinstance(defn, FuncDef)
                    self.visit_func_def(defn)
            else:
                if o.type:
                    assert isinstance(o.type, CallableType)
                    sig = o.type
                    arg_types = sig.arg_types
                    # Skip the first argument when it is named "self", unless
                    # inferred types are being reported.
                    if sig.arg_names and sig.arg_names[0] == "self" and not self.inferred:
                        arg_types = arg_types[1:]
                    for arg in arg_types:
                        self.type(arg)
                    self.type(sig.ret_type)
                elif self.all_nodes:
                    self.record_line(self.line, TYPE_ANY)
                if not o.is_dynamic() or self.visit_untyped_defs:
                    super().visit_func_def(o)

    @contextmanager
    def enter_scope(self, o: FuncDef) -> Iterator[None]:
        """Push a checked/unchecked flag for the function scope ``o``.

        The new scope counts as checked only if ``o`` has a type and the
        enclosing scope is checked. The flag is popped when the ``with`` block
        exits, including when it exits through an exception, so the scope
        stack always stays balanced.
        """
        self.checked_scopes.append(o.type is not None and self.checked_scopes[-1])
        try:
            yield None
        finally:
            self.checked_scopes.pop()

    def is_checked_scope(self) -> bool:
        """Return whether the innermost scope is marked as type checked."""
        return self.checked_scopes[-1]

    def visit_class_def(self, o: ClassDef) -> None:
        """Record the class line as precise and traverse decorators and body."""
        self.record_line(o.line, TYPE_PRECISE)  # TODO: Look at base classes
        # Override this method because we don't want to analyze base_type_exprs (base_type_exprs
        # are base classes in a class declaration).
        # While base_type_exprs are technically expressions, type analyzer does not visit them and
        # they are not in the typemap.
        for d in o.decorators:
            d.accept(self)
        o.defs.accept(self)

    def visit_type_application(self, o: TypeApplication) -> None:
        """Count every type argument of a type application."""
        self.line = o.line
        for t in o.types:
            self.type(t)
        super().visit_type_application(o)

    def visit_assignment_stmt(self, o: AssignmentStmt) -> None:
        """Count the declared type of an assignment, or its inferred lvalue types."""
        self.line = o.line
        if isinstance(o.rvalue, nodes.CallExpr) and isinstance(
            o.rvalue.analyzed, nodes.TypeVarExpr
        ):
            # Type variable definition -- not a real assignment.
            return
        if o.type:
            self.type(o.type)
        elif self.inferred and not self.all_nodes:
            # if self.all_nodes is set, lvalues will be visited later
            for lvalue in o.lvalues:
                if isinstance(lvalue, nodes.TupleExpr):
                    items = lvalue.items
                else:
                    items = [lvalue]
                for item in items:
                    if isinstance(item, RefExpr) and item.is_inferred_def:
                        if self.typemap is not None:
                            self.type(self.typemap.get(item))
        super().visit_assignment_stmt(o)

    def visit_expression_stmt(self, o: ExpressionStmt) -> None:
        """Record bare string/bytes statements as empty; traverse anything else."""
        if isinstance(o.expr, (StrExpr, BytesExpr)):
            # Docstring
            self.record_line(o.line, TYPE_EMPTY)
        else:
            super().visit_expression_stmt(o)

    def visit_pass_stmt(self, o: PassStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_break_stmt(self, o: BreakStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_continue_stmt(self, o: ContinueStmt) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_name_expr(self, o: NameExpr) -> None:
        """Handle the builtin constants specially; process other names normally."""
        if o.fullname in ("builtins.None", "builtins.True", "builtins.False", "builtins.Ellipsis"):
            self.record_precise_if_checked_scope(o)
        else:
            self.process_node(o)
            super().visit_name_expr(o)

    def visit_yield_from_expr(self, o: YieldFromExpr) -> None:
        """Traverse only the operand of ``yield from``, when there is one."""
        if o.expr:
            o.expr.accept(self)

    def visit_call_expr(self, o: CallExpr) -> None:
        """Process a call, its callee and arguments, and the callee's formals.

        When the call has an ``analyzed`` node, only that node is traversed.
        """
        self.process_node(o)
        if o.analyzed:
            o.analyzed.accept(self)
        else:
            o.callee.accept(self)
            for a in o.args:
                a.accept(self)
            self.record_call_target_precision(o)

    def record_call_target_precision(self, o: CallExpr) -> None:
        """Record precision of formal argument types used in a call."""
        if not self.typemap or o.callee not in self.typemap:
            # Type not available.
            return
        callee_type = get_proper_type(self.typemap[o.callee])
        if isinstance(callee_type, CallableType):
            self.record_callable_target_precision(o, callee_type)
        # TODO: Handle overloaded functions, etc.

    def record_callable_target_precision(self, o: CallExpr, callee: CallableType) -> None:
        """Record imprecision caused by callee argument types.

        This only considers arguments passed in a call expression. Arguments
        with default values that aren't provided in a call arguably don't
        contribute to typing imprecision at the *call site* (but they
        contribute at the function definition).
        """
        assert self.typemap
        typemap = self.typemap
        actual_to_formal = map_formals_to_actuals(
            o.arg_kinds,
            o.arg_names,
            callee.arg_kinds,
            callee.arg_names,
            lambda n: typemap[o.args[n]],
        )
        # Each entry lists indexes into callee.arg_types.
        for formals in actual_to_formal:
            for n in formals:
                formal = get_proper_type(callee.arg_types[n])
                if isinstance(formal, AnyType):
                    self.record_line(o.line, TYPE_ANY)
                elif is_imprecise(formal):
                    self.record_line(o.line, TYPE_IMPRECISE)

    def visit_member_expr(self, o: MemberExpr) -> None:
        self.process_node(o)
        super().visit_member_expr(o)

    def visit_op_expr(self, o: OpExpr) -> None:
        self.process_node(o)
        super().visit_op_expr(o)

    def visit_comparison_expr(self, o: ComparisonExpr) -> None:
        self.process_node(o)
        super().visit_comparison_expr(o)

    def visit_index_expr(self, o: IndexExpr) -> None:
        self.process_node(o)
        super().visit_index_expr(o)

    def visit_assignment_expr(self, o: AssignmentExpr) -> None:
        self.process_node(o)
        super().visit_assignment_expr(o)

    def visit_unary_expr(self, o: UnaryExpr) -> None:
        self.process_node(o)
        super().visit_unary_expr(o)

    def visit_str_expr(self, o: StrExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_bytes_expr(self, o: BytesExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_int_expr(self, o: IntExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_float_expr(self, o: FloatExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_complex_expr(self, o: ComplexExpr) -> None:
        self.record_precise_if_checked_scope(o)

    def visit_ellipsis(self, o: EllipsisExpr) -> None:
        self.record_precise_if_checked_scope(o)

    # Helpers

    def process_node(self, node: Expression) -> None:
        """Count the type of ``node`` when ``all_nodes`` is set and a typemap exists."""
        if self.all_nodes and self.typemap is not None:
            self.line = node.line
            self.type(self.typemap.get(node))

    def record_precise_if_checked_scope(self, node: Node) -> None:
        """Record the line of ``node`` as unanalyzed, precise or Any.

        An expression missing from a non-empty typemap is unanalyzed;
        otherwise the line is precise in a checked scope and Any elsewhere.
        """
        if isinstance(node, Expression) and self.typemap and node not in self.typemap:
            kind = TYPE_UNANALYZED
        elif self.is_checked_scope():
            kind = TYPE_PRECISE
        else:
            kind = TYPE_ANY
        self.record_line(node.line, kind)

    def type(self, t: Type | None) -> None:
        """Classify ``t`` for the current line and update the counters.

        The expression counters and ``line_map`` are updated from ``t`` as a
        whole; the per-kind counters are then updated for ``t`` and each of
        its inner types.
        """
        t = get_proper_type(t)

        if not t:
            # If an expression does not have a type, it is often due to dead code.
            # Don't count these because there can be an unanalyzed value on a line with other
            # analyzed expressions, which overwrite the TYPE_UNANALYZED.
            self.record_line(self.line, TYPE_UNANALYZED)
            return

        if isinstance(t, AnyType) and is_special_form_any(t):
            # TODO: What if there is an error in special form definition?
            self.record_line(self.line, TYPE_PRECISE)
            return

        if isinstance(t, AnyType):
            self.log(f" !! Any type around line {self.line}")
            self.num_any_exprs += 1
            self.record_line(self.line, TYPE_ANY)
        elif (not self.all_nodes and is_imprecise(t)) or (self.all_nodes and is_imprecise2(t)):
            self.log(f" !! Imprecise type around line {self.line}")
            self.num_imprecise_exprs += 1
            self.record_line(self.line, TYPE_IMPRECISE)
        else:
            self.num_precise_exprs += 1
            self.record_line(self.line, TYPE_PRECISE)

        for typ in get_proper_types(collect_all_inner_types(t)) + [t]:
            if isinstance(typ, AnyType):
                typ = get_original_any(typ)
                if is_special_form_any(typ):
                    continue
                self.type_of_any_counter[typ.type_of_any] += 1
                self.num_any_types += 1
                self.any_line_map.setdefault(self.line, []).append(typ)
            elif isinstance(typ, Instance):
                if typ.args:
                    if any(is_complex(arg) for arg in typ.args):
                        self.num_complex_types += 1
                    else:
                        self.num_generic_types += 1
                else:
                    self.num_simple_types += 1
            elif isinstance(typ, FunctionLike):
                self.num_function_types += 1
            elif isinstance(typ, TupleType):
                if any(is_complex(item) for item in typ.items):
                    self.num_complex_types += 1
                else:
                    self.num_tuple_types += 1
            elif isinstance(typ, TypeVarType):
                self.num_typevar_types += 1

    def log(self, string: str) -> None:
        """Append a message to ``output``."""
        self.output.append(string)

    def record_line(self, line: int, precision: int) -> None:
        """Keep the largest TYPE_* value seen so far for ``line``."""
        self.line_map[line] = max(precision, self.line_map.get(line, TYPE_EMPTY))
def dump_type_stats(
    tree: MypyFile,
    path: str,
    modules: dict[str, MypyFile],
    inferred: bool = False,
    typemap: dict[Expression, Type] | None = None,
) -> None:
    """Print expression-precision and type-kind counts for one module.

    Nothing is printed for the stub files recognised by is_special_module.
    Otherwise the path is printed, the tree is walked with a
    StatisticsVisitor, the visitor's log lines are echoed, and the counters
    follow under a "precision" and a "kinds" heading.
    """
    if is_special_module(path):
        return

    print(path)
    stats = StatisticsVisitor(inferred, filename=tree.fullname, modules=modules, typemap=typemap)
    tree.accept(stats)
    for message in stats.output:
        print(message)

    # (heading, rows) pairs; every row is printed as "label value".
    sections = (
        (
            " ** precision **",
            (
                (" precise ", stats.num_precise_exprs),
                (" imprecise", stats.num_imprecise_exprs),
                (" any ", stats.num_any_exprs),
            ),
        ),
        (
            " ** kinds **",
            (
                (" simple ", stats.num_simple_types),
                (" generic ", stats.num_generic_types),
                (" function ", stats.num_function_types),
                (" tuple ", stats.num_tuple_types),
                (" TypeVar ", stats.num_typevar_types),
                (" complex ", stats.num_complex_types),
                (" any ", stats.num_any_types),
            ),
        ),
    )
    for heading, rows in sections:
        print(heading)
        for label, count in rows:
            print(label, count)
def is_special_module(path: str) -> bool:
    """Return True if the final component of *path* is abc.pyi, typing.pyi or builtins.pyi."""
    _, filename = os.path.split(path)
    return filename in {"abc.pyi", "typing.pyi", "builtins.pyi"}
def is_imprecise(t: Type) -> bool:
    """Return the result of running a HasAnyQuery over *t*."""
    query = HasAnyQuery()
    return t.accept(query)
class HasAnyQuery(TypeQuery[bool]):
    """Boolean type query that answers True for Any types other than special forms.

    The builtin ``any`` is handed to ``TypeQuery.__init__`` (presumably as the
    strategy for combining results of component types -- confirm against
    TypeQuery).
    """

    def __init__(self) -> None:
        super().__init__(any)

    def visit_any(self, t: AnyType) -> bool:
        """Return False for a special-form Any and True for every other Any."""
        if is_special_form_any(t):
            return False
        return True
def is_imprecise2(t: Type) -> bool:
    """Return the result of running a HasAnyQuery2 over *t*."""
    query = HasAnyQuery2()
    return t.accept(query)
class HasAnyQuery2(HasAnyQuery):
    """Variant of HasAnyQuery that always answers False for callable types."""

    def visit_callable_type(self, t: CallableType) -> bool:
        """Return False unconditionally.

        References to functions with some Any argument types (etc.) are not
        flagged, since they generally don't mean trouble.
        """
        return False
def is_generic(t: Type) -> bool:
    """Return True if the proper type of *t* is an Instance with type arguments."""
    proper = get_proper_type(t)
    if not isinstance(proper, Instance):
        return False
    return bool(proper.args)
def is_complex(t: Type) -> bool:
    """Return True if *t* is generic, function-like, a tuple type or a type variable."""
    proper = get_proper_type(t)
    if is_generic(proper):
        return True
    return isinstance(proper, (FunctionLike, TupleType, TypeVarType))
def is_special_form_any(t: AnyType) -> bool:
    """Return True if the original Any behind *t* has kind TypeOfAny.special_form."""
    origin = get_original_any(t)
    return origin.type_of_any == TypeOfAny.special_form
def get_original_any(t: AnyType) -> AnyType:
    """Return the source Any of *t* if *t* was derived from another Any, else *t*.

    Only one level is followed: the source is asserted to exist and not to be
    derived from another Any itself.
    """
    if t.type_of_any != TypeOfAny.from_another_any:
        return t
    source = t.source_any
    assert source
    assert source.type_of_any != TypeOfAny.from_another_any
    return source