forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataclasses.py
1516 lines (1289 loc) · 57.7 KB
/
dataclasses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import re
import sys
import copy
import types
import inspect
import keyword
import builtins
import functools
import itertools
import abc
import _thread
from types import FunctionType, GenericAlias
__all__ = ['dataclass',
'field',
'Field',
'FrozenInstanceError',
'InitVar',
'KW_ONLY',
'MISSING',
# Helper functions.
'fields',
'asdict',
'astuple',
'make_dataclass',
'replace',
'is_dataclass',
]
# Conditions for adding methods. The boxes indicate what action the
# dataclass decorator takes. For all of these tables, when I talk
# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
# referring to the arguments to the @dataclass decorator. When
# checking if a dunder method already exists, I mean check for an
# entry in the class's __dict__. I never check to see if an attribute
# is defined in a base class.
# Key:
# +=========+=========================================+
# + Value | Meaning |
# +=========+=========================================+
# | <blank> | No action: no method is added. |
# +---------+-----------------------------------------+
# | add | Generated method is added. |
# +---------+-----------------------------------------+
# | raise | TypeError is raised. |
# +---------+-----------------------------------------+
# | None | Attribute is set to None. |
# +=========+=========================================+
# __init__
#
# +--- init= parameter
# |
# v | | |
# | no | yes | <--- class has __init__ in __dict__?
# +=======+=======+=======+
# | False | | |
# +-------+-------+-------+
# | True | add | | <- the default
# +=======+=======+=======+
# __repr__
#
# +--- repr= parameter
# |
# v | | |
# | no | yes | <--- class has __repr__ in __dict__?
# +=======+=======+=======+
# | False | | |
# +-------+-------+-------+
# | True | add | | <- the default
# +=======+=======+=======+
# __setattr__
# __delattr__
#
# +--- frozen= parameter
# |
# v | | |
# | no | yes | <--- class has __setattr__ or __delattr__ in __dict__?
# +=======+=======+=======+
# | False | | | <- the default
# +-------+-------+-------+
# | True | add | raise |
# +=======+=======+=======+
# Raise because not adding these methods would break the "frozen-ness"
# of the class.
# __eq__
#
# +--- eq= parameter
# |
# v | | |
# | no | yes | <--- class has __eq__ in __dict__?
# +=======+=======+=======+
# | False | | |
# +-------+-------+-------+
# | True | add | | <- the default
# +=======+=======+=======+
# __lt__
# __le__
# __gt__
# __ge__
#
# +--- order= parameter
# |
# v | | |
# | no | yes | <--- class has any comparison method in __dict__?
# +=======+=======+=======+
# | False | | | <- the default
# +-------+-------+-------+
# | True | add | raise |
# +=======+=======+=======+
# Raise because to allow this case would interfere with using
# functools.total_ordering.
# __hash__
# +------------------- unsafe_hash= parameter
# | +----------- eq= parameter
# | | +--- frozen= parameter
# | | |
# v v v | | |
# | no | yes | <--- class has explicitly defined __hash__
# +=======+=======+=======+========+========+
# | False | False | False | | | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | False | True | | | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | True | False | None | | <-- the default, not hashable
# +-------+-------+-------+--------+--------+
# | False | True | True | add | | Frozen, so hashable, allows override
# +-------+-------+-------+--------+--------+
# | True | False | False | add | raise | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True | False | True | add | raise | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True | True | False | add | raise | Not frozen, but hashable
# +-------+-------+-------+--------+--------+
# | True | True | True | add | raise | Frozen, so hashable
# +=======+=======+=======+========+========+
# For boxes that are blank, __hash__ is untouched and therefore
# inherited from the base class. If the base is object, then
# id-based hashing is used.
#
# Note that a class may already have __hash__=None if it specified an
# __eq__ method in the class body (not one that was created by
# @dataclass).
#
# See _hash_action (below) for a coded version of this table.
# __match_args__
#
# +--- match_args= parameter
# |
# v | | |
# | no | yes | <--- class has __match_args__ in __dict__?
# +=======+=======+=======+
# | False | | |
# +-------+-------+-------+
# | True | add | | <- the default
# +=======+=======+=======+
# __match_args__ is always added unless the class already defines it. It is a
# tuple of __init__ parameter names; non-init fields must be matched by keyword.
# Raised when an attempt is made to modify a frozen class.
class FrozenInstanceError(AttributeError): pass
# A sentinel object for default values to signal that a default
# factory will be used. This is given a nice repr() which will appear
# in the function signature of dataclasses' constructors.
class _HAS_DEFAULT_FACTORY_CLASS:
def __repr__(self):
return '<factory>'
_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
# A sentinel object to detect if a parameter is supplied or not. Use
# a class to give it a better repr.
class _MISSING_TYPE:
pass
MISSING = _MISSING_TYPE()
# A sentinel object to indicate that following fields are keyword-only by
# default. Use a class to give it a better repr.
class _KW_ONLY_TYPE:
pass
KW_ONLY = _KW_ONLY_TYPE()
# Since most per-field metadata will be unused, create an empty
# read-only proxy that can be shared among all fields.
_EMPTY_METADATA = types.MappingProxyType({})
# Markers for the various kinds of fields and pseudo-fields.
class _FIELD_BASE:
def __init__(self, name):
self.name = name
def __repr__(self):
return self.name
_FIELD = _FIELD_BASE('_FIELD')
_FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
_FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')
# The name of an attribute on the class where we store the Field
# objects. Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'
# The name of an attribute on the class that stores the parameters to
# @dataclass.
_PARAMS = '__dataclass_params__'
# The name of the function, that if it exists, is called at the end of
# __init__.
_POST_INIT_NAME = '__post_init__'
# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
class InitVar:
__slots__ = ('type', )
def __init__(self, type):
self.type = type
def __repr__(self):
if isinstance(self.type, type):
type_name = self.type.__name__
else:
# typing objects, e.g. List[int]
type_name = repr(self.type)
return f'dataclasses.InitVar[{type_name}]'
def __class_getitem__(cls, type):
return InitVar(type)
# Instances of Field are only ever created from within this module,
# and only from the field() function, although Field instances are
# exposed externally as (conceptually) read-only objects.
#
# name and type are filled in after the fact, not in __init__.
# They're not known at the time this class is instantiated, but it's
# convenient if they're available later.
#
# When cls._FIELDS is filled in with a list of Field objects, the name
# and type fields will have been populated.
class Field:
__slots__ = ('name',
'type',
'default',
'default_factory',
'repr',
'hash',
'init',
'compare',
'metadata',
'kw_only',
'_field_type', # Private: not to be used by user code.
)
def __init__(self, default, default_factory, init, repr, hash, compare,
metadata, kw_only):
self.name = None
self.type = None
self.default = default
self.default_factory = default_factory
self.init = init
self.repr = repr
self.hash = hash
self.compare = compare
self.metadata = (_EMPTY_METADATA
if metadata is None else
types.MappingProxyType(metadata))
self.kw_only = kw_only
self._field_type = None
def __repr__(self):
return ('Field('
f'name={self.name!r},'
f'type={self.type!r},'
f'default={self.default!r},'
f'default_factory={self.default_factory!r},'
f'init={self.init!r},'
f'repr={self.repr!r},'
f'hash={self.hash!r},'
f'compare={self.compare!r},'
f'metadata={self.metadata!r},'
f'kw_only={self.kw_only!r},'
f'_field_type={self._field_type}'
')')
# This is used to support the PEP 487 __set_name__ protocol in the
# case where we're using a field that contains a descriptor as a
# default value. For details on __set_name__, see
# https://peps.python.org/pep-0487/#implementation-details.
#
# Note that in _process_class, this Field object is overwritten
# with the default value, so the end result is a descriptor that
# had __set_name__ called on it at the right time.
def __set_name__(self, owner, name):
func = getattr(type(self.default), '__set_name__', None)
if func:
# There is a __set_name__ method on the descriptor, call
# it.
func(self.default, owner, name)
__class_getitem__ = classmethod(GenericAlias)
class _DataclassParams:
__slots__ = ('init',
'repr',
'eq',
'order',
'unsafe_hash',
'frozen',
'match_args',
'kw_only',
'slots',
'weakref_slot',
)
def __init__(self,
init, repr, eq, order, unsafe_hash, frozen,
match_args, kw_only, slots, weakref_slot):
self.init = init
self.repr = repr
self.eq = eq
self.order = order
self.unsafe_hash = unsafe_hash
self.frozen = frozen
self.match_args = match_args
self.kw_only = kw_only
self.slots = slots
self.weakref_slot = weakref_slot
def __repr__(self):
return ('_DataclassParams('
f'init={self.init!r},'
f'repr={self.repr!r},'
f'eq={self.eq!r},'
f'order={self.order!r},'
f'unsafe_hash={self.unsafe_hash!r},'
f'frozen={self.frozen!r},'
f'match_args={self.match_args!r},'
f'kw_only={self.kw_only!r},'
f'slots={self.slots!r},'
f'weakref_slot={self.weakref_slot!r}'
')')
# This function is used instead of exposing Field creation directly,
# so that a type checker can be told (via overloads) that this is a
# function whose type depends on its parameters.
def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
hash=None, compare=True, metadata=None, kw_only=MISSING):
"""Return an object to identify dataclass fields.
default is the default value of the field. default_factory is a
0-argument function called to initialize a field's value. If init
is true, the field will be a parameter to the class's __init__()
function. If repr is true, the field will be included in the
object's repr(). If hash is true, the field will be included in the
object's hash(). If compare is true, the field will be used in
comparison functions. metadata, if specified, must be a mapping
which is stored but not otherwise examined by dataclass. If kw_only
is true, the field will become a keyword-only parameter to
__init__().
It is an error to specify both default and default_factory.
"""
if default is not MISSING and default_factory is not MISSING:
raise ValueError('cannot specify both default and default_factory')
return Field(default, default_factory, init, repr, hash, compare,
metadata, kw_only)
def _fields_in_init_order(fields):
# Returns the fields as __init__ will output them. It returns 2 tuples:
# the first for normal args, and the second for keyword args.
return (tuple(f for f in fields if f.init and not f.kw_only),
tuple(f for f in fields if f.init and f.kw_only)
)
def _tuple_str(obj_name, fields):
# Return a string representing each field of obj_name as a tuple
# member. So, if fields is ['x', 'y'] and obj_name is "self",
# return "(self.x,self.y)".
# Special case for the 0-tuple.
if not fields:
return '()'
# Note the trailing comma, needed if this turns out to be a 1-tuple.
return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)'
# This function's logic is copied from "recursive_repr" function in
# reprlib module to avoid dependency.
def _recursive_repr(user_function):
# Decorator to make a repr function return "..." for a recursive
# call.
repr_running = set()
@functools.wraps(user_function)
def wrapper(self):
key = id(self), _thread.get_ident()
if key in repr_running:
return '...'
repr_running.add(key)
try:
result = user_function(self)
finally:
repr_running.discard(key)
return result
return wrapper
def _create_fn(name, args, body, *, globals=None, locals=None,
return_type=MISSING):
# Note that we may mutate locals. Callers beware!
# The only callers are internal to this module, so no
# worries about external callers.
if locals is None:
locals = {}
return_annotation = ''
if return_type is not MISSING:
locals['_return_type'] = return_type
return_annotation = '->_return_type'
args = ','.join(args)
body = '\n'.join(f' {b}' for b in body)
# Compute the text of the entire function.
txt = f' def {name}({args}){return_annotation}:\n{body}'
# Free variables in exec are resolved in the global namespace.
# The global namespace we have is user-provided, so we can't modify it for
# our purposes. So we put the things we need into locals and introduce a
# scope to allow the function we're creating to close over them.
local_vars = ', '.join(locals.keys())
txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"
ns = {}
exec(txt, globals, ns)
return ns['__create_fn__'](**locals)
def _field_assign(frozen, name, value, self_name):
# If we're a frozen class, then assign to our fields in __init__
# via object.__setattr__. Otherwise, just use a simple
# assignment.
#
# self_name is what "self" is called in this function: don't
# hard-code "self", since that might be a field name.
if frozen:
return f'__dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})'
return f'{self_name}.{name}={value}'
def _field_init(f, frozen, globals, self_name, slots):
# Return the text of the line in the body of __init__ that will
# initialize this field.
default_name = f'_dflt_{f.name}'
if f.default_factory is not MISSING:
if f.init:
# This field has a default factory. If a parameter is
# given, use it. If not, call the factory.
globals[default_name] = f.default_factory
value = (f'{default_name}() '
f'if {f.name} is _HAS_DEFAULT_FACTORY '
f'else {f.name}')
else:
# This is a field that's not in the __init__ params, but
# has a default factory function. It needs to be
# initialized here by calling the factory function,
# because there's no other way to initialize it.
# For a field initialized with a default=defaultvalue, the
# class dict just has the default value
# (cls.fieldname=defaultvalue). But that won't work for a
# default factory, the factory must be called in __init__
# and we must assign that to self.fieldname. We can't
# fall back to the class dict's value, both because it's
# not set, and because it might be different per-class
# (which, after all, is why we have a factory function!).
globals[default_name] = f.default_factory
value = f'{default_name}()'
else:
# No default factory.
if f.init:
if f.default is MISSING:
# There's no default, just do an assignment.
value = f.name
elif f.default is not MISSING:
globals[default_name] = f.default
value = f.name
else:
# If the class has slots, then initialize this field.
if slots and f.default is not MISSING:
globals[default_name] = f.default
value = default_name
else:
# This field does not need initialization: reading from it will
# just use the class attribute that contains the default.
# Signify that to the caller by returning None.
return None
# Only test this now, so that we can create variables for the
# default. However, return None to signify that we're not going
# to actually do the assignment statement for InitVars.
if f._field_type is _FIELD_INITVAR:
return None
# Now, actually generate the field assignment.
return _field_assign(frozen, f.name, value, self_name)
def _init_param(f):
# Return the __init__ parameter string for this field. For
# example, the equivalent of 'x:int=3' (except instead of 'int',
# reference a variable set to int, and instead of '3', reference a
# variable set to 3).
if f.default is MISSING and f.default_factory is MISSING:
# There's no default, and no default_factory, just output the
# variable name and type.
default = ''
elif f.default is not MISSING:
# There's a default, this will be the name that's used to look
# it up.
default = f'=_dflt_{f.name}'
elif f.default_factory is not MISSING:
# There's a factory function. Set a marker.
default = '=_HAS_DEFAULT_FACTORY'
return f'{f.name}:_type_{f.name}{default}'
def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init,
self_name, globals, slots):
# fields contains both real fields and InitVar pseudo-fields.
# Make sure we don't have fields without defaults following fields
# with defaults. This actually would be caught when exec-ing the
# function source code, but catching it here gives a better error
# message, and future-proofs us in case we build up the function
# using ast.
seen_default = False
for f in std_fields:
# Only consider the non-kw-only fields in the __init__ call.
if f.init:
if not (f.default is MISSING and f.default_factory is MISSING):
seen_default = True
elif seen_default:
raise TypeError(f'non-default argument {f.name!r} '
'follows default argument')
locals = {f'_type_{f.name}': f.type for f in fields}
locals.update({
'MISSING': MISSING,
'_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY,
'__dataclass_builtins_object__': object,
})
body_lines = []
for f in fields:
line = _field_init(f, frozen, locals, self_name, slots)
# line is None means that this field doesn't require
# initialization (it's a pseudo-field). Just skip it.
if line:
body_lines.append(line)
# Does this class have a post-init function?
if has_post_init:
params_str = ','.join(f.name for f in fields
if f._field_type is _FIELD_INITVAR)
body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')
# If no body lines, use 'pass'.
if not body_lines:
body_lines = ['pass']
_init_params = [_init_param(f) for f in std_fields]
if kw_only_fields:
# Add the keyword-only args. Because the * can only be added if
# there's at least one keyword-only arg, there needs to be a test here
# (instead of just concatenting the lists together).
_init_params += ['*']
_init_params += [_init_param(f) for f in kw_only_fields]
return _create_fn('__init__',
[self_name] + _init_params,
body_lines,
locals=locals,
globals=globals,
return_type=None)
def _repr_fn(fields, globals):
fn = _create_fn('__repr__',
('self',),
['return self.__class__.__qualname__ + f"(' +
', '.join([f"{f.name}={{self.{f.name}!r}}"
for f in fields]) +
')"'],
globals=globals)
return _recursive_repr(fn)
def _frozen_get_del_attr(cls, fields, globals):
locals = {'cls': cls,
'FrozenInstanceError': FrozenInstanceError}
if fields:
fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)'
else:
# Special case for the zero-length tuple.
fields_str = '()'
return (_create_fn('__setattr__',
('self', 'name', 'value'),
(f'if type(self) is cls or name in {fields_str}:',
' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
f'super(cls, self).__setattr__(name, value)'),
locals=locals,
globals=globals),
_create_fn('__delattr__',
('self', 'name'),
(f'if type(self) is cls or name in {fields_str}:',
' raise FrozenInstanceError(f"cannot delete field {name!r}")',
f'super(cls, self).__delattr__(name)'),
locals=locals,
globals=globals),
)
def _cmp_fn(name, op, self_tuple, other_tuple, globals):
# Create a comparison function. If the fields in the object are
# named 'x' and 'y', then self_tuple is the string
# '(self.x,self.y)' and other_tuple is the string
# '(other.x,other.y)'.
return _create_fn(name,
('self', 'other'),
[ 'if other.__class__ is self.__class__:',
f' return {self_tuple}{op}{other_tuple}',
'return NotImplemented'],
globals=globals)
def _hash_fn(fields, globals):
self_tuple = _tuple_str('self', fields)
return _create_fn('__hash__',
('self',),
[f'return hash({self_tuple})'],
globals=globals)
def _is_classvar(a_type, typing):
# This test uses a typing internal class, but it's the best way to
# test if this is a ClassVar.
return (a_type is typing.ClassVar
or (type(a_type) is typing._GenericAlias
and a_type.__origin__ is typing.ClassVar))
def _is_initvar(a_type, dataclasses):
# The module we're checking against is the module we're
# currently in (dataclasses.py).
return (a_type is dataclasses.InitVar
or type(a_type) is dataclasses.InitVar)
def _is_kw_only(a_type, dataclasses):
return a_type is dataclasses.KW_ONLY
def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
# Given a type annotation string, does it refer to a_type in
# a_module? For example, when checking that annotation denotes a
# ClassVar, then a_module is typing, and a_type is
# typing.ClassVar.
# It's possible to look up a_module given a_type, but it involves
# looking in sys.modules (again!), and seems like a waste since
# the caller already knows a_module.
# - annotation is a string type annotation
# - cls is the class that this annotation was found in
# - a_module is the module we want to match
# - a_type is the type in that module we want to match
# - is_type_predicate is a function called with (obj, a_module)
# that determines if obj is of the desired type.
# Since this test does not do a local namespace lookup (and
# instead only a module (global) lookup), there are some things it
# gets wrong.
# With string annotations, cv0 will be detected as a ClassVar:
# CV = ClassVar
# @dataclass
# class C0:
# cv0: CV
# But in this example cv1 will not be detected as a ClassVar:
# @dataclass
# class C1:
# CV = ClassVar
# cv1: CV
# In C1, the code in this function (_is_type) will look up "CV" in
# the module and not find it, so it will not consider cv1 as a
# ClassVar. This is a fairly obscure corner case, and the best
# way to fix it would be to eval() the string "CV" with the
# correct global and local namespaces. However that would involve
# a eval() penalty for every single field of every dataclass
# that's defined. It was judged not worth it.
match = _MODULE_IDENTIFIER_RE.match(annotation)
if match:
ns = None
module_name = match.group(1)
if not module_name:
# No module name, assume the class's module did
# "from dataclasses import InitVar".
ns = sys.modules.get(cls.__module__).__dict__
else:
# Look up module_name in the class's module.
module = sys.modules.get(cls.__module__)
if module and module.__dict__.get(module_name) is a_module:
ns = sys.modules.get(a_type.__module__).__dict__
if ns and is_type_predicate(ns.get(match.group(2)), a_module):
return True
return False
def _get_field(cls, a_name, a_type, default_kw_only):
# Return a Field object for this field name and type. ClassVars and
# InitVars are also returned, but marked as such (see f._field_type).
# default_kw_only is the value of kw_only to use if there isn't a field()
# that defines it.
# If the default value isn't derived from Field, then it's only a
# normal default value. Convert it to a Field().
default = getattr(cls, a_name, MISSING)
if isinstance(default, Field):
f = default
else:
if isinstance(default, types.MemberDescriptorType):
# This is a field in __slots__, so it has no default value.
default = MISSING
f = field(default=default)
# Only at this point do we know the name and the type. Set them.
f.name = a_name
f.type = a_type
# Assume it's a normal field until proven otherwise. We're next
# going to decide if it's a ClassVar or InitVar, everything else
# is just a normal field.
f._field_type = _FIELD
# In addition to checking for actual types here, also check for
# string annotations. get_type_hints() won't always work for us
# (see https://github.com/python/typing/issues/508 for example),
# plus it's expensive and would require an eval for every string
# annotation. So, make a best effort to see if this is a ClassVar
# or InitVar using regex's and checking that the thing referenced
# is actually of the correct type.
# For the complete discussion, see https://bugs.python.org/issue33453
# If typing has not been imported, then it's impossible for any
# annotation to be a ClassVar. So, only look for ClassVar if
# typing has been imported by any module (not necessarily cls's
# module).
typing = sys.modules.get('typing')
if typing:
if (_is_classvar(a_type, typing)
or (isinstance(f.type, str)
and _is_type(f.type, cls, typing, typing.ClassVar,
_is_classvar))):
f._field_type = _FIELD_CLASSVAR
# If the type is InitVar, or if it's a matching string annotation,
# then it's an InitVar.
if f._field_type is _FIELD:
# The module we're checking against is the module we're
# currently in (dataclasses.py).
dataclasses = sys.modules[__name__]
if (_is_initvar(a_type, dataclasses)
or (isinstance(f.type, str)
and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
_is_initvar))):
f._field_type = _FIELD_INITVAR
# Validations for individual fields. This is delayed until now,
# instead of in the Field() constructor, since only here do we
# know the field name, which allows for better error reporting.
# Special restrictions for ClassVar and InitVar.
if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
if f.default_factory is not MISSING:
raise TypeError(f'field {f.name} cannot have a '
'default factory')
# Should I check for other field settings? default_factory
# seems the most serious to check for. Maybe add others. For
# example, how about init=False (or really,
# init=<not-the-default-init-value>)? It makes no sense for
# ClassVar and InitVar to specify init=<anything>.
# kw_only validation and assignment.
if f._field_type in (_FIELD, _FIELD_INITVAR):
# For real and InitVar fields, if kw_only wasn't specified use the
# default value.
if f.kw_only is MISSING:
f.kw_only = default_kw_only
else:
# Make sure kw_only isn't set for ClassVars
assert f._field_type is _FIELD_CLASSVAR
if f.kw_only is not MISSING:
raise TypeError(f'field {f.name} is a ClassVar but specifies '
'kw_only')
# For real fields, disallow mutable defaults. Use unhashable as a proxy
# indicator for mutability. Read the __hash__ attribute from the class,
# not the instance.
if f._field_type is _FIELD and f.default.__class__.__hash__ is None:
raise ValueError(f'mutable default {type(f.default)} for field '
f'{f.name} is not allowed: use default_factory')
return f
def _set_qualname(cls, value):
# Ensure that the functions returned from _create_fn uses the proper
# __qualname__ (the class they belong to).
if isinstance(value, FunctionType):
value.__qualname__ = f"{cls.__qualname__}.{value.__name__}"
return value
def _set_new_attribute(cls, name, value):
# Never overwrites an existing attribute. Returns True if the
# attribute already exists.
if name in cls.__dict__:
return True
_set_qualname(cls, value)
setattr(cls, name, value)
return False
# Decide if/how we're going to create a hash function. Key is
# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
# take. The common case is to do nothing, so instead of providing a
# function that is a no-op, use None to signify that.
def _hash_set_none(cls, fields, globals):
return None
def _hash_add(cls, fields, globals):
flds = [f for f in fields if (f.compare if f.hash is None else f.hash)]
return _set_qualname(cls, _hash_fn(flds, globals))
def _hash_exception(cls, fields, globals):
# Raise an exception.
raise TypeError(f'Cannot overwrite attribute __hash__ '
f'in class {cls.__name__}')
#
# +-------------------------------------- unsafe_hash?
# | +------------------------------- eq?
# | | +------------------------ frozen?
# | | | +---------------- has-explicit-hash?
# | | | |
# | | | | +------- action
# | | | | |
# v v v v v
_hash_action = {(False, False, False, False): None,
(False, False, False, True ): None,
(False, False, True, False): None,
(False, False, True, True ): None,
(False, True, False, False): _hash_set_none,
(False, True, False, True ): None,
(False, True, True, False): _hash_add,
(False, True, True, True ): None,
(True, False, False, False): _hash_add,
(True, False, False, True ): _hash_exception,
(True, False, True, False): _hash_add,
(True, False, True, True ): _hash_exception,
(True, True, False, False): _hash_add,
(True, True, False, True ): _hash_exception,
(True, True, True, False): _hash_add,
(True, True, True, True ): _hash_exception,
}
# See https://bugs.python.org/issue32929#msg312829 for an if-statement
# version of this table.
def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
match_args, kw_only, slots, weakref_slot):
# Now that dicts retain insertion order, there's no reason to use
# an ordered dict. I am leveraging that ordering here, because
# derived class fields overwrite base class fields, but the order
# is defined by the base class, which is found first.
fields = {}
if cls.__module__ in sys.modules:
globals = sys.modules[cls.__module__].__dict__
else:
# Theoretically this can happen if someone writes
# a custom string to cls.__module__. In which case
# such dataclass won't be fully introspectable
# (w.r.t. typing.get_type_hints) but will still function
# correctly.
globals = {}
setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
unsafe_hash, frozen,
match_args, kw_only,
slots, weakref_slot))
# Find our base classes in reverse MRO order, and exclude
# ourselves. In reversed order so that more derived classes
# override earlier field definitions in base classes. As long as
# we're iterating over them, see if any are frozen.
any_frozen_base = False
has_dataclass_bases = False
for b in cls.__mro__[-1:0:-1]:
# Only process classes that have been processed by our
# decorator. That is, they have a _FIELDS attribute.
base_fields = getattr(b, _FIELDS, None)
if base_fields is not None:
has_dataclass_bases = True
for f in base_fields.values():
fields[f.name] = f
if getattr(b, _PARAMS).frozen:
any_frozen_base = True
# Annotations defined specifically in this class (not in base classes).
#
# Fields are found from cls_annotations, which is guaranteed to be
# ordered. Default values are from class attributes, if a field
# has a default. If the default value is a Field(), then it
# contains additional info beyond (and possibly including) the
# actual default value. Pseudo-fields ClassVars and InitVars are
# included, despite the fact that they're not real fields. That's
# dealt with later.
cls_annotations = inspect.get_annotations(cls)
# Now find fields in our class. While doing so, validate some
# things, and set the default values (as class attributes) where
# we can.
cls_fields = []
# Get a reference to this module for the _is_kw_only() test.
KW_ONLY_seen = False
dataclasses = sys.modules[__name__]
for name, type in cls_annotations.items():
# See if this is a marker to change the value of kw_only.
if (_is_kw_only(type, dataclasses)
or (isinstance(type, str)
and _is_type(type, cls, dataclasses, dataclasses.KW_ONLY,
_is_kw_only))):
# Switch the default to kw_only=True, and ignore this
# annotation: it's not a real field.
if KW_ONLY_seen:
raise TypeError(f'{name!r} is KW_ONLY, but KW_ONLY '
'has already been specified')
KW_ONLY_seen = True
kw_only = True
else:
# Otherwise it's a field of some type.
cls_fields.append(_get_field(cls, name, type, kw_only))
for f in cls_fields:
fields[f.name] = f
# If the class attribute (which is the default value for this
# field) exists and is of type 'Field', replace it with the
# real default. This is so that normal class introspection
# sees a real default value, not a Field.
if isinstance(getattr(cls, f.name, None), Field):
if f.default is MISSING:
# If there's no default, delete the class attribute.
# This happens if we specify field(repr=False), for
# example (that is, we specified a field object, but
# no default value). Also if we're using a default
# factory. The class attribute should not be set at
# all in the post-processed class.
delattr(cls, f.name)
else:
setattr(cls, f.name, f.default)
# Do we have any Field members that don't also have annotations?
for name, value in cls.__dict__.items():
if isinstance(value, Field) and not name in cls_annotations:
raise TypeError(f'{name!r} is a field but has no type annotation')