Skip to content

Commit 2a7bacb

Browse files
authored
bpo-33453: Handle string type annotations in dataclasses. (GH-6768)
1 parent d8dcd57 commit 2a7bacb

File tree

7 files changed

+399
-20
lines changed

7 files changed

+399
-20
lines changed

Lib/dataclasses.py

Lines changed: 116 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import sys
23
import copy
34
import types
@@ -187,6 +188,10 @@ def __repr__(self):
187188
# __init__.
188189
_POST_INIT_NAME = '__post_init__'
189190

191+
# String regex that string annotations for ClassVar or InitVar must match.
192+
# Allows "identifier.identifier[" or "identifier[".
193+
# https://bugs.python.org/issue33453 for details.
194+
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
190195

191196
class _InitVarMeta(type):
192197
def __getitem__(self, params):
@@ -532,6 +537,80 @@ def _hash_fn(fields):
532537
[f'return hash({self_tuple})'])
533538

534539

540+
def _is_classvar(a_type, typing):
541+
if typing:
542+
# This test uses a typing internal class, but it's the best
543+
# way to test if this is a ClassVar.
544+
return (a_type is typing.ClassVar
545+
or (type(a_type) is typing._GenericAlias
546+
and a_type.__origin__ is typing.ClassVar))
547+
548+
549+
def _is_initvar(a_type, dataclasses):
550+
# The module we're checking against is the module we're
551+
# currently in (dataclasses.py).
552+
return a_type is dataclasses.InitVar
553+
554+
555+
def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
    """Return True if the string annotation refers to a_type in a_module.

    For example, when checking that annotation denotes a ClassVar,
    a_module is typing and a_type is typing.ClassVar.

    - annotation is a string type annotation
    - cls is the class that this annotation was found in
    - a_module is the module we want to match
    - a_type is the type in that module we want to match
    - is_type_predicate is a function called with (obj, a_module)
      that determines if obj is of the desired type.

    Returns False (instead of raising AttributeError) when a module
    needed for the lookup is not present in sys.modules.
    """
    # It's possible to look up a_module given a_type, but it involves
    # looking in sys.modules (again!), and seems like a waste since
    # the caller already knows a_module.

    # Since this test does not do a local namespace lookup (and
    # instead only a module (global) lookup), there are some things it
    # gets wrong.

    # With string annotations, this will work:
    #   CV = ClassVar
    #   @dataclass
    #   class C0:
    #     cv0: CV

    # But this will not:
    #   @dataclass
    #   class C1:
    #     CV = ClassVar
    #     cv1: CV

    # In C1, the code in this function will look up "CV" in the module
    # and not find it, so it will not consider cv1 as a ClassVar.
    # This is a fairly obscure corner case, and the best way to fix it
    # would be to eval() the string "CV" with the correct global and
    # local namespaces.  However that would involve an eval() penalty
    # for every single field of every dataclass that's defined.  It
    # was judged not worth it.

    match = _MODULE_IDENTIFIER_RE.match(annotation)
    if not match:
        return False

    # sys.modules.get() returns None when cls.__module__ names a module
    # that isn't registered; without a module namespace there is
    # nothing to resolve the identifier against.
    module = sys.modules.get(cls.__module__)
    if module is None:
        return False

    ns = None
    module_name = match.group(1)
    if not module_name:
        # No module name, assume the class's module did
        # "from dataclasses import InitVar".
        ns = module.__dict__
    elif module.__dict__.get(module_name) is a_module:
        # module_name, looked up in the class's module, is the module
        # we want to match; resolve the identifier in a_type's module.
        type_module = sys.modules.get(a_type.__module__)
        if type_module is not None:
            ns = type_module.__dict__

    if ns and is_type_predicate(ns.get(match.group(2)), a_module):
        return True
    return False
612+
613+
535614
def _get_field(cls, a_name, a_type):
536615
# Return a Field object for this field name and type. ClassVars
537616
# and InitVars are also returned, but marked as such (see
@@ -548,34 +627,54 @@ def _get_field(cls, a_name, a_type):
548627
default = MISSING
549628
f = field(default=default)
550629

551-
# Assume it's a normal field until proven otherwise.
552-
f._field_type = _FIELD
553-
554630
# Only at this point do we know the name and the type. Set them.
555631
f.name = a_name
556632
f.type = a_type
557633

558-
# If typing has not been imported, then it's impossible for
559-
# any annotation to be a ClassVar. So, only look for ClassVar
560-
# if typing has been imported.
634+
# Assume it's a normal field until proven otherwise. We're next
635+
# going to decide if it's a ClassVar or InitVar, everything else
636+
# is just a normal field.
637+
f._field_type = _FIELD
638+
639+
# In addition to checking for actual types here, also check for
640+
# string annotations. get_type_hints() won't always work for us
641+
# (see https://github.com/python/typing/issues/508 for example),
642+
# plus it's expensive and would require an eval for every string
643+
# annotation. So, make a best effort to see if this is a
644+
# ClassVar or InitVar using regexes and checking that the thing
645+
# referenced is actually of the correct type.
646+
647+
# For the complete discussion, see https://bugs.python.org/issue33453
648+
649+
# If typing has not been imported, then it's impossible for any
650+
# annotation to be a ClassVar. So, only look for ClassVar if
651+
# typing has been imported by any module (not necessarily cls's
652+
# module).
561653
typing = sys.modules.get('typing')
562-
if typing is not None:
654+
if typing:
563655
# This test uses a typing internal class, but it's the best
564656
# way to test if this is a ClassVar.
565-
if (type(a_type) is typing._GenericAlias and
566-
a_type.__origin__ is typing.ClassVar):
567-
# This field is a ClassVar, so it's not a field.
657+
if (_is_classvar(a_type, typing)
658+
or (isinstance(f.type, str)
659+
and _is_type(f.type, cls, typing, typing.ClassVar,
660+
_is_classvar))):
568661
f._field_type = _FIELD_CLASSVAR
569662

663+
# If the type is InitVar, or if it's a matching string annotation,
664+
# then it's an InitVar.
570665
if f._field_type is _FIELD:
571-
# Check if this is an InitVar.
572-
if a_type is InitVar:
573-
# InitVars are not fields, either.
666+
# The module we're checking against is the module we're
667+
# currently in (dataclasses.py).
668+
dataclasses = sys.modules[__name__]
669+
if (_is_initvar(a_type, dataclasses)
670+
or (isinstance(f.type, str)
671+
and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
672+
_is_initvar))):
574673
f._field_type = _FIELD_INITVAR
575674

576-
# Validations for fields. This is delayed until now, instead of
577-
# in the Field() constructor, since only here do we know the field
578-
# name, which allows better error reporting.
675+
# Validations for individual fields. This is delayed until now,
676+
# instead of in the Field() constructor, since only here do we
677+
# know the field name, which allows for better error reporting.
579678

580679
# Special restrictions for ClassVar and InitVar.
581680
if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
@@ -605,7 +704,6 @@ def _set_new_attribute(cls, name, value):
605704
return False
606705

607706

608-
609707
# Decide if/how we're going to create a hash function. Key is
610708
# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
611709
# take. The common case is to do nothing, so instead of providing a
@@ -865,7 +963,7 @@ def fields(class_or_instance):
865963

866964
# Might it be worth caching this, per class?
867965
try:
868-
fields = getattr(class_or_instance, _FIELDS)
966+
fields = getattr(class_or_instance, _FIELDS)
869967
except AttributeError:
870968
raise TypeError('must be called with a dataclass type or instance')
871969

Lib/test/dataclass_module_1.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#from __future__ import annotations
2+
USING_STRINGS = False
3+
4+
# dataclass_module_1.py and dataclass_module_1_str.py are identical
5+
# except only the latter uses string annotations.
6+
7+
import dataclasses
8+
import typing
9+
10+
T_CV2 = typing.ClassVar[int]
11+
T_CV3 = typing.ClassVar
12+
13+
T_IV2 = dataclasses.InitVar[int]
14+
T_IV3 = dataclasses.InitVar
15+
16+
@dataclasses.dataclass
17+
class CV:
18+
T_CV4 = typing.ClassVar
19+
cv0: typing.ClassVar[int] = 20
20+
cv1: typing.ClassVar = 30
21+
cv2: T_CV2
22+
cv3: T_CV3
23+
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
24+
25+
@dataclasses.dataclass
26+
class IV:
27+
T_IV4 = dataclasses.InitVar
28+
iv0: dataclasses.InitVar[int]
29+
iv1: dataclasses.InitVar
30+
iv2: T_IV2
31+
iv3: T_IV3
32+
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

Lib/test/dataclass_module_1_str.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from __future__ import annotations
2+
USING_STRINGS = True
3+
4+
# dataclass_module_1.py and dataclass_module_1_str.py are identical
5+
# except only the latter uses string annotations.
6+
7+
import dataclasses
8+
import typing
9+
10+
T_CV2 = typing.ClassVar[int]
11+
T_CV3 = typing.ClassVar
12+
13+
T_IV2 = dataclasses.InitVar[int]
14+
T_IV3 = dataclasses.InitVar
15+
16+
@dataclasses.dataclass
17+
class CV:
18+
T_CV4 = typing.ClassVar
19+
cv0: typing.ClassVar[int] = 20
20+
cv1: typing.ClassVar = 30
21+
cv2: T_CV2
22+
cv3: T_CV3
23+
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
24+
25+
@dataclasses.dataclass
26+
class IV:
27+
T_IV4 = dataclasses.InitVar
28+
iv0: dataclasses.InitVar[int]
29+
iv1: dataclasses.InitVar
30+
iv2: T_IV2
31+
iv3: T_IV3
32+
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

Lib/test/dataclass_module_2.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#from __future__ import annotations
2+
USING_STRINGS = False
3+
4+
# dataclass_module_2.py and dataclass_module_2_str.py are identical
5+
# except only the latter uses string annotations.
6+
7+
from dataclasses import dataclass, InitVar
8+
from typing import ClassVar
9+
10+
T_CV2 = ClassVar[int]
11+
T_CV3 = ClassVar
12+
13+
T_IV2 = InitVar[int]
14+
T_IV3 = InitVar
15+
16+
@dataclass
17+
class CV:
18+
T_CV4 = ClassVar
19+
cv0: ClassVar[int] = 20
20+
cv1: ClassVar = 30
21+
cv2: T_CV2
22+
cv3: T_CV3
23+
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
24+
25+
@dataclass
26+
class IV:
27+
T_IV4 = InitVar
28+
iv0: InitVar[int]
29+
iv1: InitVar
30+
iv2: T_IV2
31+
iv3: T_IV3
32+
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

Lib/test/dataclass_module_2_str.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from __future__ import annotations
2+
USING_STRINGS = True
3+
4+
# dataclass_module_2.py and dataclass_module_2_str.py are identical
5+
# except only the latter uses string annotations.
6+
7+
from dataclasses import dataclass, InitVar
8+
from typing import ClassVar
9+
10+
T_CV2 = ClassVar[int]
11+
T_CV3 = ClassVar
12+
13+
T_IV2 = InitVar[int]
14+
T_IV3 = InitVar
15+
16+
@dataclass
17+
class CV:
18+
T_CV4 = ClassVar
19+
cv0: ClassVar[int] = 20
20+
cv1: ClassVar = 30
21+
cv2: T_CV2
22+
cv3: T_CV3
23+
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
24+
25+
@dataclass
26+
class IV:
27+
T_IV4 = InitVar
28+
iv0: InitVar[int]
29+
iv1: InitVar
30+
iv2: T_IV2
31+
iv3: T_IV3
32+
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.

0 commit comments

Comments
 (0)