@@ -56,6 +56,107 @@ def exact_type(self):
56
56
else :
57
57
return self .type
58
58
59
def group(*choices):
    """Return a regex group that matches any one of *choices*.

    The alternatives are joined with '|' and wrapped in parentheses,
    e.g. group('a', 'b') -> '(a|b)'.
    """
    alternatives = '|'.join(choices)
    return f'({alternatives})'


def any(*choices):
    """Return group(*choices) followed by '*' (zero or more repetitions).

    NOTE: this name shadows the builtin any() for the rest of the module.
    """
    return f'{group(*choices)}*'


def maybe(*choices):
    """Return group(*choices) followed by '?' (zero or one occurrence)."""
    return f'{group(*choices)}?'
62
+
63
# Regular-expression source fragments (plain strings, not compiled
# patterns) for whitespace, comments, names and numeric literals.
#
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.

# A possibly empty run of spaces, form feeds and tabs.
Whitespace = r'[ \f\t]*'
# '#' and everything after it up to (not including) the end of the line.
Comment = r'#[^\r\n]*'
# Leading whitespace, any number of backslash-newline continuations (each
# followed by more whitespace), then an optional comment.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

# Integer literals.  A single "_" may precede each digit, including the
# first digit after the 0x / 0b / 0o base prefix.
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
# Decimal: either nothing but zeros, or digits starting with a nonzero one.
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# Exponent part: e/E, an optional sign, then digits.
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
# "digits." with optional fraction digits, or ".digits"; either form may
# be followed by an exponent.
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                   r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
# Digits followed by a mandatory exponent (no decimal point).
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
# Plain digits or a float, followed by j/J.
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
# Regex alternation tries the alternatives left to right, so the longer
# literal forms are listed ahead of Intnumber.
Number = group(Imagnumber, Floatnumber, Intnumber)
82
+
83
+ # Return the empty string, plus all of the valid string prefixes.
84
def _all_string_prefixes():
    """Return the set of every accepted string-literal prefix.

    The result holds the empty string plus every character ordering and
    every upper/lower-case spelling of the base prefixes listed below.
    """
    # Base prefixes: lower case only and in a single order ('fr' is
    # listed, 'rf' is not); the other orderings and the upper-case
    # spellings are generated from these.
    # if we add binary f-strings, add: ['fb', 'fbr']
    base_prefixes = ('b', 'r', 'u', 'f', 'br', 'fr')
    prefixes = {''}
    for base in base_prefixes:
        for ordering in _itertools.permutations(base):
            # Every character may independently be lower or upper case.
            case_choices = [(ch, ch.upper()) for ch in ordering]
            prefixes.update(
                ''.join(spelling)
                for spelling in _itertools.product(*case_choices)
            )
    return prefixes
98
+
99
@functools.lru_cache
def _compile(expr):
    """Compile *expr* with the re.UNICODE flag and return the pattern.

    Results are memoized by functools.lru_cache, keyed on *expr*.
    """
    pattern = re.compile(expr, flags=re.UNICODE)
    return pattern
102
+
103
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# The four "tail end" patterns below match the remainder of a string
# once its opening quote(s) have been consumed: ordinary characters and
# backslash-escaped characters, up to and including the closing quote(s).

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.  A quote inside the body is accepted as long as
# it is not followed by two more quotes (the (?!'') lookahead).
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.  Same structure as Single3, for double quotes.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
# A string prefix followed by an opening triple quote (the opening only;
# the body and closing quotes are not part of this pattern).
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.  The body excludes unescaped newlines.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
119
+
120
# Sorting in reverse order puts the long operators before their prefixes.
# Otherwise if = came before ==, == would get recognized as two instances
# of =.
# Each entry of EXACT_TOKEN_TYPES (defined outside this section) is passed
# through re.escape so that it is matched literally.
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
# A newline (optionally preceded by \r) or one of the Special tokens.
Funny = group(r'\r?\n', Special)

# One token: a number, a newline/special token, a single-line string or a
# name, tried in that order.
PlainToken = group(Number, Funny, String, Name)
# A token together with the ignorable text (whitespace, continuations,
# comment) that may precede it.
Token = Ignore + PlainToken
128
+
129
# First (or only) line of ' or " string.
# The body is the same as in String, but the match may end either at the
# closing quote or at a backslash followed by a newline.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
# A backslash-newline continuation, the end of the input (\Z), a comment,
# or the opening of a triple-quoted string.
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
# Optional leading whitespace followed by one of the alternatives above;
# PseudoExtras is tried first.
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
136
+
137
# endpats maps an opening string delimiter -- a string prefix (possibly
# empty, for a plain single or triple quoted string) followed by the
# opening quote(s) -- to the regex matching the remainder of that string.
endpats = {
    _prefix + _quote: _tail
    for _prefix in _all_string_prefixes()
    for _quote, _tail in (("'", Single), ('"', Double),
                          ("'''", Single3), ('"""', Double3))
}
147
+
148
# Every string prefix (including the empty one) combined with an opening
# quote: single_quoted holds the one-quote openers, triple_quoted the
# three-quote openers.
single_quoted = {
    prefix + quote
    for prefix in _all_string_prefixes()
    for quote in ('"', "'")
}
triple_quoted = {
    prefix + quote
    for prefix in _all_string_prefixes()
    for quote in ('"""', "'''")
}
158
+
159
# Module-level constant; its users are outside this section.
# NOTE(review): presumably the tab-stop width used when computing columns
# for tab characters -- confirm against the code that reads it.
tabsize = 8
59
160
60
161
class TokenError(Exception):
    """Exception type declared by this module; adds nothing to Exception."""
61
162
0 commit comments