Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c0eaeca

Browse files
committed
Updated tokenize to support the inverse byte literals new in 3.3
1 parent 50364b4 commit c0eaeca

2 files changed

Lines changed: 28 additions & 6 deletions

File tree

Lib/test/test_tokenize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,18 @@
563563
NAME 'grün' (2, 0) (2, 4)
564564
OP '=' (2, 5) (2, 6)
565565
STRING "'green'" (2, 7) (2, 14)
566+
567+
Legacy unicode literals:
568+
569+
>>> dump_tokens("Örter = u'places'\\ngrün = UR'green'")
570+
ENCODING 'utf-8' (0, 0) (0, 0)
571+
NAME 'Örter' (1, 0) (1, 5)
572+
OP '=' (1, 6) (1, 7)
573+
STRING "u'places'" (1, 8) (1, 17)
574+
NEWLINE '\\n' (1, 17) (1, 18)
575+
NAME 'grün' (2, 0) (2, 4)
576+
OP '=' (2, 5) (2, 6)
577+
STRING "UR'green'" (2, 7) (2, 16)
566578
"""
567579

568580
from test import support

Lib/tokenize.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ def maybe(*choices): return group(*choices) + '?'
127127
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
128128
Number = group(Imagnumber, Floatnumber, Intnumber)
129129

130+
StringPrefix = r'(?:[uU][rR]?|[bB][rR]|[rR][bB]|[rR]|[uU])?'
131+
130132
# Tail end of ' string.
131133
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
132134
# Tail end of " string.
@@ -135,10 +137,10 @@ def maybe(*choices): return group(*choices) + '?'
135137
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
136138
# Tail end of """ string.
137139
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
138-
Triple = group("[bBuU]?[rR]?'''", '[bBuU]?[rR]?"""')
140+
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
139141
# Single-line ' or " string.
140-
String = group(r"[bBuU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
141-
r'[bBuU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
142+
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
143+
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
142144

143145
# Because of leftmost-then-longest match semantics, be sure to put the
144146
# longest operators first (e.g., if = came before ==, == would get
@@ -156,9 +158,9 @@ def maybe(*choices): return group(*choices) + '?'
156158
Token = Ignore + PlainToken
157159

158160
# First (or only) line of ' or " string.
159-
ContStr = group(r"[bBuU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
161+
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
160162
group("'", r'\\\r?\n'),
161-
r'[bBuU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
163+
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
162164
group('"', r'\\\r?\n'))
163165
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
164166
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -170,12 +172,16 @@ def _compile(expr):
170172
"'''": Single3, '"""': Double3,
171173
"r'''": Single3, 'r"""': Double3,
172174
"b'''": Single3, 'b"""': Double3,
173-
"br'''": Single3, 'br"""': Double3,
174175
"R'''": Single3, 'R"""': Double3,
175176
"B'''": Single3, 'B"""': Double3,
177+
"br'''": Single3, 'br"""': Double3,
176178
"bR'''": Single3, 'bR"""': Double3,
177179
"Br'''": Single3, 'Br"""': Double3,
178180
"BR'''": Single3, 'BR"""': Double3,
181+
"rb'''": Single3, 'rb"""': Double3,
182+
"Rb'''": Single3, 'Rb"""': Double3,
183+
"rB'''": Single3, 'rB"""': Double3,
184+
"RB'''": Single3, 'RB"""': Double3,
179185
"u'''": Single3, 'u"""': Double3,
180186
"ur'''": Single3, 'ur"""': Double3,
181187
"R'''": Single3, 'R"""': Double3,
@@ -192,6 +198,8 @@ def _compile(expr):
192198
"b'''", 'b"""', "B'''", 'B"""',
193199
"br'''", 'br"""', "Br'''", 'Br"""',
194200
"bR'''", 'bR"""', "BR'''", 'BR"""',
201+
"rb'''", 'rb"""', "rB'''", 'rB"""',
202+
"Rb'''", 'Rb"""', "RB'''", 'RB"""',
195203
"u'''", 'u"""', "U'''", 'U"""',
196204
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
197205
"uR'''", 'uR"""', "UR'''", 'UR"""'):
@@ -202,6 +210,8 @@ def _compile(expr):
202210
"b'", 'b"', "B'", 'B"',
203211
"br'", 'br"', "Br'", 'Br"',
204212
"bR'", 'bR"', "BR'", 'BR"' ,
213+
"rb'", 'rb"', "rB'", 'rB"',
214+
"Rb'", 'Rb"', "RB'", 'RB"' ,
205215
"u'", 'u"', "U'", 'U"',
206216
"ur'", 'ur"', "Ur'", 'Ur"',
207217
"uR'", 'uR"', "UR'", 'UR"' ):

0 commit comments

Comments
 (0)