
Commit 1c8222c

Issue 25311: Add support for f-strings to tokenize.py. Also added some comments to explain what's happening, since it's not so obvious.
1 parent: f1c47e4

2 files changed: 84 additions & 51 deletions


Lib/test/test_tokenize.py

Lines changed: 17 additions & 0 deletions
@@ -332,6 +332,23 @@ def test_string(self):
 b\
 c"""', """\
     STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
+    """)
+        self.check_tokenize('f"abc"', """\
+    STRING     'f"abc"'      (1, 0) (1, 6)
+    """)
+        self.check_tokenize('fR"a{b}c"', """\
+    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
+    """)
+        self.check_tokenize('f"""abc"""', """\
+    STRING     'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
+    """)
+        self.check_tokenize(r'f"abc\
+def"', """\
+    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
+    """)
+        self.check_tokenize(r'Rf"abc\
+def"', """\
+    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
     """)
 
     def test_function(self):
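
The expected strings above use check_tokenize's dump format: token name, the token's text as a repr, then start and end (row, column) positions. As a quick way to reproduce the first new case outside the test harness, here is a small sketch using only the public tokenize API (layout and variable names are illustrative, not part of the commit):

    import io
    import tokenize

    # tokenize.tokenize() wants a readline callable over bytes; the first
    # token yielded is always ENCODING, so the STRING token is at index 1.
    toks = list(tokenize.tokenize(io.BytesIO(b'f"abc"').readline))
    tok = toks[1]
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)
    # With this change applied: STRING 'f"abc"' (1, 0) (1, 6)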

Lib/tokenize.py

Lines changed: 67 additions & 51 deletions
@@ -29,6 +29,7 @@
 import collections
 from io import TextIOWrapper
 from itertools import chain
+import itertools as _itertools
 import re
 import sys
 from token import *
@@ -131,7 +132,28 @@ def maybe(*choices): return group(*choices) + '?'
 Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
-StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
+# Return the empty string, plus all of the valid string prefixes.
+def _all_string_prefixes():
+    # The valid string prefixes. Only contain the lower case versions,
+    #  and don't contain any permutations (include 'fr', but not
+    #  'rf'). The various permutations will be generated.
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+    # if we add binary f-strings, add: ['fb', 'fbr']
+    result = set([''])
+    for prefix in _valid_string_prefixes:
+        for t in _itertools.permutations(prefix):
+            # create a list with upper and lower versions of each
+            #  character
+            for u in _itertools.product(*[(c, c.upper()) for c in t]):
+                result.add(''.join(u))
+    return result
+
+def _compile(expr):
+    return re.compile(expr, re.UNICODE)
+
+# Note that since _all_string_prefixes includes the empty string,
+#  StringPrefix can be the empty string (making it optional).
+StringPrefix = group(*_all_string_prefixes())
 
 # Tail end of ' string.
 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
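
To make the expansion concrete: each entry in _valid_string_prefixes is blown out into every character ordering and every per-character casing, plus the empty string. A quick sketch of what the new helper yields (the count of 25 is worked out by hand here, not stated in the commit):

    from tokenize import _all_string_prefixes   # private helper, post-commit

    prefixes = _all_string_prefixes()
    # 'fr' alone expands to 2 orderings x 4 casings = 8 spellings:
    #   fr, fR, Fr, FR, rf, rF, Rf, RF
    assert {'fr', 'fR', 'Fr', 'FR', 'rf', 'rF', 'Rf', 'RF'} <= prefixes
    # '' + 8 single-char prefixes + 8 'br' variants + 8 'fr' variants = 25
    assert '' in prefixes and len(prefixes) == 25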
@@ -169,50 +191,25 @@ def maybe(*choices): return group(*choices) + '?'
 PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
 
-def _compile(expr):
-    return re.compile(expr, re.UNICODE)
-
-endpats = {"'": Single, '"': Double,
-           "'''": Single3, '"""': Double3,
-           "r'''": Single3, 'r"""': Double3,
-           "b'''": Single3, 'b"""': Double3,
-           "R'''": Single3, 'R"""': Double3,
-           "B'''": Single3, 'B"""': Double3,
-           "br'''": Single3, 'br"""': Double3,
-           "bR'''": Single3, 'bR"""': Double3,
-           "Br'''": Single3, 'Br"""': Double3,
-           "BR'''": Single3, 'BR"""': Double3,
-           "rb'''": Single3, 'rb"""': Double3,
-           "Rb'''": Single3, 'Rb"""': Double3,
-           "rB'''": Single3, 'rB"""': Double3,
-           "RB'''": Single3, 'RB"""': Double3,
-           "u'''": Single3, 'u"""': Double3,
-           "U'''": Single3, 'U"""': Double3,
-           'r': None, 'R': None, 'b': None, 'B': None,
-           'u': None, 'U': None}
-
-triple_quoted = {}
-for t in ("'''", '"""',
-          "r'''", 'r"""', "R'''", 'R"""',
-          "b'''", 'b"""', "B'''", 'B"""',
-          "br'''", 'br"""', "Br'''", 'Br"""',
-          "bR'''", 'bR"""', "BR'''", 'BR"""',
-          "rb'''", 'rb"""', "rB'''", 'rB"""',
-          "Rb'''", 'Rb"""', "RB'''", 'RB"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          ):
-    triple_quoted[t] = t
-single_quoted = {}
-for t in ("'", '"',
-          "r'", 'r"', "R'", 'R"',
-          "b'", 'b"', "B'", 'B"',
-          "br'", 'br"', "Br'", 'Br"',
-          "bR'", 'bR"', "BR'", 'BR"' ,
-          "rb'", 'rb"', "rB'", 'rB"',
-          "Rb'", 'Rb"', "RB'", 'RB"' ,
-          "u'", 'u"', "U'", 'U"',
-          ):
-    single_quoted[t] = t
+# For a given string prefix plus quotes, endpats maps it to a regex
+#  to match the remainder of that string. _prefix can be empty, for
+#  a normal single or triple quoted string (with no prefix).
+endpats = {}
+for _prefix in _all_string_prefixes():
+    endpats[_prefix + "'"] = Single
+    endpats[_prefix + '"'] = Double
+    endpats[_prefix + "'''"] = Single3
+    endpats[_prefix + '"""'] = Double3
+
+# A set of all of the single and triple quoted string prefixes,
+#  including the opening quotes.
+single_quoted = set()
+triple_quoted = set()
+for t in _all_string_prefixes():
+    for u in (t + '"', t + "'"):
+        single_quoted.add(u)
+    for u in (t + '"""', t + "'''"):
+        triple_quoted.add(u)
 
 tabsize = 8

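The net effect: the hand-maintained endpats/triple_quoted/single_quoted tables are replaced by generated ones that also cover the f-string spellings the old tables lacked. A small illustration of the resulting lookups (it pokes at tokenize's private module-level names, so treat it as a sketch):

    import tokenize

    # Every cased/ordered prefix plus opening quote now has an entry.
    assert tokenize.endpats['fR"'] == tokenize.Double      # fR"..."
    assert tokenize.endpats["Rf'''"] == tokenize.Single3   # Rf'''...'''
    assert 'f"' in tokenize.single_quoted
    assert "rB'''" in tokenize.triple_quoted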
@@ -626,6 +623,7 @@ def _tokenize(readline, encoding):
                         yield stashed
                         stashed = None
                     yield TokenInfo(COMMENT, token, spos, epos, line)
+
                 elif token in triple_quoted:
                     endprog = _compile(endpats[token])
                     endmatch = endprog.match(line, pos)
@@ -638,19 +636,37 @@ def _tokenize(readline, encoding):
                         contstr = line[start:]
                         contline = line
                         break
-                elif initial in single_quoted or \
-                        token[:2] in single_quoted or \
-                        token[:3] in single_quoted:
+
+                # Check up to the first 3 chars of the token to see if
+                #  they're in the single_quoted set. If so, they start
+                #  a string.
+                # We're using the first 3, because we're looking for
+                #  "rb'" (for example) at the start of the token. If
+                #  we switch to longer prefixes, this needs to be
+                #  adjusted.
+                # Note that initial == token[:1].
+                # Also note that single quote checking must come after
+                #  triple quote checking (above).
+                elif (initial in single_quoted or
+                      token[:2] in single_quoted or
+                      token[:3] in single_quoted):
                     if token[-1] == '\n':            # continued string
                         strstart = (lnum, start)
-                        endprog = _compile(endpats[initial] or
-                                           endpats[token[1]] or
-                                           endpats[token[2]])
+                        # Again, using the first 3 chars of the
+                        #  token. This is looking for the matching end
+                        #  regex for the correct type of quote
+                        #  character. So it's really looking for
+                        #  endpats["'"] or endpats['"'], by trying to
+                        #  skip string prefix characters, if any.
+                        endprog = _compile(endpats.get(initial) or
+                                           endpats.get(token[1]) or
+                                           endpats.get(token[2]))
                         contstr, needcont = line[start:], 1
                         contline = line
                         break
                     else:                            # ordinary string
                         yield TokenInfo(STRING, token, spos, epos, line)
+
                 elif initial.isidentifier():         # ordinary name
                     if token in ('async', 'await'):
                         if async_def:
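
Note the switch from endpats[...] to endpats.get(...): the old dict carried dummy entries ('r': None and friends) precisely so bare indexing on a prefix character couldn't raise KeyError, and the generated dict has no such keys, so missing candidates must now fall through via .get(). For a continued string like Rf"abc\ , initial 'R' and token[1] 'f' both miss, and token[2] '"' resolves to Double. An end-to-end sketch of the new behavior (illustrative, not from the commit):

    import io
    import tokenize

    # A raw f-string continued across a physical line now comes back
    # as a single STRING token, matching the new tests.
    source = b'Rf"abc\\\ndef"\n'
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        if tok.type == tokenize.STRING:
            print(repr(tok.string), tok.start, tok.end)
    # -> 'Rf"abc\\\ndef"' (1, 0) (2, 4)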
