Commit ecf16ee

gh-115154: Fix untokenize handling of unicode named literals (#115171)
1 parent d504968 commit ecf16ee
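
Note: the bug in short. Before this change, `untokenize` doubled every brace in FSTRING_MIDDLE tokens, so a unicode named escape such as `\N{SNAKE}` came back as `\N{{SNAKE}}`, which no longer names a character. A minimal round-trip check along the lines of the new tests (Python 3.12+, where PEP 701 f-string tokenization applies):

import io
import tokenize

# f-string source containing a unicode named escape
src = "f'\\N{EXCLAMATION MARK}'"
tokens = list(tokenize.generate_tokens(io.StringIO(src).readline))

# With this fix, the full five-tuple round trip reproduces the source
# exactly; before it, the braces of \N{...} were doubled.
assert tokenize.untokenize(tokens) == src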

File tree: 3 files changed, +85 -10 lines

Lib/test/test_tokenize.py (+37 -3)

@@ -1877,6 +1877,43 @@ def test_roundtrip(self):
                              " print('Can not import' # comment2\n)"
                              "else: print('Loaded')\n")
 
+        self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
+        self.check_roundtrip(r"f'\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\N{{SNAKE}}'")
+        self.check_roundtrip(r"f'\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")
+
+        self.check_roundtrip(r"f'\\N{1}'")
+        self.check_roundtrip(r"f'\\\\N{2}'")
+        self.check_roundtrip(r"f'\\\\\\N{3}'")
+        self.check_roundtrip(r"f'\\\\\\\\N{4}'")
+
+        self.check_roundtrip(r"f'\\N{{'")
+        self.check_roundtrip(r"f'\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\\\N{{'")
+        cases = [
+    """
+if 1:
+    "foo"
+"bar"
+""",
+    """
+if 1:
+    ("foo"
+"bar")
+""",
+    """
+if 1:
+    "foo"
+    "bar"
+""" ]
+        for case in cases:
+            self.check_roundtrip(case)
+
+
     def test_continuation(self):
         # Balancing continuation
         self.check_roundtrip("a = (3,4, \n"
@@ -1911,9 +1948,6 @@ def test_random_files(self):
         tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
 
-        # TODO: Remove this once we can untokenize PEP 701 syntax
-        testfiles.remove(os.path.join(tempdir, "test_fstring.py"))
-
         if not support.is_resource_enabled("cpu"):
             testfiles = random.sample(testfiles, 10)
 
Lib/tokenize.py (+46 -7)

@@ -168,6 +168,7 @@ def __init__(self):
         self.tokens = []
         self.prev_row = 1
         self.prev_col = 0
+        self.prev_type = None
         self.encoding = None
 
     def add_whitespace(self, start):
@@ -183,6 +184,29 @@ def add_whitespace(self, start):
         if col_offset:
             self.tokens.append(" " * col_offset)
 
+    def escape_brackets(self, token):
+        characters = []
+        consume_until_next_bracket = False
+        for character in token:
+            if character == "}":
+                if consume_until_next_bracket:
+                    consume_until_next_bracket = False
+                else:
+                    characters.append(character)
+            if character == "{":
+                n_backslashes = sum(
+                    1 for char in _itertools.takewhile(
+                        "\\".__eq__,
+                        characters[-2::-1]
+                    )
+                )
+                if n_backslashes % 2 == 0:
+                    characters.append(character)
+                else:
+                    consume_until_next_bracket = True
+            characters.append(character)
+        return "".join(characters)
+
     def untokenize(self, iterable):
         it = iter(iterable)
         indents = []
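
A quick sketch of what the new helper does, poking at the internal `Untokenizer` class directly (an implementation detail, not public API): braces belonging to a `\N{...}` escape, detected by an odd number of backslashes before the `N`, are kept single, while bare braces in FSTRING_MIDDLE text are doubled back into their f-string source spelling.

import tokenize

ut = tokenize.Untokenizer()
print(ut.escape_brackets("\\N{SNAKE}"))  # \N{SNAKE}  (named escape kept intact)
print(ut.escape_brackets("a{b}c"))       # a{{b}}c    (bare braces re-escaped)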
@@ -214,25 +238,29 @@ def untokenize(self, iterable):
                     startline = False
             elif tok_type == FSTRING_MIDDLE:
                 if '{' in token or '}' in token:
+                    token = self.escape_brackets(token)
+                    last_line = token.splitlines()[-1]
                     end_line, end_col = end
-                    end = (end_line, end_col + token.count('{') + token.count('}'))
-                    token = re.sub('{', '{{', token)
-                    token = re.sub('}', '}}', token)
-
+                    extra_chars = last_line.count("{{") + last_line.count("}}")
+                    end = (end_line, end_col + extra_chars)
+            elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
 
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
             if tok_type in (NEWLINE, NL):
                 self.prev_row += 1
                 self.prev_col = 0
+            self.prev_type = tok_type
         return "".join(self.tokens)
 
     def compat(self, token, iterable):
         indents = []
         toks_append = self.tokens.append
         startline = token[0] in (NEWLINE, NL)
         prevstring = False
+        in_fstring = 0
 
         for tok in _itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
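
The new `prev_type` tracking guards against adjacent string tokens fusing. One case where gluing is fatal (my example, not taken from the commit): two empty f-strings emitted back to back would read `f""""`, which re-tokenizes as a triple-quote opener. Roughly:

import io
import tokenize

src = 'f"" f""\n'
tokens = [(t.type, t.string) for t in
          tokenize.generate_tokens(io.StringIO(src).readline)]
out = tokenize.untokenize(tokens)  # two-tuples drive the compat() path
# The space between FSTRING_END and the next FSTRING_START is preserved,
# so the output re-tokenizes to the same (type, string) stream.
assert [t[:2] for t in
        tokenize.generate_tokens(io.StringIO(out).readline)] == tokens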
@@ -251,6 +279,10 @@ def compat(self, token, iterable):
             else:
                 prevstring = False
 
+            if toknum == FSTRING_START:
+                in_fstring += 1
+            elif toknum == FSTRING_END:
+                in_fstring -= 1
             if toknum == INDENT:
                 indents.append(tokval)
                 continue
@@ -263,11 +295,18 @@ def compat(self, token, iterable):
                 toks_append(indents[-1])
                 startline = False
             elif toknum == FSTRING_MIDDLE:
-                if '{' in tokval or '}' in tokval:
-                    tokval = re.sub('{', '{{', tokval)
-                    tokval = re.sub('}', '}}', tokval)
+                tokval = self.escape_brackets(tokval)
+
+            # Insert a space between two consecutive brackets if we are in an f-string
+            if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring:
+                tokval = ' ' + tokval
+
+            # Insert a space between two consecutive f-strings
+            if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
+                self.tokens.append(" ")
 
             toks_append(tokval)
+            self.prev_type = toknum
 
 
 def untokenize(iterable):
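
Why the "space between two consecutive brackets" rule exists, as I read it: inside an f-string, two adjacent `{` (or `}`) OP tokens printed back to back would spell `{{`/`}}`, which re-tokenize as a single escaped literal brace. A dict display directly inside a replacement field triggers exactly this shape:

import io
import tokenize

src = 'f"{ {1: 2} }"\n'
tokens = [(t.type, t.string) for t in
          tokenize.generate_tokens(io.StringIO(src).readline)]
out = tokenize.untokenize(tokens)
print(out)  # e.g. f"{ {1 :2 } }" -- spacing shifts, but the braces stay apart
assert [t[:2] for t in
        tokenize.generate_tokens(io.StringIO(out).readline)] == tokens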
New NEWS entry (+2 -0)

@@ -0,0 +1,2 @@
+Fix a bug that was causing the :func:`tokenize.untokenize` function to
+handle unicode named literals incorrectly. Patch by Pablo Galindo
