From 5ee5d57f1149a655682cde8cd7929c00863c7c4c Mon Sep 17 00:00:00 2001 From: Anthony Shaw Date: Mon, 28 Aug 2023 08:25:22 +1000 Subject: [PATCH 1/8] Add two tests for ast.unparse for unescaped quote support from PEP701 --- Lib/test/test_unparse.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index b3efb61e83049e..ab6625f42c1982 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -197,6 +197,10 @@ def test_fstrings_complicated(self): self.check_ast_roundtrip('''f"a\\r\\nb"''') self.check_ast_roundtrip('''f"\\u2028{'x'}"''') + def test_fstrings_pep701(self): + self.check_ast_roundtrip('f" something { my_dict["key"] } something else "') + self.check_ast_roundtrip('f"{f"{f"{f"{f"{f"{1+1}"}"}"}"}"}"') + def test_strings(self): self.check_ast_roundtrip("u'foo'") self.check_ast_roundtrip("r'foo'") From cb043e0f5d84509b30b1584941cab5954f13fbc7 Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Sun, 3 Sep 2023 04:24:44 +0800 Subject: [PATCH 2/8] unparse with new fstring syntax --- Lib/ast.py | 22 ++++++++++++---------- Lib/test/test_tokenize.py | 2 +- Lib/test/test_unparse.py | 19 +++++++++++++------ 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 45b95963f81885..97bcd5a3d8d98e 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1247,11 +1247,17 @@ def visit_JoinedStr(self, node): new_fstring_parts = [] quote_types = list(_ALL_QUOTES) for value, is_constant in fstring_parts: - value, quote_types = self._str_literal_helper( - value, - quote_types=quote_types, - escape_special_whitespace=is_constant, - ) + if is_constant: + value, quote_types = self._str_literal_helper( + value, + quote_types=quote_types, + escape_special_whitespace=True, + ) + elif "\n" in value: + if "'" in quote_types: + quote_types.remove("'") + if '"' in quote_types: + quote_types.remove('"') new_fstring_parts.append(value) value = "".join(new_fstring_parts) @@ -1273,16 +1279,12 @@ def _write_fstring_inner(self, node): def visit_FormattedValue(self, node): def unparse_inner(inner): - unparser = type(self)(_avoid_backslashes=True) + unparser = type(self)(_avoid_backslashes=False) unparser.set_precedence(_Precedence.TEST.next(), inner) return unparser.visit(inner) with self.delimit("{", "}"): expr = unparse_inner(node.value) - if "\\" in expr: - raise ValueError( - "Unable to avoid backslash in f-string expression part" - ) if expr.startswith("{"): # Separate pair of opening brackets as "{ {" self.write(" ") diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 7863e27fccd972..dbefee655c377c 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1860,7 +1860,7 @@ def test_random_files(self): testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) - # TODO: Remove this once we can unparse PEP 701 syntax + # TODO: Remove this once we can untokenize PEP 701 syntax testfiles.remove(os.path.join(tempdir, "test_fstring.py")) for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index ab6625f42c1982..e8f17165e8d5a9 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -382,8 +382,15 @@ def test_invalid_fstring_value(self): ) ) - def test_invalid_fstring_backslash(self): - self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\"))) + def test_fstring_backslash(self): + # valid since Python 3.12 + self.assertEqual(ast.unparse( + ast.FormattedValue( + value=ast.Constant(value="\\\\"), + conversion=-1, + format_spec=None, + ) + ), "{'\\\\\\\\'}") def test_invalid_yield_from(self): self.check_invalid(ast.YieldFrom(value=None)) @@ -506,11 +513,11 @@ def test_class_bases_and_keywords(self): self.check_src_roundtrip("class X(*args, **kwargs):\n pass") def test_fstrings(self): - self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''') - self.check_src_roundtrip('''f"\\u2028{'x'}"''') + self.check_src_roundtrip("f'-{f'*{f'+{f'.{x}.'}+'}*'}-'") + self.check_src_roundtrip("""f'\\u2028{'x'}'""") self.check_src_roundtrip(r"f'{x}\n'") - self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''') - self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''') + self.check_src_roundtrip("f'{'\\n'}\\n'") + self.check_src_roundtrip("f'{f'{x}\\n'}\\n'") def test_docstrings(self): docstrings = ( From 5ea8f4f9793f44a4352087f9af212c2379ba6cb4 Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Sun, 3 Sep 2023 04:39:29 +0800 Subject: [PATCH 3/8] Add news --- .../Library/2023-09-03-04-37-52.gh-issue-108469.kusj40.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-09-03-04-37-52.gh-issue-108469.kusj40.rst diff --git a/Misc/NEWS.d/next/Library/2023-09-03-04-37-52.gh-issue-108469.kusj40.rst b/Misc/NEWS.d/next/Library/2023-09-03-04-37-52.gh-issue-108469.kusj40.rst new file mode 100644 index 00000000000000..ac0f682963daec --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-03-04-37-52.gh-issue-108469.kusj40.rst @@ -0,0 +1,3 @@ +:func:`ast.unparse` now supports new :term:`f-string` syntax introduced in +Python 3.12. Note that the :term:`f-string` quotes are reselected for simplicity +under the new syntax. (Patch by Steven Sun) From 1da1d4c162b580e5542ba1c0e06bb5314c9ca3dc Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Sun, 3 Sep 2023 04:42:14 +0800 Subject: [PATCH 4/8] Update Lib/test/test_unparse.py --- Lib/test/test_unparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index e8f17165e8d5a9..38c59e6d430b58 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -514,7 +514,7 @@ def test_class_bases_and_keywords(self): def test_fstrings(self): self.check_src_roundtrip("f'-{f'*{f'+{f'.{x}.'}+'}*'}-'") - self.check_src_roundtrip("""f'\\u2028{'x'}'""") + self.check_src_roundtrip("f'\\u2028{'x'}'") self.check_src_roundtrip(r"f'{x}\n'") self.check_src_roundtrip("f'{'\\n'}\\n'") self.check_src_roundtrip("f'{f'{x}\\n'}\\n'") From 43d177c32b3d8889f312e42ed0eba2c390ac41bf Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Sun, 3 Sep 2023 16:55:46 +0800 Subject: [PATCH 5/8] simplify things --- Lib/ast.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 97bcd5a3d8d98e..fe2aaf0d906700 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1254,10 +1254,7 @@ def visit_JoinedStr(self, node): escape_special_whitespace=True, ) elif "\n" in value: - if "'" in quote_types: - quote_types.remove("'") - if '"' in quote_types: - quote_types.remove('"') + quote_types = [q for q in quote_types if q in _MULTI_QUOTES] new_fstring_parts.append(value) value = "".join(new_fstring_parts) From 21229ce1f0c8fb2e580df62972296d629960f30e Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Sun, 3 Sep 2023 20:35:37 +0800 Subject: [PATCH 6/8] reimplement quote selection logic --- Lib/ast.py | 55 +++++++++++++++++++++++----------------- Lib/test/test_unparse.py | 6 +++++ 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index fe2aaf0d906700..f49441269843b8 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1225,40 +1225,49 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): def visit_JoinedStr(self, node): self.write("f") - if self._avoid_backslashes: - with self.buffered() as buffer: - self._write_fstring_inner(node) - return self._write_str_avoiding_backslashes("".join(buffer)) - - # If we don't need to avoid backslashes globally (i.e., we only need - # to avoid them inside FormattedValues), it's cosmetically preferred - # to use escaped whitespace. That is, it's preferred to use backslashes - # for cases like: f"{x}\n". To accomplish this, we keep track of what - # in our buffer corresponds to FormattedValues and what corresponds to - # Constant parts of the f-string, and allow escapes accordingly. + fstring_parts = [] for value in node.values: with self.buffered() as buffer: self._write_fstring_inner(value) - fstring_parts.append( - ("".join(buffer), isinstance(value, Constant)) - ) + fstring_parts.append(("".join(buffer), isinstance(value, Constant))) + + # We decide if we need to write a multi-line `f-string` since it is only + # necessary when we have "\n" inside formatted values. + use_multiline = any( + "\n" in value for value, is_constant in fstring_parts if not is_constant + ) + + # We then choose the quote type we use. We let `repr` do this work for + # now. This can be easily modified afterwards. + quote = repr( + "".join(value for value, is_constant in fstring_parts if is_constant) + )[0] + quote_type = quote * 3 if use_multiline else quote new_fstring_parts = [] - quote_types = list(_ALL_QUOTES) for value, is_constant in fstring_parts: if is_constant: - value, quote_types = self._str_literal_helper( - value, - quote_types=quote_types, - escape_special_whitespace=True, - ) - elif "\n" in value: - quote_types = [q for q in quote_types if q in _MULTI_QUOTES] + consecutive_quotes = 0 + res = [] + for c in value: + if c == "\\" or not c.isprintable(): + res.append(c.encode("unicode_escape").decode("ascii")) + continue + if c == quote: + if consecutive_quotes == len(quote_type) - 1: + # escape when we see a full `quote_type` + res.append("\\") + consecutive_quotes = 0 + else: + consecutive_quotes += 1 + else: + consecutive_quotes = 0 + res.append(c) + value = "".join(res) new_fstring_parts.append(value) value = "".join(new_fstring_parts) - quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") def _write_fstring_inner(self, node): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 38c59e6d430b58..5c7f526e3b0426 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -196,10 +196,14 @@ def test_fstrings_complicated(self): self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""') self.check_ast_roundtrip('''f"a\\r\\nb"''') self.check_ast_roundtrip('''f"\\u2028{'x'}"''') + self.check_ast_roundtrip("f\"'''{1}\\\"\\\"\\\"\"") + self.check_ast_roundtrip('f\'\\\'\\\'\\\'{1}"""\'') + self.check_ast_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'') def test_fstrings_pep701(self): self.check_ast_roundtrip('f" something { my_dict["key"] } something else "') self.check_ast_roundtrip('f"{f"{f"{f"{f"{f"{1+1}"}"}"}"}"}"') + self.check_ast_roundtrip("f'{f'{f'{f'{f'{f'{1+1}'}'}'}'}'}'") def test_strings(self): self.check_ast_roundtrip("u'foo'") @@ -518,6 +522,8 @@ def test_fstrings(self): self.check_src_roundtrip(r"f'{x}\n'") self.check_src_roundtrip("f'{'\\n'}\\n'") self.check_src_roundtrip("f'{f'{x}\\n'}\\n'") + self.check_src_roundtrip('f\'\\\'\\\'\\\'{1}"""\'') + self.check_src_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'') def test_docstrings(self): docstrings = ( From 9b1b5ddcbaa37fa3926428e4c0abaf866c3a78c3 Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Tue, 5 Sep 2023 09:22:21 +0800 Subject: [PATCH 7/8] Revert "reimplement quote selection logic" This reverts commit 21229ce1f0c8fb2e580df62972296d629960f30e. --- Lib/ast.py | 55 +++++++++++++++++----------------------- Lib/test/test_unparse.py | 6 ----- 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index f49441269843b8..fe2aaf0d906700 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1225,49 +1225,40 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): def visit_JoinedStr(self, node): self.write("f") - + if self._avoid_backslashes: + with self.buffered() as buffer: + self._write_fstring_inner(node) + return self._write_str_avoiding_backslashes("".join(buffer)) + + # If we don't need to avoid backslashes globally (i.e., we only need + # to avoid them inside FormattedValues), it's cosmetically preferred + # to use escaped whitespace. That is, it's preferred to use backslashes + # for cases like: f"{x}\n". To accomplish this, we keep track of what + # in our buffer corresponds to FormattedValues and what corresponds to + # Constant parts of the f-string, and allow escapes accordingly. fstring_parts = [] for value in node.values: with self.buffered() as buffer: self._write_fstring_inner(value) - fstring_parts.append(("".join(buffer), isinstance(value, Constant))) - - # We decide if we need to write a multi-line `f-string` since it is only - # necessary when we have "\n" inside formatted values. - use_multiline = any( - "\n" in value for value, is_constant in fstring_parts if not is_constant - ) - - # We then choose the quote type we use. We let `repr` do this work for - # now. This can be easily modified afterwards. - quote = repr( - "".join(value for value, is_constant in fstring_parts if is_constant) - )[0] - quote_type = quote * 3 if use_multiline else quote + fstring_parts.append( + ("".join(buffer), isinstance(value, Constant)) + ) new_fstring_parts = [] + quote_types = list(_ALL_QUOTES) for value, is_constant in fstring_parts: if is_constant: - consecutive_quotes = 0 - res = [] - for c in value: - if c == "\\" or not c.isprintable(): - res.append(c.encode("unicode_escape").decode("ascii")) - continue - if c == quote: - if consecutive_quotes == len(quote_type) - 1: - # escape when we see a full `quote_type` - res.append("\\") - consecutive_quotes = 0 - else: - consecutive_quotes += 1 - else: - consecutive_quotes = 0 - res.append(c) - value = "".join(res) + value, quote_types = self._str_literal_helper( + value, + quote_types=quote_types, + escape_special_whitespace=True, + ) + elif "\n" in value: + quote_types = [q for q in quote_types if q in _MULTI_QUOTES] new_fstring_parts.append(value) value = "".join(new_fstring_parts) + quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") def _write_fstring_inner(self, node): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 5c7f526e3b0426..38c59e6d430b58 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -196,14 +196,10 @@ def test_fstrings_complicated(self): self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""') self.check_ast_roundtrip('''f"a\\r\\nb"''') self.check_ast_roundtrip('''f"\\u2028{'x'}"''') - self.check_ast_roundtrip("f\"'''{1}\\\"\\\"\\\"\"") - self.check_ast_roundtrip('f\'\\\'\\\'\\\'{1}"""\'') - self.check_ast_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'') def test_fstrings_pep701(self): self.check_ast_roundtrip('f" something { my_dict["key"] } something else "') self.check_ast_roundtrip('f"{f"{f"{f"{f"{f"{1+1}"}"}"}"}"}"') - self.check_ast_roundtrip("f'{f'{f'{f'{f'{f'{1+1}'}'}'}'}'}'") def test_strings(self): self.check_ast_roundtrip("u'foo'") @@ -522,8 +518,6 @@ def test_fstrings(self): self.check_src_roundtrip(r"f'{x}\n'") self.check_src_roundtrip("f'{'\\n'}\\n'") self.check_src_roundtrip("f'{f'{x}\\n'}\\n'") - self.check_src_roundtrip('f\'\\\'\\\'\\\'{1}"""\'') - self.check_src_roundtrip('f\'\'\'\'\'\\\'\'\'\\\'{x:\n}""""\'\'\'') def test_docstrings(self): docstrings = ( From 88394d1ca4e3ae4bbe5210741c3c086fc1ae857b Mon Sep 17 00:00:00 2001 From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Date: Tue, 5 Sep 2023 09:31:04 +0800 Subject: [PATCH 8/8] clean up unnecessary parts --- Lib/ast.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index fe2aaf0d906700..17ec7ff6f8bc12 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1225,17 +1225,7 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): def visit_JoinedStr(self, node): self.write("f") - if self._avoid_backslashes: - with self.buffered() as buffer: - self._write_fstring_inner(node) - return self._write_str_avoiding_backslashes("".join(buffer)) - - # If we don't need to avoid backslashes globally (i.e., we only need - # to avoid them inside FormattedValues), it's cosmetically preferred - # to use escaped whitespace. That is, it's preferred to use backslashes - # for cases like: f"{x}\n". To accomplish this, we keep track of what - # in our buffer corresponds to FormattedValues and what corresponds to - # Constant parts of the f-string, and allow escapes accordingly. + fstring_parts = [] for value in node.values: with self.buffered() as buffer: @@ -1276,7 +1266,7 @@ def _write_fstring_inner(self, node): def visit_FormattedValue(self, node): def unparse_inner(inner): - unparser = type(self)(_avoid_backslashes=False) + unparser = type(self)() unparser.set_precedence(_Precedence.TEST.next(), inner) return unparser.visit(inner)