From 3ce123379608544de7fc5ac0e75fc46afae2b64a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 8 Feb 2024 16:00:27 +0000 Subject: [PATCH 1/8] gh-115154: Fix untokenize handling of unicode named literals Signed-off-by: Pablo Galindo --- Lib/test/test_tokenize.py | 2 ++ Lib/tokenize.py | 8 +++----- .../2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 21e8637a7ca905..8392f543be15d8 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1877,6 +1877,8 @@ def test_roundtrip(self): " print('Can not import' # comment2\n)" "else: print('Loaded')\n") + self.check_roundtrip("f'\\N{EXCLAMATION MARK}'") + def test_continuation(self): # Balancing continuation self.check_roundtrip("a = (3,4, \n" diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 0ab1893d42f72f..f8ac40116c9bd7 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -215,9 +215,8 @@ def untokenize(self, iterable): elif tok_type == FSTRING_MIDDLE: if '{' in token or '}' in token: end_line, end_col = end - end = (end_line, end_col + token.count('{') + token.count('}')) - token = re.sub('{', '{{', token) - token = re.sub('}', '}}', token) + token = re.sub(r'(? Date: Sun, 11 Feb 2024 12:56:45 +0000 Subject: [PATCH 2/8] fixup! gh-115154: Fix untokenize handling of unicode named literals --- Lib/tokenize.py | 8 +++++--- lol.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 lol.py diff --git a/Lib/tokenize.py b/Lib/tokenize.py index f8ac40116c9bd7..0ab1893d42f72f 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -215,8 +215,9 @@ def untokenize(self, iterable): elif tok_type == FSTRING_MIDDLE: if '{' in token or '}' in token: end_line, end_col = end - token = re.sub(r'(?', 'exec') From 31f6ff41a802746f11af9bd781f34d48ad796fc9 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 11 Feb 2024 13:37:14 +0000 Subject: [PATCH 3/8] Fix tokenizing of test_fstring --- Lib/test/test_tokenize.py | 6 ++--- Lib/tokenize.py | 49 ++++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 8392f543be15d8..14f340d5383199 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1878,6 +1878,9 @@ def test_roundtrip(self): "else: print('Loaded')\n") self.check_roundtrip("f'\\N{EXCLAMATION MARK}'") + self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'") + self.check_roundtrip(r"f'\\N{SNAKE}'") + def test_continuation(self): # Balancing continuation @@ -1913,9 +1916,6 @@ def test_random_files(self): tempdir = os.path.dirname(__file__) or os.curdir testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py")) - # TODO: Remove this once we can untokenize PEP 701 syntax - testfiles.remove(os.path.join(tempdir, "test_fstring.py")) - if not support.is_resource_enabled("cpu"): testfiles = random.sample(testfiles, 10) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 0ab1893d42f72f..08321a49a577e0 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -168,6 +168,7 @@ def __init__(self): self.tokens = [] self.prev_row = 1 self.prev_col = 0 + self.prev_type = None self.encoding = None def add_whitespace(self, start): @@ -182,6 +183,23 @@ def add_whitespace(self, start): col_offset = col - self.prev_col if col_offset: self.tokens.append(" " * col_offset) + + def 
escape_brackets(self, token): + characters = [] + consume_until_next_bracket = False + for character in token: + if character == "}": + if consume_until_next_bracket: + consume_until_next_bracket = False + else: + characters.append(character) + if character == "{": + if characters[-2:] != ["\\", "N"]: + characters.append(character) + else: + consume_until_next_bracket = True + characters.append(character) + return "".join(characters) def untokenize(self, iterable): it = iter(iterable) @@ -214,11 +232,13 @@ def untokenize(self, iterable): startline = False elif tok_type == FSTRING_MIDDLE: if '{' in token or '}' in token: + token = self.escape_brackets(token) + last_line = token.splitlines()[-1] end_line, end_col = end - end = (end_line, end_col + token.count('{') + token.count('}')) - token = re.sub('{', '{{', token) - token = re.sub('}', '}}', token) - + extra_chars = last_line.count("{{") + last_line.count("}}") + end = (end_line, end_col + extra_chars) + elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END): + self.tokens.append(" ") self.add_whitespace(start) self.tokens.append(token) @@ -226,6 +246,7 @@ def untokenize(self, iterable): if tok_type in (NEWLINE, NL): self.prev_row += 1 self.prev_col = 0 + self.prev_type = tok_type return "".join(self.tokens) def compat(self, token, iterable): @@ -233,6 +254,7 @@ def compat(self, token, iterable): toks_append = self.tokens.append startline = token[0] in (NEWLINE, NL) prevstring = False + in_fstring = 0 for tok in _itertools.chain([token], iterable): toknum, tokval = tok[:2] @@ -250,7 +272,11 @@ def compat(self, token, iterable): prevstring = True else: prevstring = False - + + if toknum == FSTRING_START: + in_fstring += 1 + elif toknum == FSTRING_END: + in_fstring -= 1 if toknum == INDENT: indents.append(tokval) continue @@ -263,11 +289,18 @@ def compat(self, token, iterable): toks_append(indents[-1]) startline = False elif toknum == FSTRING_MIDDLE: - if '{' in tokval or '}' in tokval: - tokval = re.sub('{', '{{', tokval) - tokval = re.sub('}', '}}', tokval) + tokval = self.escape_brackets(tokval) + + # Insert a space between two consecutive brackets if e are in an f-string + if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring: + tokval = ' ' + tokval + + # Insert a space between two consecutive f-strings + if toknum in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END): + self.tokens.append(" ") toks_append(tokval) + self.prev_type = toknum def untokenize(iterable): From ce7ddd7b6a45d9209349d9a3d965e97a2c8b1692 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 11 Feb 2024 13:47:03 +0000 Subject: [PATCH 4/8] Fix linting --- Lib/test/test_tokenize.py | 4 ++-- Lib/tokenize.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 14f340d5383199..f787b2c0c93e47 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1878,8 +1878,8 @@ def test_roundtrip(self): "else: print('Loaded')\n") self.check_roundtrip("f'\\N{EXCLAMATION MARK}'") - self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'") - self.check_roundtrip(r"f'\\N{SNAKE}'") + self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'") + self.check_roundtrip(r"f'\\N{SNAKE}'") def test_continuation(self): diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 08321a49a577e0..39c65be3c5c208 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -183,14 +183,14 @@ def add_whitespace(self, start): col_offset = col - 
self.prev_col if col_offset: self.tokens.append(" " * col_offset) - + def escape_brackets(self, token): characters = [] consume_until_next_bracket = False for character in token: if character == "}": if consume_until_next_bracket: - consume_until_next_bracket = False + consume_until_next_bracket = False else: characters.append(character) if character == "{": @@ -272,7 +272,7 @@ def compat(self, token, iterable): prevstring = True else: prevstring = False - + if toknum == FSTRING_START: in_fstring += 1 elif toknum == FSTRING_END: From c9b33aedccd536cc4e37b09bda3a0a3e6fa32468 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 11 Feb 2024 15:43:55 +0000 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: Alex Waygood --- Lib/tokenize.py | 2 +- .../2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 39c65be3c5c208..81db518e0aa3c8 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -291,7 +291,7 @@ def compat(self, token, iterable): elif toknum == FSTRING_MIDDLE: tokval = self.escape_brackets(tokval) - # Insert a space between two consecutive brackets if e are in an f-string + # Insert a space between two consecutive brackets if we are in an f-string if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring: tokval = ' ' + tokval diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst index 89184ec95e6292..045596bfcdca43 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-08-16-01-18.gh-issue-115154.ji96FV.rst @@ -1,2 +1,2 @@ -Fix a bug that was causing the :func:`tokenize.untokenize` function to not -handle correctly unicode named literals. Patch by Pablo Galindo +Fix a bug that was causing the :func:`tokenize.untokenize` function to +handle unicode named literals incorrectly. Patch by Pablo Galindo From 59b406c1e44a7dc8b6b5b1896136db51c82ac8bf Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 11 Feb 2024 15:45:29 +0000 Subject: [PATCH 6/8] fixup! 
Apply suggestions from code review

---
 Lib/test/test_tokenize.py | 18 ++++++++++++++++++
 lol.py                    |  1 -
 2 files changed, 18 insertions(+), 1 deletion(-)
 delete mode 100644 lol.py

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index f787b2c0c93e47..e264f10286f3f1 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1880,6 +1880,24 @@ def test_roundtrip(self):
         self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
         self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'")
         self.check_roundtrip(r"f'\\N{SNAKE}'")
+        cases = [
+            """
+if 1:
+    "foo"
+"bar"
+""",
+            """
+if 1:
+    ("foo"
+    "bar")
+""",
+            """
+if 1:
+    "foo"
+        "bar"
+""" ]
+        for case in cases:
+            self.check_roundtrip(case)
 
 
     def test_continuation(self):
diff --git a/lol.py b/lol.py
deleted file mode 100644
index 9d1d64baa646ab..00000000000000
--- a/lol.py
+++ /dev/null
@@ -1 +0,0 @@
-compile('match y:\n case e(e=v,v,', '', 'exec')

From 4ab43521a435827f2227e00bc9e160a8d3149154 Mon Sep 17 00:00:00 2001
From: Pablo Galindo
Date: Mon, 12 Feb 2024 15:59:06 +0000
Subject: [PATCH 7/8] Fix escaped \N

---
 Lib/test/test_tokenize.py | 1 +
 Lib/tokenize.py           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index e264f10286f3f1..ec5d0a7b6f59cd 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1880,6 +1880,7 @@ def test_roundtrip(self):
         self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
         self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'")
         self.check_roundtrip(r"f'\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\N{{SNAKE}}'")
         cases = [
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 81db518e0aa3c8..99815cb258a293 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -194,7 +194,7 @@ def escape_brackets(self, token):
                 else:
                     characters.append(character)
             if character == "{":
-                if characters[-2:] != ["\\", "N"]:
+                if characters[-2:] != ["\\", "N"] or characters[-3:] == ["\\", "\\", "N"]:
                     characters.append(character)
                 else:
                     consume_until_next_bracket = True

From b630f684078b8b03ab5f25f00f9f1b16b7e108e0 Mon Sep 17 00:00:00 2001
From: Pablo Galindo
Date: Mon, 19 Feb 2024 14:34:04 +0000
Subject: [PATCH 8/8] Fix multiple backslashes

---
 Lib/test/test_tokenize.py | 15 ++++++++++++++-
 Lib/tokenize.py           |  8 +++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index ec5d0a7b6f59cd..4428e8cea1964c 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1878,9 +1878,22 @@ def test_roundtrip(self):
                               "else: print('Loaded')\n")
 
         self.check_roundtrip("f'\\N{EXCLAMATION MARK}'")
-        self.check_roundtrip(r"f'\\N{EXCLAMATION MARK}'")
         self.check_roundtrip(r"f'\\N{SNAKE}'")
         self.check_roundtrip(r"f'\\N{{SNAKE}}'")
+        self.check_roundtrip(r"f'\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\N{SNAKE}'")
+        self.check_roundtrip(r"f'\\\\\\\N{SNAKE}'")
+
+        self.check_roundtrip(r"f'\\N{1}'")
+        self.check_roundtrip(r"f'\\\\N{2}'")
+        self.check_roundtrip(r"f'\\\\\\N{3}'")
+        self.check_roundtrip(r"f'\\\\\\\\N{4}'")
+
+        self.check_roundtrip(r"f'\\N{{'")
+        self.check_roundtrip(r"f'\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\N{{'")
+        self.check_roundtrip(r"f'\\\\\\\\N{{'")
         cases = [
             """
 if 1:
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 99815cb258a293..7f418bb7a1b37f 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -194,7 +194,13 @@ def escape_brackets(self, token):
                 else:
                     characters.append(character)
             if character == "{":
-                if characters[-2:] != ["\\", "N"] or characters[-3:] == ["\\", "\\", "N"]:
+                n_backslashes = sum(
+                    1 for char in _itertools.takewhile(
+                        "\\".__eq__,
+                        characters[-2::-1]
+                    )
+                )
+                if n_backslashes % 2 == 0:
                     characters.append(character)
                 else:
                     consume_until_next_bracket = True
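
A quick way to exercise the behaviour this series fixes, outside the test
suite, is to round-trip a few f-strings through tokenize()/untokenize() and
check that the source comes back unchanged. The sketch below is illustrative
only, not part of the patch: it assumes an interpreter with the PEP 701
tokenizer (3.12+) and these commits applied, and the `roundtrip` helper is a
name made up for the example.

    import io
    import tokenize

    def roundtrip(source):
        # tokenize() pulls bytes through a readline callable; with full
        # 5-tuple tokens, untokenize() reconstructs the input source.
        readline = io.BytesIO(source.encode("utf-8")).readline
        tokens = list(tokenize.tokenize(readline))
        return tokenize.untokenize(tokens).decode("utf-8")

    cases = [
        "f'\\N{EXCLAMATION MARK}'",  # named escape: braces must not be doubled
        r"f'\\N{SNAKE}'",            # escaped backslash: {SNAKE} is a replacement field
        r"f'\\\N{SNAKE}'",           # odd backslash run: a named escape again
    ]
    for case in cases:
        result = roundtrip(case)
        # Before this fix, the first case came back as
        # f'\N{{EXCLAMATION MARK}}' -- a different string literal.
        assert result == case, (case, result)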
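The parity rule that the last commit settles on can also be checked in
isolation. The helper below is a hand-written mirror of the check inside
escape_brackets(), written for illustration only (it is not the patched
code itself): a '{' inside an FSTRING_MIDDLE token opens a \N{...} named
escape only when the 'N' before it sits behind an odd run of backslashes;
with an even run every backslash is itself escaped, so the brace is a
literal one that untokenize must double back into '{{'.

    import itertools

    def brace_is_literal(prefix):
        # prefix is the FSTRING_MIDDLE text already seen before a '{'.
        # Like characters[-2::-1] in the patch, skip the character just
        # before the brace (normally the 'N') and count the backslash run.
        backslashes = sum(
            1 for ch in itertools.takewhile("\\".__eq__, reversed(prefix[:-1]))
        )
        return backslashes % 2 == 0

    for prefix in ("\\N", "\\\\N", "\\\\\\N", "\\\\\\\\N"):
        brace = prefix + "{"
        kind = "literal, double it" if brace_is_literal(prefix) else "named escape, keep single"
        print(f"{brace:10} -> {kind}")
    # Output:
    #   \N{        -> named escape, keep single
    #   \\N{       -> literal, double it
    #   \\\N{      -> named escape, keep single
    #   \\\\N{     -> literal, double it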