From bd3cb12c9f77bf651239cc1c0426d170d54c3026 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 18 Oct 2022 16:44:30 +0200
Subject: [PATCH] gh-98401: Reject invalid escape sequences in strings

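A backslash-character pair that is not a valid escape sequence now
generates a SyntaxError instead of a DeprecationWarning.

Invalid octal escape sequences (values above 0o377, such as '\777')
are not affected: they still only emit a DeprecationWarning.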
---
 Doc/library/re.rst                            |  6 +--
 Doc/reference/lexical_analysis.rst            |  7 +--
 Doc/whatsnew/3.12.rst                         |  4 ++
 Lib/test/test_codecs.py                       | 20 +++----
 Lib/test/test_codeop.py                       | 10 ++--
 Lib/test/test_fstring.py                      |  6 +--
 Lib/test/test_string_literals.py              | 30 ++++-------
 ...2-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst |  2 +
 Objects/bytesobject.c                         | 11 ++--
 Objects/unicodeobject.c                       | 11 ++--
 Parser/string_parser.c                        | 54 ++++++++++---------
 11 files changed, 77 insertions(+), 84 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst

diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 5b304f717b07fa..de69990fd9d1ab 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -29,9 +29,9 @@ a literal backslash, one might have to write ``'\\\\'`` as the pattern
 string, because the regular expression must be ``\\``, and each
 backslash must be expressed as ``\\`` inside a regular Python string
 literal. Also, please note that any invalid escape sequences in Python's
-usage of the backslash in string literals now generate a :exc:`DeprecationWarning`
-and in the future this will become a :exc:`SyntaxError`. This behaviour
-will happen even if it is a valid escape sequence for a regular expression.
+usage of the backslash in string literals now generate a :exc:`SyntaxError`.
+This happens even if the sequence is a valid escape sequence for a regular
+expression.
 
 The solution is to use Python's raw string notation for regular expression
 patterns; backslashes are not handled in any special way in a string literal
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index 4ab6e90a623449..c82dee8304e4dc 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -646,9 +646,10 @@ escape sequences only recognized in string literals fall into the category of
 unrecognized escapes for bytes literals.
 
    .. versionchanged:: 3.6
-      Unrecognized escape sequences produce a :exc:`DeprecationWarning`.  In
-      a future Python version they will be a :exc:`SyntaxWarning` and
-      eventually a :exc:`SyntaxError`.
+      Unrecognized escape sequences produce a :exc:`DeprecationWarning`.
+
+   .. versionchanged:: 3.12
+      Unrecognized escape sequences produce a :exc:`SyntaxError`.
 
 Even in a raw literal, quotes can be escaped with a backslash, but the
 backslash remains in the result; for example, ``r"\""`` is a valid string
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 525efc405c8520..9907c75cd0e6be 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -79,6 +79,10 @@ New Features
 Other Language Changes
 ======================
 
+* A backslash-character pair that is not a valid escape sequence now generates
+  a :exc:`SyntaxError` instead of a :exc:`DeprecationWarning`.
+  (Contributed by Victor Stinner in :gh:`98401`.)
+
 * :class:`types.MappingProxyType` instances are now hashable if the underlying
   mapping is hashable.
   (Contributed by Serhiy Storchaka in :gh:`87995`.)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 57f3648eb7017c..b6ae7155d8c678 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1197,15 +1197,15 @@ def test_escape(self):
         for i in range(97, 123):
             b = bytes([i])
             if b not in b'abfnrtvx':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertRaises(SyntaxError):
                     check(b"\\" + b, b"\\" + b)
-            with self.assertWarns(DeprecationWarning):
+            with self.assertRaises(SyntaxError):
                 check(b"\\" + b.upper(), b"\\" + b.upper())
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(br"\8", b"\\8")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(br"\9", b"\\9")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(b"\\\xfa", b"\\\xfa")
         for i in range(0o400, 0o1000):
             with self.assertWarns(DeprecationWarning):
@@ -2425,16 +2425,16 @@ def test_escape_decode(self):
         for i in range(97, 123):
             b = bytes([i])
             if b not in b'abfnrtuvx':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertRaises(SyntaxError):
                     check(b"\\" + b, "\\" + chr(i))
             if b.upper() not in b'UN':
-                with self.assertWarns(DeprecationWarning):
+                with self.assertRaises(SyntaxError):
                     check(b"\\" + b.upper(), "\\" + chr(i-32))
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(br"\8", "\\8")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(br"\9", "\\9")
-        with self.assertWarns(DeprecationWarning):
+        with self.assertRaises(SyntaxError):
             check(b"\\\xfa", "\\\xfa")
         for i in range(0o400, 0o1000):
             with self.assertWarns(DeprecationWarning):
diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py
index 133096d25a44bc..0b4ac87bc2cd77 100644
--- a/Lib/test/test_codeop.py
+++ b/Lib/test/test_codeop.py
@@ -313,7 +313,7 @@ def test_warning(self):
                 (".*literal", SyntaxWarning),
                 (".*invalid", DeprecationWarning),
                 ) as w:
-            compile_command(r"'\e' is 0")
+            compile_command(r"'\777' is 0")
             self.assertEqual(len(w.warnings), 2)
 
         # bpo-41520: check SyntaxWarning treated as an SyntaxError
@@ -324,21 +324,21 @@ def test_warning(self):
         # Check DeprecationWarning treated as an SyntaxError
         with warnings.catch_warnings(), self.assertRaises(SyntaxError):
             warnings.simplefilter('error', DeprecationWarning)
-            compile_command(r"'\e'", symbol='exec')
+            compile_command(r"'\777'", symbol='exec')
 
     def test_incomplete_warning(self):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('always')
-            self.assertIncomplete("'\\e' + (")
+            self.assertIncomplete("'\\777' + (")
         self.assertEqual(w, [])
 
     def test_invalid_warning(self):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('always')
-            self.assertInvalid("'\\e' 1")
+            self.assertInvalid("'\\777' 1")
         self.assertEqual(len(w), 1)
         self.assertEqual(w[0].category, DeprecationWarning)
-        self.assertRegex(str(w[0].message), 'invalid escape sequence')
+        self.assertRegex(str(w[0].message), 'invalid octal escape sequence')
         self.assertEqual(w[0].filename, '<input>')
 
 
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index bf3a5b0bbccdfb..f60761106b2e08 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -776,9 +776,9 @@ def test_backslashes_in_string_part(self):
         self.assertEqual(f'2\x203', '2 3')
         self.assertEqual(f'\x203', ' 3')
 
-        with self.assertWarns(DeprecationWarning):  # invalid escape sequence
-            value = eval(r"f'\{6*7}'")
-        self.assertEqual(value, '\\42')
+        with self.assertRaisesRegex(SyntaxError, 'invalid escape sequence'):
+            eval(r"f'\{6*7}'")
+
         self.assertEqual(f'\\{6*7}', '\\42')
         self.assertEqual(fr'\{6*7}', '\\42')
 
diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py
index 7247b7e48bc2b6..0ed4e6703f81b5 100644
--- a/Lib/test/test_string_literals.py
+++ b/Lib/test/test_string_literals.py
@@ -109,23 +109,12 @@ def test_eval_str_invalid_escape(self):
         for b in range(1, 128):
             if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                 continue
-            with self.assertWarns(DeprecationWarning):
+            with self.assertRaises(SyntaxError):
                 self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
 
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter('always', category=DeprecationWarning)
+        with self.assertRaises(SyntaxError) as cm:
             eval("'''\n\\z'''")
-        self.assertEqual(len(w), 1)
-        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
-        self.assertEqual(w[0].filename, '<string>')
-        self.assertEqual(w[0].lineno, 1)
-
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter('error', category=DeprecationWarning)
-            with self.assertRaises(SyntaxError) as cm:
-                eval("'''\n\\z'''")
-            exc = cm.exception
-        self.assertEqual(w, [])
+        exc = cm.exception
         self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
         self.assertEqual(exc.filename, '<string>')
         self.assertEqual(exc.lineno, 1)
@@ -186,16 +175,15 @@ def test_eval_bytes_invalid_escape(self):
         for b in range(1, 128):
             if b in b"""\n\r"'01234567\\abfnrtvx""":
                 continue
-            with self.assertWarns(DeprecationWarning):
+            with self.assertRaises(SyntaxError):
                 self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
 
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter('always', category=DeprecationWarning)
+        with self.assertRaises(SyntaxError) as cm:
             eval("b'''\n\\z'''")
-        self.assertEqual(len(w), 1)
-        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
-        self.assertEqual(w[0].filename, '<string>')
-        self.assertEqual(w[0].lineno, 1)
+        exc = cm.exception
+        self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
+        self.assertEqual(exc.filename, '<string>')
+        self.assertEqual(exc.lineno, 1)
 
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter('error', category=DeprecationWarning)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst
new file mode 100644
index 00000000000000..5b113ae50da62c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst	
@@ -0,0 +1,2 @@
+A backslash-character pair that is not a valid escape sequence now generates
+a :exc:`SyntaxError`. Patch by Victor Stinner.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 80660881920fb7..f783a684b5eb39 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1192,13 +1192,10 @@ PyObject *PyBytes_DecodeEscape(const char *s,
             }
         }
         else {
-            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
-                                 "invalid escape sequence '\\%c'",
-                                 c) < 0)
-            {
-                Py_DECREF(result);
-                return NULL;
-            }
+            PyErr_Format(PyExc_SyntaxError,
+                         "invalid escape sequence '\\%c'", c);
+            Py_DECREF(result);
+            return NULL;
         }
     }
     return result;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d090915146f804..022f674a41e2b7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5967,13 +5967,10 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
             }
         }
         else {
-            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
-                                 "invalid escape sequence '\\%c'",
-                                 c) < 0)
-            {
-                Py_DECREF(result);
-                return NULL;
-            }
+            PyErr_Format(PyExc_SyntaxError,
+                         "invalid escape sequence '\\%c'", c);
+            Py_DECREF(result);
+            return NULL;
         }
     }
     return result;
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 9bc3b082136be5..942bf823eea9b3 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -13,38 +13,42 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
 {
     unsigned char c = *first_invalid_escape;
     int octal = ('4' <= c && c <= '7');
-    PyObject *msg =
-        octal
-        ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
-                               first_invalid_escape)
-        : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
-    if (msg == NULL) {
-        return -1;
-    }
-    if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
-                                 t->lineno, NULL, NULL) < 0) {
-        if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
-            /* Replace the DeprecationWarning exception with a SyntaxError
-               to get a more accurate error report */
-            PyErr_Clear();
-
-            /* This is needed, in order for the SyntaxError to point to the token t,
-               since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
-               error location, if p->known_err_token is not set. */
-            p->known_err_token = t;
-            if (octal) {
+
+    if (octal) {
+        PyObject *msg = PyUnicode_FromFormat(
+                            "invalid octal escape sequence '\\%.3s'",
+                            first_invalid_escape);
+        if (msg == NULL) {
+            return -1;
+        }
+        if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
+                                     t->lineno, NULL, NULL) < 0) {
+            if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
+                /* Replace the DeprecationWarning exception with a SyntaxError
+                   to get a more accurate error report */
+                PyErr_Clear();
+
+                /* This is needed, in order for the SyntaxError to point to the token t,
+                   since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
+                   error location, if p->known_err_token is not set. */
+                p->known_err_token = t;
                 RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
                                    first_invalid_escape);
             }
-            else {
-                RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
-            }
+            Py_DECREF(msg);
+            return -1;
         }
         Py_DECREF(msg);
+        return 0;
+    }
+    else {
+        /* This is needed, in order for the SyntaxError to point to the token t,
+           since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
+           error location, if p->known_err_token is not set. */
+        p->known_err_token = t;
+        RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
         return -1;
     }
-    Py_DECREF(msg);
-    return 0;
 }
 
 static PyObject *