[3.11] gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081) (GH-109091)

serhiy-storchaka · web-flow · commit dae62d456e4f · 2023-09-07T14:54:07.000Z
It now points to the invalid non-ASCII character, not to the valid numerical literal. (cherry picked from commit b2729e9)
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
@@ -238,6 +238,10 @@ def check(test, error=False):
             check(f"[{num}for x in ()]")
             check(f"{num}spam", error=True)
 
+            # gh-88943: Invalid non-ASCII character following a numerical literal.
+            with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"):
+                compile(f"{num}⁄7", "<testcase>", "eval")
+
             with warnings.catch_warnings():
                 warnings.filterwarnings('ignore', '"is" with a literal',
                                         SyntaxWarning)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst
@@ -0,0 +1,3 @@
+Improve syntax error for non-ASCII character that follows a numerical
+literal. It now points to the invalid non-ASCII character, not to the valid
+numerical literal.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
@@ -1303,7 +1303,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
         tok_nextc(tok);
     }
     else /* In future releases, only error will remain. */
-    if (is_potential_identifier_char(c)) {
+    if (c < 128 && is_potential_identifier_char(c)) {
         tok_backup(tok, c);
         syntaxerror(tok, "invalid %s literal", kind);
         return 0;

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+Improve syntax error for non-ASCII character that follows a numerical`
	`2`	`+literal. It now points to the invalid non-ASCII character, not to the valid`
	`3`	`+numerical literal.`
Original file line number	Diff line number	Diff line change
`@@ -1303,7 +1303,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)`
`1303`	`1303`	`tok_nextc(tok);`
`1304`	`1304`	`}`
`1305`	`1305`	`else /* In future releases, only error will remain. */`
`1306`		`- if (is_potential_identifier_char(c)) {`
	`1306`	`+ if (c < 128 && is_potential_identifier_char(c)) {`
`1307`	`1307`	`tok_backup(tok, c);`
`1308`	`1308`	`syntaxerror(tok, "invalid %s literal", kind);`
`1309`	`1309`	`return 0;`