From 71b921185988b86a096e415cc8e5177574785af7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Apr 2022 21:08:49 +0300 Subject: [PATCH] [3.10] gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665) (GH-91830) re.error is now raised instead of TypeError. (cherry picked from commit 6ccfa31421393910b52936e0447625db06f2a655) (cherry picked from commit 9c18d783c38fca57a63b61aa778d8a8d18945d95) Co-authored-by: Serhiy Storchaka --- Lib/sre_parse.py | 4 ++-- Lib/test/test_re.py | 4 ++++ .../Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 53706676e9f7b8..d3ff196032b300 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -330,7 +330,7 @@ def _class_escape(source, escape): charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) - except KeyError: + except (KeyError, TypeError): raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) return LITERAL, c @@ -390,7 +390,7 @@ def _escape(source, escape, state): charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) - except KeyError: + except (KeyError, TypeError): raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) return LITERAL, c diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 56e98b7aedce7c..007064093c4d19 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -753,6 +753,10 @@ def test_named_unicode_escapes(self): "undefined character name 'SPAM'", 0) self.checkPatternError(r'[\N{SPAM}]', "undefined character name 'SPAM'", 1) + self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}', + "undefined character name 'KEYCAP NUMBER SIGN'", 0) + self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]', + "undefined character name 'KEYCAP 
NUMBER SIGN'", 1) self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) diff --git a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst new file mode 100644 index 00000000000000..4411c715830e2e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst @@ -0,0 +1,3 @@ +Parsing ``\N`` escapes of Unicode Named Character Sequences in a +:mod:`regular expression <re>` now raises :exc:`re.error` instead of +``TypeError``.