bpo-29104: Fixed parsing backslashes in f-strings. (#490)

serhiy-storchaka · web-flow · commit 0cd7a3f196cf · 2017-05-25T13:33:55.000+03:00
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
@@ -361,6 +361,20 @@ def test_backslashes_in_string_part(self):
         self.assertEqual(f'2\x203', '2 3')
         self.assertEqual(f'\x203', ' 3')
 
+        with self.assertWarns(DeprecationWarning):  # invalid escape sequence
+            value = eval(r"f'\{6*7}'")
+        self.assertEqual(value, '\\42')
+        self.assertEqual(f'\\{6*7}', '\\42')
+        self.assertEqual(fr'\{6*7}', '\\42')
+
+        AMPERSAND = 'spam'
+        # Get the right unicode character (&), or pick up local variable
+        # depending on the number of backslashes.
+        self.assertEqual(f'\N{AMPERSAND}', '&')
+        self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
+        self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
+
     def test_misformed_unicode_character_name(self):
         # These test are needed because unicode names are parsed
         # differently inside f-strings.
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
 Core and Builtins
 -----------------
 
+- bpo-29104: Fixed parsing of backslashes in f-strings.
+
 - bpo-27945: Fixed various segfaults with dict when input collections are
   mutated during searching, inserting or comparing.  Based on patches by
   Duane Griffin and Tim Mitchell.
diff --git a/Python/ast.c b/Python/ast.c
@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
     while (s < end) {
         if (*s == '\\') {
             *p++ = *s++;
-            if (*s & 0x80) {
+            if (s >= end || *s & 0x80) {
                 strcpy(p, "u005c");
                 p += 5;
+                if (s >= end)
+                    break;
             }
         }
         if (*s & 0x80) { /* XXX inefficient */
@@ -4352,59 +4354,68 @@ fstring_find_literal(const char **str, const char *end, int raw,
        brace (which isn't part of a unicode name escape such as
        "\N{EULER CONSTANT}"), or the end of the string. */
 
-    const char *literal_start = *str;
-    const char *literal_end;
-    int in_named_escape = 0;
+    const char *s = *str;
+    const char *literal_start = s;
     int result = 0;
 
     assert(*literal == NULL);
-    for (; *str < end; (*str)++) {
-        char ch = **str;
-        if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
-            *(*str-2) == '\\' && *(*str-1) == 'N') {
-            in_named_escape = 1;
-        } else if (in_named_escape && ch == '}') {
-            in_named_escape = 0;
-        } else if (ch == '{' || ch == '}') {
+    while (s < end) {
+        char ch = *s++;
+        if (!raw && ch == '\\' && s < end) {
+            ch = *s++;
+            if (ch == 'N') {
+                if (s < end && *s++ == '{') {
+                    while (s < end && *s++ != '}') {
+                    }
+                    continue;
+                }
+                break;
+            }
+            if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+                return -1;
+            }
+        }
+        if (ch == '{' || ch == '}') {
             /* Check for doubled braces, but only at the top level. If
                we checked at every level, then f'{0:{3}}' would fail
                with the two closing braces. */
             if (recurse_lvl == 0) {
-                if (*str+1 < end && *(*str+1) == ch) {
+                if (s < end && *s == ch) {
                     /* We're going to tell the caller that the literal ends
                        here, but that they should continue scanning. But also
                        skip over the second brace when we resume scanning. */
-                    literal_end = *str+1;
-                    *str += 2;
+                    *str = s + 1;
                     result = 1;
                     goto done;
                 }
 
                 /* Where a single '{' is the start of a new expression, a
                    single '}' is not allowed. */
                 if (ch == '}') {
+                    *str = s - 1;
                     ast_error(c, n, "f-string: single '}' is not allowed");
                     return -1;
                 }
             }
             /* We're either at a '{', which means we're starting another
                expression; or a '}', which means we're at the end of this
                f-string (for a nested format_spec). */
+            s--;
             break;
         }
     }
-    literal_end = *str;
-    assert(*str <= end);
-    assert(*str == end || **str == '{' || **str == '}');
+    *str = s;
+    assert(s <= end);
+    assert(s == end || *s == '{' || *s == '}');
 done:
-    if (literal_start != literal_end) {
+    if (literal_start != s) {
         if (raw)
             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
-                                                    literal_end-literal_start,
+                                                    s - literal_start,
                                                     NULL, NULL);
         else
             *literal = decode_unicode_with_escapes(c, n, literal_start,
-                                                   literal_end-literal_start);
+                                                   s - literal_start);
         if (!*literal)
             return -1;
     }