Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0cd7a3f

Browse files
bpo-29104: Fixed parsing backslashes in f-strings. (#490)
1 parent d1c3c13 commit 0cd7a3f

3 files changed

Lines changed: 48 additions & 21 deletions

File tree

Lib/test/test_fstring.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,20 @@ def test_backslashes_in_string_part(self):
361361
self.assertEqual(f'2\x203', '2 3')
362362
self.assertEqual(f'\x203', ' 3')
363363

364+
with self.assertWarns(DeprecationWarning): # invalid escape sequence
365+
value = eval(r"f'\{6*7}'")
366+
self.assertEqual(value, '\\42')
367+
self.assertEqual(f'\\{6*7}', '\\42')
368+
self.assertEqual(fr'\{6*7}', '\\42')
369+
370+
AMPERSAND = 'spam'
371+
# Get the right unicode character (&), or pick up local variable
372+
# depending on the number of backslashes.
373+
self.assertEqual(f'\N{AMPERSAND}', '&')
374+
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
375+
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
376+
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
377+
364378
def test_misformed_unicode_character_name(self):
365379
# These tests are needed because unicode names are parsed
366380
# differently inside f-strings.

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- bpo-29104: Fixed parsing backslashes in f-strings.
14+
1315
- bpo-27945: Fixed various segfaults with dict when input collections are
1416
mutated during searching, inserting or comparing. Based on patches by
1517
Duane Griffin and Tim Mitchell.

Python/ast.c

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
41974197
while (s < end) {
41984198
if (*s == '\\') {
41994199
*p++ = *s++;
4200-
if (*s & 0x80) {
4200+
if (s >= end || *s & 0x80) {
42014201
strcpy(p, "u005c");
42024202
p += 5;
4203+
if (s >= end)
4204+
break;
42034205
}
42044206
}
42054207
if (*s & 0x80) { /* XXX inefficient */
@@ -4352,59 +4354,68 @@ fstring_find_literal(const char **str, const char *end, int raw,
43524354
brace (which isn't part of a unicode name escape such as
43534355
"\N{EULER CONSTANT}"), or the end of the string. */
43544356

4355-
const char *literal_start = *str;
4356-
const char *literal_end;
4357-
int in_named_escape = 0;
4357+
const char *s = *str;
4358+
const char *literal_start = s;
43584359
int result = 0;
43594360

43604361
assert(*literal == NULL);
4361-
for (; *str < end; (*str)++) {
4362-
char ch = **str;
4363-
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
4364-
*(*str-2) == '\\' && *(*str-1) == 'N') {
4365-
in_named_escape = 1;
4366-
} else if (in_named_escape && ch == '}') {
4367-
in_named_escape = 0;
4368-
} else if (ch == '{' || ch == '}') {
4362+
while (s < end) {
4363+
char ch = *s++;
4364+
if (!raw && ch == '\\' && s < end) {
4365+
ch = *s++;
4366+
if (ch == 'N') {
4367+
if (s < end && *s++ == '{') {
4368+
while (s < end && *s++ != '}') {
4369+
}
4370+
continue;
4371+
}
4372+
break;
4373+
}
4374+
if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4375+
return -1;
4376+
}
4377+
}
4378+
if (ch == '{' || ch == '}') {
43694379
/* Check for doubled braces, but only at the top level. If
43704380
we checked at every level, then f'{0:{3}}' would fail
43714381
with the two closing braces. */
43724382
if (recurse_lvl == 0) {
4373-
if (*str+1 < end && *(*str+1) == ch) {
4383+
if (s < end && *s == ch) {
43744384
/* We're going to tell the caller that the literal ends
43754385
here, but that they should continue scanning. But also
43764386
skip over the second brace when we resume scanning. */
4377-
literal_end = *str+1;
4378-
*str += 2;
4387+
*str = s + 1;
43794388
result = 1;
43804389
goto done;
43814390
}
43824391

43834392
/* Where a single '{' is the start of a new expression, a
43844393
single '}' is not allowed. */
43854394
if (ch == '}') {
4395+
*str = s - 1;
43864396
ast_error(c, n, "f-string: single '}' is not allowed");
43874397
return -1;
43884398
}
43894399
}
43904400
/* We're either at a '{', which means we're starting another
43914401
expression; or a '}', which means we're at the end of this
43924402
f-string (for a nested format_spec). */
4403+
s--;
43934404
break;
43944405
}
43954406
}
4396-
literal_end = *str;
4397-
assert(*str <= end);
4398-
assert(*str == end || **str == '{' || **str == '}');
4407+
*str = s;
4408+
assert(s <= end);
4409+
assert(s == end || *s == '{' || *s == '}');
43994410
done:
4400-
if (literal_start != literal_end) {
4411+
if (literal_start != s) {
44014412
if (raw)
44024413
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4403-
literal_end-literal_start,
4414+
s - literal_start,
44044415
NULL, NULL);
44054416
else
44064417
*literal = decode_unicode_with_escapes(c, n, literal_start,
4407-
literal_end-literal_start);
4418+
s - literal_start);
44084419
if (!*literal)
44094420
return -1;
44104421
}

0 commit comments

Comments
 (0)