Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 62402a3

Browse files
Jackenmen, felix-hilden, JelleZijlstra
authored
Support named escapes (\N{...}) in string processing (psf#2319)
Co-authored-by: Felix Hildén <[email protected]> Co-authored-by: Jelle Zijlstra <[email protected]>
1 parent 229498e commit 62402a3

4 files changed

Lines changed: 143 additions & 30 deletions

File tree

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
- Fix incorrect custom breakpoint indices when string group contains fake f-strings
1414
(#2311)
1515
- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
16+
- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is
17+
used (#2319)
1618

1719
## 21.5b2
1820

src/black/trans.py

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
List,
1616
Optional,
1717
Sequence,
18+
Set,
1819
Tuple,
1920
TypeVar,
2021
Union,
@@ -1243,6 +1244,61 @@ def more_splits_should_be_made() -> bool:
12431244
last_line.comments = line.comments.copy()
12441245
yield Ok(last_line)
12451246

1247+
def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
1248+
"""
1249+
Yields:
1250+
All ranges of @string which, if @string were to be split there,
1251+
would result in the splitting of an \\N{...} expression (which is NOT
1252+
allowed).
1253+
"""
1254+
# True - the previous backslash was unescaped
1255+
# False - the previous backslash was escaped *or* there was no backslash
1256+
previous_was_unescaped_backslash = False
1257+
it = iter(enumerate(string))
1258+
for idx, c in it:
1259+
if c == "\\":
1260+
previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1261+
continue
1262+
if not previous_was_unescaped_backslash or c != "N":
1263+
previous_was_unescaped_backslash = False
1264+
continue
1265+
previous_was_unescaped_backslash = False
1266+
1267+
begin = idx - 1 # the position of backslash before \N{...}
1268+
for idx, c in it:
1269+
if c == "}":
1270+
end = idx
1271+
break
1272+
else:
1273+
# malformed nameescape expression?
1274+
# should have been detected by AST parsing earlier...
1275+
raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
1276+
yield begin, end
1277+
1278+
def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    """
    Locates the f-expressions of @string; nothing is yielded unless the
    string prefix of @string contains an "f" (in either case).

    Yields:
        The span of every match of self.RE_FEXPR in @string. Splitting
        @string inside such a range would result in the splitting of an
        f-expression (which is NOT allowed).
    """
    prefix = get_string_prefix(string)
    if "f" in prefix.lower():
        # The pattern is matched in verbose mode.
        yield from (
            fexpr_match.span()
            for fexpr_match in re.finditer(self.RE_FEXPR, string, re.VERBOSE)
        )
1290+
1291+
def _get_illegal_split_indices(self, string: str) -> Set[Index]:
    """
    Collects the indices covered by the f-expression slices and the
    nameescape slices of @string.

    Returns:
        The set of all indices i such that begin <= i <= end for some
        (begin, end) pair yielded by self._iter_fexpr_slices(string) or
        self._iter_nameescape_slices(string).
    """
    # NOTE(review): @end is treated as inclusive for both sources. The
    # nameescape slices end on the closing "}" itself, whereas regex spans
    # are end-exclusive, so the index right after an f-expression is also
    # marked as illegal -- confirm this is intended.
    slice_sources = (
        self._iter_fexpr_slices(string),
        self._iter_nameescape_slices(string),
    )
    return {
        index
        for slices in slice_sources
        for begin, end in slices
        for index in range(begin, end + 1)
    }
1301+
12461302
def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
12471303
"""
12481304
This method contains the algorithm that StringSplitter uses to
@@ -1272,40 +1328,15 @@ def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
12721328
assert is_valid_index(max_break_idx)
12731329
assert_is_leaf_string(string)
12741330

1275-
_fexpr_slices: Optional[List[Tuple[Index, Index]]] = None
1276-
1277-
def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
1278-
"""
1279-
Yields:
1280-
All ranges of @string which, if @string were to be split there,
1281-
would result in the splitting of an f-expression (which is NOT
1282-
allowed).
1283-
"""
1284-
nonlocal _fexpr_slices
1285-
1286-
if _fexpr_slices is None:
1287-
_fexpr_slices = []
1288-
for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
1289-
_fexpr_slices.append(match.span())
1290-
1291-
yield from _fexpr_slices
1292-
1293-
is_fstring = "f" in get_string_prefix(string).lower()
1331+
_illegal_split_indices = self._get_illegal_split_indices(string)
12941332

1295-
def breaks_fstring_expression(i: Index) -> bool:
1333+
def breaks_unsplittable_expression(i: Index) -> bool:
12961334
"""
12971335
Returns:
12981336
True iff returning @i would result in the splitting of an
1299-
f-expression (which is NOT allowed).
1337+
unsplittable expression (which is NOT allowed).
13001338
"""
1301-
if not is_fstring:
1302-
return False
1303-
1304-
for (start, end) in fexpr_slices():
1305-
if start <= i < end:
1306-
return True
1307-
1308-
return False
1339+
return i in _illegal_split_indices
13091340

13101341
def passes_all_checks(i: Index) -> bool:
13111342
"""
@@ -1329,7 +1360,7 @@ def passes_all_checks(i: Index) -> bool:
13291360
is_space
13301361
and is_not_escaped
13311362
and is_big_enough
1332-
and not breaks_fstring_expression(i)
1363+
and not breaks_unsplittable_expression(i)
13331364
)
13341365

13351366
# First, we check all indices BELOW @max_break_idx.

tests/data/long_strings.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,38 @@ def foo():
207207
" of it."
208208
)
209209

210+
string_with_nameescape = (
211+
"........................................................................ \N{LAO KO LA}"
212+
)
213+
214+
string_with_nameescape = (
215+
"........................................................................... \N{LAO KO LA}"
216+
)
217+
218+
string_with_nameescape = (
219+
"............................................................................ \N{LAO KO LA}"
220+
)
221+
222+
string_with_nameescape_and_escaped_backslash = (
223+
"...................................................................... \\\N{LAO KO LA}"
224+
)
225+
226+
string_with_nameescape_and_escaped_backslash = (
227+
"......................................................................... \\\N{LAO KO LA}"
228+
)
229+
230+
string_with_nameescape_and_escaped_backslash = (
231+
".......................................................................... \\\N{LAO KO LA}"
232+
)
233+
234+
string_with_escaped_nameescape = (
235+
"........................................................................ \\N{LAO KO LA}"
236+
)
237+
238+
string_with_escaped_nameescape = (
239+
"........................................................................... \\N{LAO KO LA}"
240+
)
241+
210242

211243
# output
212244

@@ -587,3 +619,43 @@ def foo():
587619
"This is a really long string that can't be merged because it has a likely pragma at the end" # pylint: disable=some-pylint-check
588620
" of it."
589621
)
622+
623+
string_with_nameescape = (
624+
"........................................................................"
625+
" \N{LAO KO LA}"
626+
)
627+
628+
string_with_nameescape = (
629+
"..........................................................................."
630+
" \N{LAO KO LA}"
631+
)
632+
633+
string_with_nameescape = (
634+
"............................................................................"
635+
" \N{LAO KO LA}"
636+
)
637+
638+
string_with_nameescape_and_escaped_backslash = (
639+
"......................................................................"
640+
" \\\N{LAO KO LA}"
641+
)
642+
643+
string_with_nameescape_and_escaped_backslash = (
644+
"........................................................................."
645+
" \\\N{LAO KO LA}"
646+
)
647+
648+
string_with_nameescape_and_escaped_backslash = (
649+
".........................................................................."
650+
" \\\N{LAO KO LA}"
651+
)
652+
653+
string_with_escaped_nameescape = (
654+
"........................................................................ \\N{LAO"
655+
" KO LA}"
656+
)
657+
658+
string_with_escaped_nameescape = (
659+
"..........................................................................."
660+
" \\N{LAO KO LA}"
661+
)

tests/data/long_strings__regression.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,10 @@ async def foo(self):
514514

515515
x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."
516516

517+
x = (
518+
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
519+
)
520+
517521

518522
# output
519523

@@ -1142,3 +1146,7 @@ async def foo(self):
11421146
"This is a long string which contains an f-expr that should not split"
11431147
f" {{{[i for i in range(5)]}}}."
11441148
)
1149+
1150+
x = (
1151+
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
1152+
)

0 commit comments

Comments
 (0)