1515 List ,
1616 Optional ,
1717 Sequence ,
18+ Set ,
1819 Tuple ,
1920 TypeVar ,
2021 Union ,
@@ -1243,6 +1244,61 @@ def more_splits_should_be_made() -> bool:
12431244 last_line .comments = line .comments .copy ()
12441245 yield Ok (last_line )
12451246
1247+ def _iter_nameescape_slices (self , string : str ) -> Iterator [Tuple [Index , Index ]]:
1248+ """
1249+ Yields:
1250+ All ranges of @string which, if @string were to be split there,
1251+ would result in the splitting of an \\ N{...} expression (which is NOT
1252+ allowed).
1253+ """
1254+ # True - the previous backslash was unescaped
1255+ # False - the previous backslash was escaped *or* there was no backslash
1256+ previous_was_unescaped_backslash = False
1257+ it = iter (enumerate (string ))
1258+ for idx , c in it :
1259+ if c == "\\ " :
1260+ previous_was_unescaped_backslash = not previous_was_unescaped_backslash
1261+ continue
1262+ if not previous_was_unescaped_backslash or c != "N" :
1263+ previous_was_unescaped_backslash = False
1264+ continue
1265+ previous_was_unescaped_backslash = False
1266+
1267+ begin = idx - 1 # the position of backslash before \N{...}
1268+ for idx , c in it :
1269+ if c == "}" :
1270+ end = idx
1271+ break
1272+ else :
1273+ # malformed nameescape expression?
1274+ # should have been detected by AST parsing earlier...
1275+ raise RuntimeError (f"{ self .__class__ .__name__ } LOGIC ERROR!" )
1276+ yield begin , end
1277+
def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    """
    Locate every f-expression in @string.

    Yields:
        All ranges of @string which, if @string were to be split there,
        would result in the splitting of an f-expression (which is NOT
        allowed). Each range is the `(start, end)` pair reported by
        `match.span()` for a match of `self.RE_FEXPR`. Nothing is yielded
        when the string prefix contains no "f" (case-insensitive).
    """
    prefix = get_string_prefix(string).lower()
    if "f" in prefix:
        yield from (
            fexpr.span()
            for fexpr in re.finditer(self.RE_FEXPR, string, re.VERBOSE)
        )
1290+
1291+ def _get_illegal_split_indices (self , string : str ) -> Set [Index ]:
1292+ illegal_indices : Set [Index ] = set ()
1293+ iterators = [
1294+ self ._iter_fexpr_slices (string ),
1295+ self ._iter_nameescape_slices (string ),
1296+ ]
1297+ for it in iterators :
1298+ for begin , end in it :
1299+ illegal_indices .update (range (begin , end + 1 ))
1300+ return illegal_indices
1301+
12461302 def _get_break_idx (self , string : str , max_break_idx : int ) -> Optional [int ]:
12471303 """
12481304 This method contains the algorithm that StringSplitter uses to
@@ -1272,40 +1328,15 @@ def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
12721328 assert is_valid_index (max_break_idx )
12731329 assert_is_leaf_string (string )
12741330
1275- _fexpr_slices : Optional [List [Tuple [Index , Index ]]] = None
1276-
1277- def fexpr_slices () -> Iterator [Tuple [Index , Index ]]:
1278- """
1279- Yields:
1280- All ranges of @string which, if @string were to be split there,
1281- would result in the splitting of an f-expression (which is NOT
1282- allowed).
1283- """
1284- nonlocal _fexpr_slices
1285-
1286- if _fexpr_slices is None :
1287- _fexpr_slices = []
1288- for match in re .finditer (self .RE_FEXPR , string , re .VERBOSE ):
1289- _fexpr_slices .append (match .span ())
1290-
1291- yield from _fexpr_slices
1292-
1293- is_fstring = "f" in get_string_prefix (string ).lower ()
1331+ _illegal_split_indices = self ._get_illegal_split_indices (string )
12941332
1295- def breaks_fstring_expression (i : Index ) -> bool :
1333+ def breaks_unsplittable_expression (i : Index ) -> bool :
12961334 """
12971335 Returns:
12981336 True iff returning @i would result in the splitting of an
1299- f- expression (which is NOT allowed).
1337+ unsplittable expression (which is NOT allowed).
13001338 """
1301- if not is_fstring :
1302- return False
1303-
1304- for (start , end ) in fexpr_slices ():
1305- if start <= i < end :
1306- return True
1307-
1308- return False
1339+ return i in _illegal_split_indices
13091340
13101341 def passes_all_checks (i : Index ) -> bool :
13111342 """
@@ -1329,7 +1360,7 @@ def passes_all_checks(i: Index) -> bool:
13291360 is_space
13301361 and is_not_escaped
13311362 and is_big_enough
1332- and not breaks_fstring_expression (i )
1363+ and not breaks_unsplittable_expression (i )
13331364 )
13341365
13351366 # First, we check all indices BELOW @max_break_idx.
0 commit comments