@@ -890,7 +890,7 @@ def _plain_replace(self, a, alo, ahi, b, blo, bhi):
890890 for g in first , second :
891891 yield from g
892892
893- def _fancy_replace (self , a , alo , ahi , b , blo , bhi ):
893+ def _fancy_replace (self , a , alo , ahi , b , blo , bhi , _gravity = 1e-6 ):
894894 r"""
895895 When replacing one block of lines with another, search the blocks
896896 for *similar* lines; the best-matching pair (if any) is used as a
@@ -918,26 +918,39 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
918918 # search for the pair that matches best without being identical
919919 # (identical lines must be junk lines, & we don't want to synch up
920920 # on junk -- unless we have to)
921+
922+ # for pathological cases with many equal ratios prefer to split
923+ # closer to the middle of a, b chunks such that the resulting
924+ # branching is more optimal (bisect-like)
925+ def _drag_to_center (i , lo , hi ):
926+ # any convex function with a maximum at (lo + hi - 1) / 2
927+ # this one is zero at edges lo, hi - 1 and _gravity in the middle
928+ return _gravity * (1 - ((2 * i - lo - hi + 1 ) / (hi - lo - 1 )) ** 2 )
929+ # with the weight above, the best_ratio becomes slightly bigger
930+ # which means that the real cutoff is slightly smaller than 0.75
931+
921932 for j in range (blo , bhi ):
922933 bj = b [j ]
923934 cruncher .set_seq2 (bj )
935+ weight_b = _drag_to_center (j , blo , bhi )
924936 for i in range (alo , ahi ):
925937 ai = a [i ]
926938 if ai == bj :
927939 if eqi is None :
928940 eqi , eqj = i , j
929941 continue
930942 cruncher .set_seq1 (ai )
943+ weight_ab = weight_b + _drag_to_center (i , alo , ahi )
931944 # computing similarity is expensive, so use the quick
932945 # upper bounds first -- have seen this speed up messy
933946 # compares by a factor of 3.
934947 # note that ratio() is only expensive to compute the first
935948 # time it's called on a sequence pair; the expensive part
936949 # of the computation is cached by cruncher
937- if cruncher .real_quick_ratio () > best_ratio and \
938- cruncher .quick_ratio () > best_ratio and \
939- cruncher .ratio () > best_ratio :
940- best_ratio , best_i , best_j = cruncher .ratio (), i , j
950+ if cruncher .real_quick_ratio () + weight_ab > best_ratio and \
951+ cruncher .quick_ratio () + weight_ab > best_ratio and \
952+ cruncher .ratio () + weight_ab > best_ratio :
953+ best_ratio , best_i , best_j = cruncher .ratio () + weight_ab , i , j
941954 if best_ratio < cutoff :
942955 # no non-identical "pretty close" pair
943956 if eqi is None :
0 commit comments