11#! /usr/bin/env python
22
3+ from __future__ import generators
4+
35"""
46Module difflib -- helpers for computing deltas between objects.
57
2224__all__ = ['get_close_matches' , 'ndiff' , 'restore' , 'SequenceMatcher' ,
2325 'Differ' ]
2426
25- TRACE = 0
26-
2727class SequenceMatcher :
2828
2929 """
@@ -406,9 +406,6 @@ def find_longest_match(self, alo, ahi, blo, bhi):
406406 a [besti + bestsize ] == b [bestj + bestsize ]:
407407 bestsize = bestsize + 1
408408
409- if TRACE :
410- print "get_matching_blocks" , alo , ahi , blo , bhi
411- print " returns" , besti , bestj , bestsize
412409 return besti , bestj , bestsize
413410
414411 def get_matching_blocks (self ):
@@ -432,8 +429,6 @@ def get_matching_blocks(self):
432429 la , lb = len (self .a ), len (self .b )
433430 self .__helper (0 , la , 0 , lb , self .matching_blocks )
434431 self .matching_blocks .append ( (la , lb , 0 ) )
435- if TRACE :
436- print '*** matching blocks' , self .matching_blocks
437432 return self .matching_blocks
438433
439434 # builds list of matching blocks covering a[alo:ahi] and
@@ -694,7 +689,7 @@ class Differ:
694689
695690 Finally, we compare the two:
696691
697- >>> result = d.compare(text1, text2)
692+ >>> result = list( d.compare(text1, text2) )
698693
699694 'result' is a list of strings, so let's pretty-print it:
700695
@@ -731,7 +726,7 @@ class Differ:
731726 Construct a text differencer, with optional filters.
732727
733728 compare(a, b)
734- Compare two sequences of lines; return the resulting delta (list) .
729+ Compare two sequences of lines; generate the resulting delta.
735730 """
736731
737732 def __init__ (self , linejunk = None , charjunk = None ):
@@ -753,16 +748,15 @@ def __init__(self, linejunk=None, charjunk=None):
753748
754749 self .linejunk = linejunk
755750 self .charjunk = charjunk
756- self .results = []
757751
758752 def compare (self , a , b ):
759753 r"""
760- Compare two sequences of lines; return the resulting delta (list) .
754+ Compare two sequences of lines; generate the resulting delta.
761755
762756 Each sequence must contain individual single-line strings ending with
763757 newlines. Such sequences can be obtained from the `readlines()` method
764- of file-like objects. The list returned is also made up of
765- newline- terminated strings, ready to be used with the `writelines()`
758+ of file-like objects. The delta generated also consists of newline-
759+ terminated strings, ready to be printed as-is via the writelines()
766760 method of a file-like object.
767761
768762 Example:
@@ -783,34 +777,38 @@ def compare(self, a, b):
783777 cruncher = SequenceMatcher (self .linejunk , a , b )
784778 for tag , alo , ahi , blo , bhi in cruncher .get_opcodes ():
785779 if tag == 'replace' :
786- self ._fancy_replace (a , alo , ahi , b , blo , bhi )
780+ g = self ._fancy_replace (a , alo , ahi , b , blo , bhi )
787781 elif tag == 'delete' :
788- self ._dump ('-' , a , alo , ahi )
782+ g = self ._dump ('-' , a , alo , ahi )
789783 elif tag == 'insert' :
790- self ._dump ('+' , b , blo , bhi )
784+ g = self ._dump ('+' , b , blo , bhi )
791785 elif tag == 'equal' :
792- self ._dump (' ' , a , alo , ahi )
786+ g = self ._dump (' ' , a , alo , ahi )
793787 else :
794788 raise ValueError , 'unknown tag ' + `tag`
795- results = self . results
796- self . results = []
797- return results
789+
790+ for line in g :
791+ yield line
798792
799793 def _dump (self , tag , x , lo , hi ):
800- """Store comparison results for a same-tagged range."""
794+ """Generate comparison results for a same-tagged range."""
801795 for i in xrange (lo , hi ):
802- self . results . append ( '%s %s' % (tag , x [i ]) )
796+ yield '%s %s' % (tag , x [i ])
803797
804798 def _plain_replace (self , a , alo , ahi , b , blo , bhi ):
805799 assert alo < ahi and blo < bhi
806800 # dump the shorter block first -- reduces the burden on short-term
807801 # memory if the blocks are of very different sizes
808802 if bhi - blo < ahi - alo :
809- self ._dump ('+' , b , blo , bhi )
810- self ._dump ('-' , a , alo , ahi )
803+ first = self ._dump ('+' , b , blo , bhi )
804+ second = self ._dump ('-' , a , alo , ahi )
811805 else :
812- self ._dump ('-' , a , alo , ahi )
813- self ._dump ('+' , b , blo , bhi )
806+ first = self ._dump ('-' , a , alo , ahi )
807+ second = self ._dump ('+' , b , blo , bhi )
808+
809+ for g in first , second :
810+ for line in g :
811+ yield line
814812
815813 def _fancy_replace (self , a , alo , ahi , b , blo , bhi ):
816814 r"""
@@ -830,12 +828,6 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
830828 ? ^ ^ ^
831829 """
832830
833- if TRACE :
834- self .results .append ('*** _fancy_replace %s %s %s %s\n '
835- % (alo , ahi , blo , bhi ))
836- self ._dump ('>' , a , alo , ahi )
837- self ._dump ('<' , b , blo , bhi )
838-
839831 # don't synch up unless the lines have a similarity score of at
840832 # least cutoff; best_ratio tracks the best score seen so far
841833 best_ratio , cutoff = 0.74 , 0.75
@@ -869,7 +861,8 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
869861 # no non-identical "pretty close" pair
870862 if eqi is None :
871863 # no identical pair either -- treat it as a straight replace
872- self ._plain_replace (a , alo , ahi , b , blo , bhi )
864+ for line in self ._plain_replace (a , alo , ahi , b , blo , bhi ):
865+ yield line
873866 return
874867 # no close pair, but an identical pair -- synch up on that
875868 best_i , best_j , best_ratio = eqi , eqj , 1.0
@@ -879,14 +872,10 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
879872
880873 # a[best_i] very similar to b[best_j]; eqi is None iff they're not
881874 # identical
882- if TRACE :
883- self .results .append ('*** best_ratio %s %s %s %s\n '
884- % (best_ratio , best_i , best_j ))
885- self ._dump ('>' , a , best_i , best_i + 1 )
886- self ._dump ('<' , b , best_j , best_j + 1 )
887875
888876 # pump out diffs from before the synch point
889- self ._fancy_helper (a , alo , best_i , b , blo , best_j )
877+ for line in self ._fancy_helper (a , alo , best_i , b , blo , best_j ):
878+ yield line
890879
891880 # do intraline marking on the synch pair
892881 aelt , belt = a [best_i ], b [best_j ]
@@ -908,22 +897,28 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
908897 btags += ' ' * lb
909898 else :
910899 raise ValueError , 'unknown tag ' + `tag`
911- self ._qformat (aelt , belt , atags , btags )
900+ for line in self ._qformat (aelt , belt , atags , btags ):
901+ yield line
912902 else :
913903 # the synch pair is identical
914- self . results . append ( ' ' + aelt )
904+ yield ' ' + aelt
915905
916906 # pump out diffs from after the synch point
917- self ._fancy_helper (a , best_i + 1 , ahi , b , best_j + 1 , bhi )
907+ for line in self ._fancy_helper (a , best_i + 1 , ahi , b , best_j + 1 , bhi ):
908+ yield line
918909
919910 def _fancy_helper (self , a , alo , ahi , b , blo , bhi ):
911+ g = []
920912 if alo < ahi :
921913 if blo < bhi :
922- self ._fancy_replace (a , alo , ahi , b , blo , bhi )
914+ g = self ._fancy_replace (a , alo , ahi , b , blo , bhi )
923915 else :
924- self ._dump ('-' , a , alo , ahi )
916+ g = self ._dump ('-' , a , alo , ahi )
925917 elif blo < bhi :
926- self ._dump ('+' , b , blo , bhi )
918+ g = self ._dump ('+' , b , blo , bhi )
919+
920+ for line in g :
921+ yield line
927922
928923 def _qformat (self , aline , bline , atags , btags ):
929924 r"""
@@ -949,13 +944,13 @@ def _qformat(self, aline, bline, atags, btags):
949944 atags = atags [common :].rstrip ()
950945 btags = btags [common :].rstrip ()
951946
952- self . results . append ( "- " + aline )
947+ yield "- " + aline
953948 if atags :
954- self . results . append ( "? %s%s\n " % ("\t " * common , atags ) )
949+ yield "? %s%s\n " % ("\t " * common , atags )
955950
956- self . results . append ( "+ " + bline )
951+ yield "+ " + bline
957952 if btags :
958- self . results . append ( "? %s%s\n " % ("\t " * common , btags ) )
953+ yield "? %s%s\n " % ("\t " * common , btags )
959954
960955# With respect to junk, an earlier version of ndiff simply refused to
961956# *start* a match with a junk element. The result was cases like this:
@@ -1050,7 +1045,7 @@ def ndiff(a, b, linejunk=IS_LINE_JUNK, charjunk=IS_CHARACTER_JUNK):
10501045
10511046def restore (delta , which ):
10521047 r"""
1053- Return one of the two sequences that generated a delta.
1048+ Generate one of the two sequences that generated a delta.
10541049
10551050 Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
10561051 lines originating from file 1 or 2 (parameter `which`), stripping off line
@@ -1060,6 +1055,7 @@ def restore(delta, which):
10601055
10611056 >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
10621057 ... 'ore\ntree\nemu\n'.splitlines(1))
1058+ >>> diff = list(diff)
10631059 >>> print ''.join(restore(diff, 1)),
10641060 one
10651061 two
@@ -1075,11 +1071,9 @@ def restore(delta, which):
10751071 raise ValueError , ('unknown delta choice (must be 1 or 2): %r'
10761072 % which )
10771073 prefixes = (" " , tag )
1078- results = []
10791074 for line in delta :
10801075 if line [:2 ] in prefixes :
1081- results .append (line [2 :])
1082- return results
1076+ yield line [2 :]
10831077
10841078def _test ():
10851079 import doctest , difflib
0 commit comments