Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 63b08ac

Browse files
committed
Whoops! We just discovered that Gordon's revamp of this module was
accidentally wiped out by Ping's patch (which shouldn't have affected this file at all, had Ping done a cvs update). This checkin restores Gordon's version, with Fredrik's change merged back in.
1 parent a2aa34f commit 63b08ac

1 file changed

Lines changed: 310 additions & 49 deletions

File tree

Lib/filecmp.py

Lines changed: 310 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,329 @@
1-
"""Compare files."""
1+
"""Utilities for comparing files and directories.
22
3-
import os, stat, statcache
3+
Classes:
4+
dircmp
5+
6+
Functions:
7+
cmp(f1, f2, shallow=1, use_statcache=0) -> int
8+
cmpfiles(a, b, common) -> ([], [], [])
9+
10+
"""
11+
12+
import os
13+
import stat
14+
import statcache
415

516
# Results of past content comparisons, keyed by the (f1, f2) name pair.
# Each value is the (signature of f1, signature of f2, outcome) tuple
# stored by cmp().
_cache = {}
617
# Number of bytes _do_cmp() reads from each file per chunk.
BUFSIZE=8*1024
718

8-
def cmp(f1, f2, shallow=1, use_statcache=0):
    """Compare two files and report whether they are the same.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true, files whose stat signatures (file type, size,
               modification time) are identical are taken to be equal
               without reading them.  Defaults to 1.

    use_statcache -- If true, obtain stat information through
                     statcache.stat instead of os.stat.  Defaults to 0.

    Return value:

    integer -- 1 if the files are the same, 0 otherwise.  If either
               name is not a regular file the result is 0.

    The outcome of each content comparison is cached per (f1, f2) pair
    together with both signatures, and is reused only while both
    signatures are unchanged.  Of course, if 'use_statcache' is true,
    this invalidation mechanism is defeated, and the cache will never
    grow stale.

    Errors raised while stat-ing or opening the files are not caught
    here.

    """
    stat_function = os.stat
    if use_statcache:
        stat_function = statcache.stat
    sig1 = _sig(stat_function(f1))
    sig2 = _sig(stat_function(f2))

    # Anything other than a pair of regular files never compares equal.
    if not (sig1[0] == stat.S_IFREG and sig2[0] == stat.S_IFREG):
        return 0
    if shallow and sig1 == sig2:
        return 1
    # Different sizes: the contents cannot match, so skip reading them.
    if sig1[1] != sig2[1]:
        return 0

    key = (f1, f2)
    cached = _cache.get(key)
    if cached and cached[:2] == (sig1, sig2):
        return cached[2]
    outcome = _do_cmp(f1, f2)
    _cache[key] = (sig1, sig2, outcome)
    return outcome
5263

5364
def _sig(st):
    """Reduce a stat result to the (file type, size, mtime) signature.

    st is indexed with the stat.ST_* constants; the file type is the
    stat.S_IFMT() part of the mode.
    """
    file_type = stat.S_IFMT(st[stat.ST_MODE])
    size = st[stat.ST_SIZE]
    mtime = st[stat.ST_MTIME]
    return (file_type, size, mtime)
5768

5869
def _do_cmp(f1, f2):
    """Compare the contents of two files, reading them in chunks.

    Both files are opened in binary mode and read BUFSIZE bytes at a
    time.  Returns 0 as soon as a pair of chunks differs (this also
    covers files of different length) and 1 when both files are
    exhausted without a difference.

    Both file objects are closed before returning, including when a
    read raises; errors from open() or read() propagate to the caller.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while 1:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return 0
                # Equal and empty: both files ended at the same point.
                if not b1:
                    return 1
        finally:
            fp2.close()
    finally:
        fp1.close()
80+
81+
# Directory comparison class.
82+
#
83+
class dircmp:
84+
"""A class that manages the comparison of 2 directories.
85+
86+
dircmp(a,b,ignore=None,hide=None)
87+
A and B are directories.
88+
IGNORE is a list of names to ignore,
89+
defaults to ['RCS', 'CVS', 'tags'].
90+
HIDE is a list of names to hide,
91+
defaults to [os.curdir, os.pardir].
92+
93+
High level usage:
94+
x = dircmp(dir1, dir2)
95+
x.report() -> prints a report on the differences between dir1 and dir2
96+
or
97+
x.report_partial_closure() -> prints report on differences between dir1
98+
and dir2, and reports on common immediate subdirectories.
99+
x.report_full_closure() -> like report_partial_closure,
100+
but fully recursive.
101+
102+
Attributes:
103+
left_list, right_list: The files in dir1 and dir2,
104+
filtered by hide and ignore.
105+
common: a list of names in both dir1 and dir2.
106+
left_only, right_only: names only in dir1, dir2.
107+
common_dirs: subdirectories in both dir1 and dir2.
108+
common_files: files in both dir1 and dir2.
109+
common_funny: names in both dir1 and dir2 where the type differs between
110+
dir1 and dir2, or the name is not stat-able.
111+
same_files: list of identical files.
112+
diff_files: list of filenames which differ.
113+
funny_files: list of files which could not be compared.
114+
subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.
115+
"""
116+
117+
def __init__(self, a, b, ignore=None, hide=None): # Initialize
118+
self.left = a
119+
self.right = b
120+
if hide is None:
121+
self.hide = [os.curdir, os.pardir] # Names never to be shown
122+
else:
123+
self.hide = hide
124+
if ignore is None:
125+
self.ignore = ['RCS', 'CVS', 'tags'] # Names ignored in comparison
126+
else:
127+
self.ignore = ignore
128+
129+
def phase0(self): # Compare everything except common subdirectories
130+
self.left_list = _filter(os.listdir(self.left),
131+
self.hide+self.ignore)
132+
self.right_list = _filter(os.listdir(self.right),
133+
self.hide+self.ignore)
134+
self.left_list.sort()
135+
self.right_list.sort()
136+
137+
__p4_attrs = ('subdirs',)
138+
__p3_attrs = ('same_files', 'diff_files', 'funny_files')
139+
__p2_attrs = ('common_dirs', 'common_files', 'common_funny')
140+
__p1_attrs = ('common', 'left_only', 'right_only')
141+
__p0_attrs = ('left_list', 'right_list')
142+
143+
def __getattr__(self, attr):
144+
if attr in self.__p4_attrs:
145+
self.phase4()
146+
elif attr in self.__p3_attrs:
147+
self.phase3()
148+
elif attr in self.__p2_attrs:
149+
self.phase2()
150+
elif attr in self.__p1_attrs:
151+
self.phase1()
152+
elif attr in self.__p0_attrs:
153+
self.phase0()
154+
else:
155+
raise AttributeError, attr
156+
return getattr(self, attr)
157+
158+
def phase1(self): # Compute common names
159+
a_only, b_only = [], []
160+
common = {}
161+
b = {}
162+
for fnm in self.right_list:
163+
b[fnm] = 1
164+
for x in self.left_list:
165+
if b.get(x, 0):
166+
common[x] = 1
167+
else:
168+
a_only.append(x)
169+
for x in self.right_list:
170+
if common.get(x, 0):
171+
pass
172+
else:
173+
b_only.append(x)
174+
self.common = common.keys()
175+
self.left_only = a_only
176+
self.right_only = b_only
177+
178+
def phase2(self): # Distinguish files, directories, funnies
179+
self.common_dirs = []
180+
self.common_files = []
181+
self.common_funny = []
182+
183+
for x in self.common:
184+
a_path = os.path.join(self.left, x)
185+
b_path = os.path.join(self.right, x)
186+
187+
ok = 1
188+
try:
189+
a_stat = statcache.stat(a_path)
190+
except os.error, why:
191+
# print 'Can\'t stat', a_path, ':', why[1]
192+
ok = 0
193+
try:
194+
b_stat = statcache.stat(b_path)
195+
except os.error, why:
196+
# print 'Can\'t stat', b_path, ':', why[1]
197+
ok = 0
198+
199+
if ok:
200+
a_type = stat.S_IFMT(a_stat[stat.ST_MODE])
201+
b_type = stat.S_IFMT(b_stat[stat.ST_MODE])
202+
if a_type <> b_type:
203+
self.common_funny.append(x)
204+
elif stat.S_ISDIR(a_type):
205+
self.common_dirs.append(x)
206+
elif stat.S_ISREG(a_type):
207+
self.common_files.append(x)
208+
else:
209+
self.common_funny.append(x)
210+
else:
211+
self.common_funny.append(x)
212+
213+
def phase3(self): # Find out differences between common files
214+
xx = cmpfiles(self.left, self.right, self.common_files)
215+
self.same_files, self.diff_files, self.funny_files = xx
216+
217+
def phase4(self): # Find out differences between common subdirectories
218+
# A new dircmp object is created for each common subdirectory,
219+
# these are stored in a dictionary indexed by filename.
220+
# The hide and ignore properties are inherited from the parent
221+
self.subdirs = {}
222+
for x in self.common_dirs:
223+
a_x = os.path.join(self.left, x)
224+
b_x = os.path.join(self.right, x)
225+
self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)
226+
227+
def phase4_closure(self): # Recursively call phase4() on subdirectories
228+
self.phase4()
229+
for x in self.subdirs.keys():
230+
self.subdirs[x].phase4_closure()
231+
232+
def report(self): # Print a report on the differences between a and b
233+
# Output format is purposely lousy
234+
print 'diff', self.left, self.right
235+
if self.left_only:
236+
self.left_only.sort()
237+
print 'Only in', self.left, ':', self.left_only
238+
if self.right_only:
239+
self.right_only.sort()
240+
print 'Only in', self.right, ':', self.right_only
241+
if self.same_files:
242+
self.same_files.sort()
243+
print 'Identical files :', self.same_files
244+
if self.diff_files:
245+
self.diff_files.sort()
246+
print 'Differing files :', self.diff_files
247+
if self.funny_files:
248+
self.funny_files.sort()
249+
print 'Trouble with common files :', self.funny_files
250+
if self.common_dirs:
251+
self.common_dirs.sort()
252+
print 'Common subdirectories :', self.common_dirs
253+
if self.common_funny:
254+
self.common_funny.sort()
255+
print 'Common funny cases :', self.common_funny
256+
257+
def report_partial_closure(self): # Print reports on self and on subdirs
258+
self.report()
259+
for x in self.subdirs.keys():
260+
print
261+
self.subdirs[x].report()
262+
263+
def report_full_closure(self): # Report on self and subdirs recursively
264+
self.report()
265+
for x in self.subdirs.keys():
266+
print
267+
self.subdirs[x].report_full_closure()
268+
269+
270+
# Compare common files in two directories.
271+
# Return:
272+
# - files that compare equal
273+
# - files that compare different
274+
# - funny cases (can't stat etc.)
275+
#
276+
def cmpfiles(a, b, common):
    """Compare the files named in common between two directories.

    cmpfiles(a, b, common)
      a and b are directory names
      common is a list of file names to look up in both directories
    Returns a tuple of three lists:
      names whose files compare equal
      names whose files compare different
      names whose comparison raised os.error (e.g. could not be stat-ed)

    A name that is not a regular file in both directories ends up in
    the "different" list, because cmp() returns 0 for such pairs.
    """
    same = []
    different = []
    funny = []
    # _cmp() returns 0, 1 or 2, which indexes straight into this tuple.
    buckets = (same, different, funny)
    for name in common:
        verdict = _cmp(os.path.join(a, name), os.path.join(b, name))
        buckets[verdict].append(name)
    return buckets
291+
292+
293+
# Compare two files.
294+
# Return:
295+
# 0 for equal
296+
# 1 for different
297+
# 2 for funny cases (can't stat, etc.)
298+
#
299+
def _cmp(a, b):
    """Classify a pair of files for cmpfiles().

    Returns 0 if cmp() reports the files equal, 1 if it reports them
    different, and 2 if cmp() raised os.error.
    """
    try:
        same = cmp(a, b)
    except os.error:
        return 2
    # cmp() answers 1 for "same"; invert it so that equal maps to 0.
    return not abs(same)
304+
305+
306+
# Return a copy with items that occur in skip removed.
307+
#
308+
def _filter(list, skip):
    """Return a new list of the items of list that do not occur in skip.

    The input is not modified and the order of the kept items is
    unchanged.
    """
    kept = []
    for entry in list:
        if entry in skip:
            continue
        kept.append(entry)
    return kept
313+
314+
315+
# Demonstration and testing.
316+
#
317+
def demo():
318+
import sys
319+
import getopt
320+
options, args = getopt.getopt(sys.argv[1:], 'r')
321+
if len(args) <> 2: raise getopt.error, 'need exactly two args'
322+
dd = dircmp(args[0], args[1])
323+
if ('-r', '') in options:
324+
dd.report_full_closure()
325+
else:
326+
dd.report()
327+
328+
# Run the command-line demonstration when this file is executed as a script.
if __name__ == '__main__':
329+
demo()

0 commit comments

Comments
 (0)