|
1 | | -"""Utilities for comparing files and directories. |
| 1 | +"""Compare files.""" |
2 | 2 |
|
3 | | -Classes: |
4 | | - dircmp |
5 | | -
|
6 | | -Functions: |
7 | | - cmp(f1, f2, shallow=1, use_statcache=0) -> int |
8 | | - cmpfiles(a, b, common) -> ([], [], []) |
9 | | -
|
10 | | -""" |
11 | | - |
12 | | -import os |
13 | | -import stat |
14 | | -import statcache |
| 3 | +import os, stat, statcache |
15 | 4 |
|
16 | 5 | _cache = {} |
17 | 6 | BUFSIZE=8*1024 |
18 | 7 |
|
19 | | -def cmp(f1, f2, shallow=1,use_statcache=0): |
20 | | - """Compare two files. |
| 8 | +def cmp(f1, f2, shallow=1,use_statcache=0): |
| 9 | + """Compare two files. |
21 | 10 |
|
22 | | - Arguments: |
| 11 | + Arguments: |
23 | 12 |
|
24 | | - f1 -- First file name |
| 13 | + f1 -- First file name |
25 | 14 |
|
26 | | - f2 -- Second file name |
| 15 | + f2 -- Second file name |
27 | 16 |
|
28 | | - shallow -- Just check stat signature (do not read the files). |
29 | | - defaults to 1. |
| 17 | + shallow -- Just check stat signature (do not read the files). |
| 18 | + defaults to 1. |
30 | 19 |
|
31 | | - use_statcache -- Do not stat() each file directly: go through |
32 | | - the statcache module for more efficiency. |
| 20 | + use_statcache -- Do not stat() each file directly: go through |
| 21 | + the statcache module for more efficiency. |
33 | 22 |
|
34 | | - Return value: |
| 23 | + Return value: |
35 | 24 |
|
36 | | - integer -- 1 if the files are the same, 0 otherwise. |
| 25 | + integer -- 1 if the files are the same, 0 otherwise. |
37 | 26 |
|
38 | | - This function uses a cache for past comparisons and the results, |
39 | | - with a cache invalidation mechanism relying on stale signatures. |
40 | | - Of course, if 'use_statcache' is true, this mechanism is defeated, |
41 | | - and the cache will never grow stale. |
| 27 | + This function uses a cache for past comparisons and the results, |
| 28 | + with a cache invalidation mechanism relying on stale signatures. |
| 29 | + Of course, if 'use_statcache' is true, this mechanism is defeated, |
| 30 | + and the cache will never grow stale. |
42 | 31 |
|
43 | | - """ |
44 | | - stat_function = (os.stat, statcache.stat)[use_statcache] |
45 | | - s1, s2 = _sig(stat_function(f1)), _sig(stat_function(f2)) |
46 | | - if s1[0]!=stat.S_IFREG or s2[0]!=stat.S_IFREG: return 0 |
47 | | - if shallow and s1 == s2: return 1 |
48 | | - if s1[1]!=s2[1]: return 0 |
| 32 | + """ |
| 33 | + stat_function = (os.stat, statcache.stat)[use_statcache] |
| 34 | + s1, s2 = _sig(stat_function(f1)), _sig(stat_function(f2)) |
| 35 | + if s1[0]!=stat.S_IFREG or s2[0]!=stat.S_IFREG: return 0 |
| 36 | + if shallow and s1 == s2: return 1 |
| 37 | + if s1[1]!=s2[1]: return 0 |
49 | 38 |
|
50 | | - result = _cache.get((f1, f2)) |
51 | | - if result and (s1, s2)==result[:2]: |
52 | | - return result[2] |
53 | | - outcome = _do_cmp(f1, f2) |
54 | | - _cache[f1, f2] = s1, s2, outcome |
55 | | - return outcome |
| 39 | + result = _cache.get((f1, f2)) |
| 40 | + if result and (s1, s2)==result[:2]: |
| 41 | + return result[2] |
| 42 | + outcome = _do_cmp(f1, f2) |
| 43 | + _cache[f1, f2] = s1, s2, outcome |
| 44 | + return outcome |
56 | 45 |
|
57 | 46 | def _sig(st): |
58 | | - return (stat.S_IFMT(st[stat.ST_MODE]), |
59 | | - st[stat.ST_SIZE], |
60 | | - st[stat.ST_MTIME]) |
| 47 | + return (stat.S_IFMT(st[stat.ST_MODE]), |
| 48 | + st[stat.ST_SIZE], |
| 49 | + st[stat.ST_MTIME]) |
61 | 50 |
|
62 | 51 | def _do_cmp(f1, f2): |
63 | | - bufsize = BUFSIZE |
64 | | - fp1 , fp2 = open(f1, 'rb'), open(f2, 'rb') |
65 | | - while 1: |
66 | | - b1, b2 = fp1.read(bufsize), fp2.read(bufsize) |
67 | | - if b1!=b2: return 0 |
68 | | - if not b1: return 1 |
69 | | - |
70 | | -# Directory comparison class. |
71 | | -# |
72 | | -class dircmp: |
73 | | - """A class that manages the comparison of 2 directories. |
74 | | -
|
75 | | - dircmp(a,b,ignore=None,hide=None) |
76 | | - A and B are directories. |
77 | | - IGNORE is a list of names to ignore, |
78 | | - defaults to ['RCS', 'CVS', 'tags']. |
79 | | - HIDE is a list of names to hide, |
80 | | - defaults to [os.curdir, os.pardir]. |
81 | | -
|
82 | | - High level usage: |
83 | | - x = dircmp(dir1, dir2) |
84 | | - x.report() -> prints a report on the differences between dir1 and dir2 |
85 | | - or |
86 | | - x.report_partial_closure() -> prints report on differences between dir1 |
87 | | - and dir2, and reports on common immediate subdirectories. |
88 | | - x.report_full_closure() -> like report_partial_closure, |
89 | | - but fully recursive. |
90 | | -
|
91 | | - Attributes: |
92 | | - left_list, right_list: The files in dir1 and dir2, |
93 | | - filtered by hide and ignore. |
94 | | - common: a list of names in both dir1 and dir2. |
95 | | - left_only, right_only: names only in dir1, dir2. |
96 | | - common_dirs: subdirectories in both dir1 and dir2. |
97 | | - common_files: files in both dir1 and dir2. |
98 | | - common_funny: names in both dir1 and dir2 where the type differs between |
99 | | - dir1 and dir2, or the name is not stat-able. |
100 | | - same_files: list of identical files. |
101 | | - diff_files: list of filenames which differ. |
102 | | - funny_files: list of files which could not be compared. |
103 | | - subdirs: a dictionary of dircmp objects, keyed by names in common_dirs. |
104 | | - """ |
105 | | - |
106 | | - def __init__(self, a, b, ignore=None, hide=None): # Initialize |
107 | | - self.left = a |
108 | | - self.right = b |
109 | | - if hide is None: |
110 | | - self.hide = [os.curdir, os.pardir] # Names never to be shown |
111 | | - else: |
112 | | - self.hide = hide |
113 | | - if ignore is None: |
114 | | - self.ignore = ['RCS', 'CVS', 'tags'] # Names ignored in comparison |
115 | | - else: |
116 | | - self.ignore = ignore |
117 | | - |
118 | | - def phase0(self): # Compare everything except common subdirectories |
119 | | - self.left_list = _filter(os.listdir(self.left), |
120 | | - self.hide+self.ignore) |
121 | | - self.right_list = _filter(os.listdir(self.right), |
122 | | - self.hide+self.ignore) |
123 | | - self.left_list.sort() |
124 | | - self.right_list.sort() |
125 | | - |
126 | | - __p4_attrs = ('subdirs',) |
127 | | - __p3_attrs = ('same_files', 'diff_files', 'funny_files') |
128 | | - __p2_attrs = ('common_dirs', 'common_files', 'common_funny') |
129 | | - __p1_attrs = ('common', 'left_only', 'right_only') |
130 | | - __p0_attrs = ('left_list', 'right_list') |
131 | | - |
132 | | - def __getattr__(self, attr): |
133 | | - if attr in self.__p4_attrs: |
134 | | - self.phase4() |
135 | | - elif attr in self.__p3_attrs: |
136 | | - self.phase3() |
137 | | - elif attr in self.__p2_attrs: |
138 | | - self.phase2() |
139 | | - elif attr in self.__p1_attrs: |
140 | | - self.phase1() |
141 | | - elif attr in self.__p0_attrs: |
142 | | - self.phase0() |
143 | | - else: |
144 | | - raise AttributeError, attr |
145 | | - return getattr(self, attr) |
146 | | - |
147 | | - def phase1(self): # Compute common names |
148 | | - a_only, b_only = [], [] |
149 | | - common = {} |
150 | | - b = {} |
151 | | - for fnm in self.right_list: |
152 | | - b[fnm] = 1 |
153 | | - for x in self.left_list: |
154 | | - if b.get(x, 0): |
155 | | - common[x] = 1 |
156 | | - else: |
157 | | - a_only.append(x) |
158 | | - for x in self.right_list: |
159 | | - if common.get(x, 0): |
160 | | - pass |
161 | | - else: |
162 | | - b_only.append(x) |
163 | | - self.common = common.keys() |
164 | | - self.left_only = a_only |
165 | | - self.right_only = b_only |
166 | | - |
167 | | - def phase2(self): # Distinguish files, directories, funnies |
168 | | - self.common_dirs = [] |
169 | | - self.common_files = [] |
170 | | - self.common_funny = [] |
171 | | - |
172 | | - for x in self.common: |
173 | | - a_path = os.path.join(self.left, x) |
174 | | - b_path = os.path.join(self.right, x) |
175 | | - |
176 | | - ok = 1 |
177 | | - try: |
178 | | - a_stat = statcache.stat(a_path) |
179 | | - except os.error, why: |
180 | | - # print 'Can\'t stat', a_path, ':', why[1] |
181 | | - ok = 0 |
182 | | - try: |
183 | | - b_stat = statcache.stat(b_path) |
184 | | - except os.error, why: |
185 | | - # print 'Can\'t stat', b_path, ':', why[1] |
186 | | - ok = 0 |
187 | | - |
188 | | - if ok: |
189 | | - a_type = stat.S_IFMT(a_stat[stat.ST_MODE]) |
190 | | - b_type = stat.S_IFMT(b_stat[stat.ST_MODE]) |
191 | | - if a_type <> b_type: |
192 | | - self.common_funny.append(x) |
193 | | - elif stat.S_ISDIR(a_type): |
194 | | - self.common_dirs.append(x) |
195 | | - elif stat.S_ISREG(a_type): |
196 | | - self.common_files.append(x) |
197 | | - else: |
198 | | - self.common_funny.append(x) |
199 | | - else: |
200 | | - self.common_funny.append(x) |
201 | | - |
202 | | - def phase3(self): # Find out differences between common files |
203 | | - xx = cmpfiles(self.left, self.right, self.common_files) |
204 | | - self.same_files, self.diff_files, self.funny_files = xx |
205 | | - |
206 | | - def phase4(self): # Find out differences between common subdirectories |
207 | | - # A new dircmp object is created for each common subdirectory, |
208 | | - # these are stored in a dictionary indexed by filename. |
209 | | - # The hide and ignore properties are inherited from the parent |
210 | | - self.subdirs = {} |
211 | | - for x in self.common_dirs: |
212 | | - a_x = os.path.join(self.left, x) |
213 | | - b_x = os.path.join(self.right, x) |
214 | | - self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide) |
215 | | - |
216 | | - def phase4_closure(self): # Recursively call phase4() on subdirectories |
217 | | - self.phase4() |
218 | | - for x in self.subdirs.keys(): |
219 | | - self.subdirs[x].phase4_closure() |
220 | | - |
221 | | - def report(self): # Print a report on the differences between a and b |
222 | | - # Output format is purposely lousy |
223 | | - print 'diff', self.left, self.right |
224 | | - if self.left_only: |
225 | | - self.left_only.sort() |
226 | | - print 'Only in', self.left, ':', self.left_only |
227 | | - if self.right_only: |
228 | | - self.right_only.sort() |
229 | | - print 'Only in', self.right, ':', self.right_only |
230 | | - if self.same_files: |
231 | | - self.same_files.sort() |
232 | | - print 'Identical files :', self.same_files |
233 | | - if self.diff_files: |
234 | | - self.diff_files.sort() |
235 | | - print 'Differing files :', self.diff_files |
236 | | - if self.funny_files: |
237 | | - self.funny_files.sort() |
238 | | - print 'Trouble with common files :', self.funny_files |
239 | | - if self.common_dirs: |
240 | | - self.common_dirs.sort() |
241 | | - print 'Common subdirectories :', self.common_dirs |
242 | | - if self.common_funny: |
243 | | - self.common_funny.sort() |
244 | | - print 'Common funny cases :', self.common_funny |
245 | | - |
246 | | - def report_partial_closure(self): # Print reports on self and on subdirs |
247 | | - self.report() |
248 | | - for x in self.subdirs.keys(): |
249 | | - print |
250 | | - self.subdirs[x].report() |
251 | | - |
252 | | - def report_full_closure(self): # Report on self and subdirs recursively |
253 | | - self.report() |
254 | | - for x in self.subdirs.keys(): |
255 | | - print |
256 | | - self.subdirs[x].report_full_closure() |
257 | | - |
258 | | - |
259 | | -# Compare common files in two directories. |
260 | | -# Return: |
261 | | -# - files that compare equal |
262 | | -# - files that compare different |
263 | | -# - funny cases (can't stat etc.) |
264 | | -# |
265 | | -def cmpfiles(a, b, common): |
266 | | - """Compare common files in two directories. |
267 | | -
|
268 | | - cmpfiles(a,b,common) |
269 | | - A and B are directory names |
270 | | - COMMON is a list of file names |
271 | | - returns a tuple of three lists: |
272 | | - files that compare equal |
273 | | - files that are different |
274 | | - filenames that aren't regular files.""" |
275 | | - |
276 | | - res = ([], [], []) |
277 | | - for x in common: |
278 | | - res[_cmp(os.path.join(a, x), os.path.join(b, x))].append(x) |
279 | | - return res |
280 | | - |
281 | | - |
282 | | -# Compare two files. |
283 | | -# Return: |
284 | | -# 0 for equal |
285 | | -# 1 for different |
286 | | -# 2 for funny cases (can't stat, etc.) |
287 | | -# |
288 | | -def _cmp(a, b): |
289 | | - try: |
290 | | - return not abs(cmp(a, b)) |
291 | | - except os.error: |
292 | | - return 2 |
293 | | - |
294 | | - |
295 | | -# Return a copy with items that occur in skip removed. |
296 | | -# |
297 | | -def _filter(list, skip): |
298 | | - result = [] |
299 | | - for item in list: |
300 | | - if item not in skip: result.append(item) |
301 | | - return result |
302 | | - |
303 | | - |
304 | | -# Demonstration and testing. |
305 | | -# |
306 | | -def demo(): |
307 | | - import sys |
308 | | - import getopt |
309 | | - options, args = getopt.getopt(sys.argv[1:], 'r') |
310 | | - if len(args) <> 2: raise getopt.error, 'need exactly two args' |
311 | | - dd = dircmp(args[0], args[1]) |
312 | | - if ('-r', '') in options: |
313 | | - dd.report_full_closure() |
314 | | - else: |
315 | | - dd.report() |
316 | | - |
317 | | -if __name__ == '__main__': |
318 | | - demo() |
| 52 | + bufsize = BUFSIZE |
| 53 | + fp1 , fp2 = open(f1, 'rb'), open(f2, 'rb') |
| 54 | + while 1: |
| 55 | + b1, b2 = fp1.read(bufsize), fp2.read(bufsize) |
| 56 | + if b1!=b2: return 0 |
| 57 | + if not b1: return 1 |
0 commit comments