"""Utilities for comparing files and directories.Classes: dircmpFunctions: cmp(f1, f2, shallow=1) -> int cmpfiles(a, b, common) -> ([], [], [])"""importosimportstatfromitertoolsimportifilter,ifilterfalse,imap,izip__all__=["cmp","dircmp","cmpfiles"]_cache={}BUFSIZE=8*1024defcmp(f1,f2,shallow=1):"""Compare two files. Arguments: f1 -- First file name f2 -- Second file name shallow -- Just check stat signature (do not read the files). defaults to 1. Return value: True if the files are the same, False otherwise. This function uses a cache for past comparisons and the results, with a cache invalidation mechanism relying on stale signatures. """s1=_sig(os.stat(f1))s2=_sig(os.stat(f2))ifs1[0]!=stat.S_IFREGors2[0]!=stat.S_IFREG:returnFalseifshallowands1==s2:returnTrueifs1[1]!=s2[1]:returnFalseoutcome=_cache.get((f1,f2,s1,s2))ifoutcomeisNone:outcome=_do_cmp(f1,f2)iflen(_cache)>100:# limit the maximum size of the cache_cache.clear()_cache[f1,f2,s1,s2]=outcomereturnoutcomedef_sig(st):return(stat.S_IFMT(st.st_mode),st.st_size,st.st_mtime)def_do_cmp(f1,f2):bufsize=BUFSIZEwithopen(f1,'rb')asfp1,open(f2,'rb')asfp2:whileTrue:b1=fp1.read(bufsize)b2=fp2.read(bufsize)ifb1!=b2:returnFalseifnotb1:returnTrue# Directory comparison class.#classdircmp:"""A class that manages the comparison of 2 directories. dircmp(a,b,ignore=None,hide=None) A and B are directories. IGNORE is a list of names to ignore, defaults to ['RCS', 'CVS', 'tags']. HIDE is a list of names to hide, defaults to [os.curdir, os.pardir]. High level usage: x = dircmp(dir1, dir2) x.report() -> prints a report on the differences between dir1 and dir2 or x.report_partial_closure() -> prints report on differences between dir1 and dir2, and reports on common immediate subdirectories. x.report_full_closure() -> like report_partial_closure, but fully recursive. Attributes: left_list, right_list: The files in dir1 and dir2, filtered by hide and ignore. common: a list of names in both dir1 and dir2. left_only, right_only: names only in dir1, dir2. common_dirs: subdirectories in both dir1 and dir2. common_files: files in both dir1 and dir2. common_funny: names in both dir1 and dir2 where the type differs between dir1 and dir2, or the name is not stat-able. same_files: list of identical files. diff_files: list of filenames which differ. funny_files: list of files which could not be compared. subdirs: a dictionary of dircmp objects, keyed by names in common_dirs. """def__init__(self,a,b,ignore=None,hide=None):# Initializeself.left=aself.right=bifhideisNone:self.hide=[os.curdir,os.pardir]# Names never to be shownelse:self.hide=hideifignoreisNone:self.ignore=['RCS','CVS','tags']# Names ignored in comparisonelse:self.ignore=ignoredefphase0(self):# Compare everything except common subdirectoriesself.left_list=_filter(os.listdir(self.left),self.hide+self.ignore)self.right_list=_filter(os.listdir(self.right),self.hide+self.ignore)self.left_list.sort()self.right_list.sort()defphase1(self):# Compute common namesa=dict(izip(imap(os.path.normcase,self.left_list),self.left_list))b=dict(izip(imap(os.path.normcase,self.right_list),self.right_list))self.common=map(a.__getitem__,ifilter(b.__contains__,a))self.left_only=map(a.__getitem__,ifilterfalse(b.__contains__,a))self.right_only=map(b.__getitem__,ifilterfalse(a.__contains__,b))defphase2(self):# Distinguish files, directories, funniesself.common_dirs=[]self.common_files=[]self.common_funny=[]forxinself.common:a_path=os.path.join(self.left,x)b_path=os.path.join(self.right,x)ok=1try:a_stat=os.stat(a_path)exceptos.error,why:# print 'Can\'t stat', a_path, ':', why[1]ok=0try:b_stat=os.stat(b_path)exceptos.error,why:# print 'Can\'t stat', b_path, ':', why[1]ok=0ifok:a_type=stat.S_IFMT(a_stat.st_mode)b_type=stat.S_IFMT(b_stat.st_mode)ifa_type!=b_type:self.common_funny.append(x)elifstat.S_ISDIR(a_type):self.common_dirs.append(x)elifstat.S_ISREG(a_type):self.common_files.append(x)else:self.common_funny.append(x)else:self.common_funny.append(x)defphase3(self):# Find out differences between common filesxx=cmpfiles(self.left,self.right,self.common_files)self.same_files,self.diff_files,self.funny_files=xxdefphase4(self):# Find out differences between common subdirectories# A new dircmp object is created for each common subdirectory,# these are stored in a dictionary indexed by filename.# The hide and ignore properties are inherited from the parentself.subdirs={}forxinself.common_dirs:a_x=os.path.join(self.left,x)b_x=os.path.join(self.right,x)self.subdirs[x]=dircmp(a_x,b_x,self.ignore,self.hide)defphase4_closure(self):# Recursively call phase4() on subdirectoriesself.phase4()forsdinself.subdirs.itervalues():sd.phase4_closure()defreport(self):# Print a report on the differences between a and b# Output format is purposely lousyprint'diff',self.left,self.rightifself.left_only:self.left_only.sort()print'Only in',self.left,':',self.left_onlyifself.right_only:self.right_only.sort()print'Only in',self.right,':',self.right_onlyifself.same_files:self.same_files.sort()print'Identical files :',self.same_filesifself.diff_files:self.diff_files.sort()print'Differing files :',self.diff_filesifself.funny_files:self.funny_files.sort()print'Trouble with common files :',self.funny_filesifself.common_dirs:self.common_dirs.sort()print'Common subdirectories :',self.common_dirsifself.common_funny:self.common_funny.sort()print'Common funny cases :',self.common_funnydefreport_partial_closure(self):# Print reports on self and on subdirsself.report()forsdinself.subdirs.itervalues():printsd.report()defreport_full_closure(self):# Report on self and subdirs recursivelyself.report()forsdinself.subdirs.itervalues():printsd.report_full_closure()methodmap=dict(subdirs=phase4,same_files=phase3,diff_files=phase3,funny_files=phase3,common_dirs=phase2,common_files=phase2,common_funny=phase2,common=phase1,left_only=phase1,right_only=phase1,left_list=phase0,right_list=phase0)def__getattr__(self,attr):ifattrnotinself.methodmap:raiseAttributeError,attrself.methodmap[attr](self)returngetattr(self,attr)defcmpfiles(a,b,common,shallow=1):"""Compare common files in two directories. a, b -- directory names common -- list of file names found in both directories shallow -- if true, do comparison based solely on stat() information Returns a tuple of three lists: files that compare equal files that are different filenames that aren't regular files. """res=([],[],[])forxincommon:ax=os.path.join(a,x)bx=os.path.join(b,x)res[_cmp(ax,bx,shallow)].append(x)returnres# Compare two files.# Return:# 0 for equal# 1 for different# 2 for funny cases (can't stat, etc.)#def_cmp(a,b,sh,abs=abs,cmp=cmp):try:returnnotabs(cmp(a,b,sh))exceptos.error:return2# Return a copy with items that occur in skip removed.#def_filter(flist,skip):returnlist(ifilterfalse(skip.__contains__,flist))# Demonstration and testing.#defdemo():importsysimportgetoptoptions,args=getopt.getopt(sys.argv[1:],'r')iflen(args)!=2:raisegetopt.GetoptError('need exactly two args',None)dd=dircmp(args[0],args[1])if('-r','')inoptions:dd.report_full_closure()else:dd.report()if__name__=='__main__':demo()