Demo entry 6682267

from collections import defaultdict


def get_duplicate_files(root_dir):
    hash_to_paths = defaultdict(list)
    get_duplicate_files_helper(root_dir, hash_to_paths, set(), defaultdict(list))
    return [paths for paths in hash_to_paths.values() if len(paths) > 1]


def get_duplicate_files_helper(root_dir, hash_to_paths, computed_paths, size_to_paths):
    for sub_dir in list_dir(root_dir):
        subdir_path = join_path(root_dir, sub_dir)
        if is_dir(subdir_path):
            get_duplicate_files_helper(subdir_path, hash_to_paths, computed_paths, size_to_paths)
        else:
            file_path = subdir_path
            size = get_file_size(file_path)
            # If we have previously encountered any files of the same size,
            # we go through them one at a time, compute their hashes if necessary
            # and append them to @hash_to_paths. This action in and of itself will
            # cause file paths with equal hashes to be grouped together by hash
            # in @hash_to_paths.
            if size in size_to_paths:
                hash_ = compute_hash(file_path)
                hash_to_paths[hash_].append(file_path)
                computed_paths.add(file_path)
                for same_size_path in size_to_paths[size]:
                    if same_size_path not in computed_paths:
                        same_path_hash = compute_hash(same_size_path)
                        hash_to_paths[same_path_hash].append(same_size_path)
                        computed_paths.add(same_size_path)
            # In any case, we need to remember the size of the current file.
            size_to_paths[size].append(file_path)
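
The helpers list_dir, join_path, is_dir, get_file_size and compute_hash are assumed by the demo but not defined in it. A minimal sketch of plausible implementations, assuming they are thin wrappers over the standard os, os.path and hashlib modules (the SHA-256 choice and the chunk_size parameter below are my own assumptions, not part of the original):

import hashlib
import os


def list_dir(dir_path):
    # Names (not full paths) of the entries directly inside @dir_path.
    return os.listdir(dir_path)


def join_path(dir_path, name):
    return os.path.join(dir_path, name)


def is_dir(path):
    return os.path.isdir(path)


def get_file_size(path):
    return os.path.getsize(path)


def compute_hash(path, chunk_size=1 << 16):
    # Hash the file contents in chunks so large files need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

With these in place, calling get_duplicate_files on a root directory returns a list of groups, each group holding the paths of files whose contents hash identically; files whose size is unique are never hashed at all, which is the point of tracking @size_to_paths before computing any hashes.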