Skript leiab duplikaatfailid (Python)

From ICO wiki

Käivitamisel määra kataloogid, kust soovid duplikaatfaile leida. Näiteks: python duplikaadid.py /etc /home

<source lang="python">
import sys
import os
import hashlib

if len(sys.argv) < 2:

   # No directory arguments were given: print a usage line and stop.
   print "Usage",sys.argv[0],"<dir1> <dir2> ..."
   sys.exit()

def file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode so the digest reflects the exact bytes
    on disk, and it is read in pieces of *chunk_size* bytes so that large
    files are never held in memory as a whole.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(root):
    """Return a dict mapping MD5 digest -> list of paths with that content.

    Walks the directory tree below *root* and hashes every regular file.
    Only digests shared by more than one file are included in the result.
    Files that cannot be read are reported on stderr and skipped, so a single
    unreadable file does not abort the scan.
    """
    checksums = {}
    for dirname, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            fullpath = os.path.join(dirname, filename)
            # Skip FIFOs, sockets, device nodes and broken symlinks: reading
            # them can block forever or fail.
            if not os.path.isfile(fullpath):
                continue
            try:
                checksum = file_md5(fullpath)
            except (IOError, OSError) as err:
                sys.stderr.write("Skipping %s: %s\n" % (fullpath, err))
                continue
            checksums.setdefault(checksum, []).append(fullpath)
    return dict(
        (checksum, paths)
        for checksum, paths in checksums.items()
        if len(paths) > 1
    )


if __name__ == "__main__":
    # Each directory argument is scanned on its own, so duplicates are
    # reported per directory tree, not across different arguments.
    for mydir in sys.argv[1:]:
        for checksum, paths in find_duplicates(mydir).items():
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s %s" % (checksum, paths))