Skript leiab dublikaat failid Python: Difference between revisions

From ICO wiki
Jump to navigation · Jump to search
(Created page with '#!/usr/bin/env python import sys import os import hashlib if len(sys.argv) < 2: print "Usage",sys.argv[0],"<dir1> <dir2> ..." sys.exit() for mydir in sys.argv[1:]: …')
 
No edit summary
Line 1: Line 1:
#!/usr/bin/env python
#!/usr/bin/env python
import sys
import sys
import os
import os
import hashlib
import hashlib
if len(sys.argv) < 2:
if len(sys.argv) < 2:
     print "Usage",sys.argv[0],"<dir1> <dir2> ..."
     print "Usage",sys.argv[0],"<dir1> <dir2> ..."
     sys.exit()
     sys.exit()
for mydir in sys.argv[1:]:
for mydir in sys.argv[1:]:
     checksums = {}
     checksums = {}
Line 17: Line 14:
             fullpath = dirname + "/" + filename
             fullpath = dirname + "/" + filename
             filedata = open(fullpath).read()
             filedata = open(fullpath).read()
             m = hashlib.md5()
             m = hashlib.md5()
             m.update(filedata)
             m.update(filedata)
             checksum = m.hexdigest()
             checksum = m.hexdigest()
             try:
             try:
                 checksums[checksum].append(fullpath)
                 checksums[checksum].append(fullpath)
Line 27: Line 22:
                 checksums[checksum] = []
                 checksums[checksum] = []
                 checksums[checksum].append(fullpath)
                 checksums[checksum].append(fullpath)
           
     for checksum in checksums.keys():
     for checksum in checksums.keys():
         if len(checksums[checksum]) > 1:
         if len(checksums[checksum]) > 1:
             print checksum,checksums[checksum]
             print checksum,checksums[checksum]

Revision as of 23:51, 30 January 2011

#!/usr/bin/env python

"""Report files with identical content inside each given directory tree.

Usage: script.py <dir1> <dir2> ...

Every directory named on the command line is walked recursively and each
file is hashed with MD5. For every checksum shared by more than one file,
one line is printed: the checksum followed by the list of matching paths.
Each directory argument is analysed on its own; duplicates that live in
different directory arguments are not matched against each other.
"""
import sys
import os
import hashlib


def file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode so the digest reflects the exact
    bytes on disk, and it is read in *chunk_size* pieces so large files
    are never held in memory as a whole.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(root):
    """Return ``{checksum: [path, ...]}`` for duplicated files under *root*.

    Only checksums shared by at least two files are included. Files that
    cannot be read are reported on stderr and skipped, so a single
    unreadable file does not abort the scan.
    """
    checksums = {}
    for dirname, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            fullpath = os.path.join(dirname, filename)
            try:
                checksum = file_md5(fullpath)
            except (IOError, OSError) as err:
                sys.stderr.write("Skipping %s: %s\n" % (fullpath, err))
                continue
            checksums.setdefault(checksum, []).append(fullpath)

    duplicates = {}
    for checksum, paths in checksums.items():
        if len(paths) > 1:
            duplicates[checksum] = paths
    return duplicates


def main(argv=None):
    """Print the duplicate groups for every directory in *argv[1:]*.

    *argv* defaults to ``sys.argv``. With no directory argument a usage
    line is printed and the process exits.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Usage %s <dir1> <dir2> ..." % argv[0])
        sys.exit()
    for mydir in argv[1:]:
        for checksum, paths in find_duplicates(mydir).items():
            print("%s %s" % (checksum, paths))


if __name__ == "__main__":
    main()