Skript leiab dublikaat failid Python: Difference between revisions

From ICO wiki
Jump to navigationJump to search
(Created page with '#!/usr/bin/env python import sys import os import hashlib if len(sys.argv) < 2: print "Usage",sys.argv[0],"<dir1> <dir2> ..." sys.exit() for mydir in sys.argv[1:]: …')
 
No edit summary
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
#!/usr/bin/env python
Käivitamisel määra kataloogist, kus soovid duplikaat faile leida. Näiteks: python duplikaadid.py /etc /home
 
<source lang="python">
import sys
import sys  
import os
import os
import hashlib
import hashlib
if len(sys.argv) < 2:
if len(sys.argv) < 2:
     print "Usage",sys.argv[0],"<dir1> <dir2> ..."
     print "Usage",sys.argv[0],"<dir1> <dir2> ..."
     sys.exit()
     sys.exit()
for mydir in sys.argv[1:]:
for mydir in sys.argv[1:]:
     checksums = {}
     checksums = {}
Line 17: Line 15:
             fullpath = dirname + "/" + filename
             fullpath = dirname + "/" + filename
             filedata = open(fullpath).read()
             filedata = open(fullpath).read()
             m = hashlib.md5()
             m = hashlib.md5()
             m.update(filedata)
             m.update(filedata)
             checksum = m.hexdigest()
             checksum = m.hexdigest()
             try:
             try:
                 checksums[checksum].append(fullpath)
                 checksums[checksum].append(fullpath)
Line 27: Line 23:
                 checksums[checksum] = []
                 checksums[checksum] = []
                 checksums[checksum].append(fullpath)
                 checksums[checksum].append(fullpath)
           
     for checksum in checksums.keys():
     for checksum in checksums.keys():
         if len(checksums[checksum]) > 1:
         if len(checksums[checksum]) > 1:
             print checksum,checksums[checksum]
             print checksum,checksums[checksum]

Latest revision as of 23:57, 30 January 2011

Käivitamisel määra kataloogid, kust soovid duplikaatfaile leida. Näiteks: python duplikaadid.py /etc /home
<source lang="python">
import sys
import os
import hashlib
if len(sys.argv) < 2:

   print "Usage",sys.argv[0],"<dir1> <dir2> ..."
   sys.exit()

def file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode so the digest reflects the exact bytes
    on disk, and it is read in blocks of *chunk_size* bytes so that large
    files do not have to fit in memory.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(top):
    """Group the regular files below directory *top* by MD5 checksum.

    Returns a dict mapping each checksum shared by more than one file to the
    list of paths having that checksum. Files that cannot be read are
    reported on stderr and skipped instead of aborting the whole scan.
    """
    checksums = {}
    for dirname, _subdirs, filenames in os.walk(top):
        for filename in filenames:
            fullpath = os.path.join(dirname, filename)
            # Broken symlinks, sockets and FIFOs have no content to compare,
            # and opening a FIFO would block the scan indefinitely.
            if not os.path.isfile(fullpath):
                continue
            try:
                checksum = file_md5(fullpath)
            except (IOError, OSError) as err:
                sys.stderr.write("Skipping %s: %s\n" % (fullpath, err))
                continue
            checksums.setdefault(checksum, []).append(fullpath)
    return dict(
        (checksum, paths)
        for checksum, paths in checksums.items()
        if len(paths) > 1
    )


# Each directory given on the command line is scanned on its own, so
# duplicates are reported per directory tree, not across trees.
for mydir in sys.argv[1:]:
    for checksum, paths in find_duplicates(mydir).items():
        # A single pre-formatted string prints the same "checksum [paths]"
        # line under both the print statement and the print function.
        print("%s %s" % (checksum, paths))