Looks like copy-paste from: http://stackoverflow.com/questions/2399196/finding-duplicate-files-by-content-across-multiple-directories
<source lang = "python">
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Script for finding duplicate files
# Chris Sinihelm, AK41, 2010

import hashlib
import os

def sha(filename):
    """Return the SHA-512 hex digest of a file, or None if it cannot be read."""
    d = hashlib.sha512()
    try:
        # Open in binary mode so the digest is computed over the raw bytes.
        with open(filename, 'rb') as f:
            d.update(f.read())
    except Exception as e:
        print e
        return None
    return d.hexdigest()

seen = {}
path = raw_input("Enter the directory to search for duplicate files: ")
print "You entered", path

for root, dirs, files in os.walk(path):
    for name in files:
        filename = os.path.join(root, name)
        digest = sha(filename)
        if digest is None:
            continue  # unreadable file, skip it
        if digest not in seen:
            seen[digest] = filename
        else:
            print "The following are duplicate files: %s <==> %s" % (filename, seen[digest])
</source>
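
For comparison, below is a minimal Python 3 sketch of the same approach. It hashes files in fixed-size chunks so that large files do not have to be loaded into memory at once; the names <code>find_duplicates</code> and <code>CHUNK_SIZE</code> are illustrative and not part of the original script.

<source lang = "python">
#!/usr/bin/env python3
# Minimal Python 3 sketch of the same idea: walk a directory tree,
# hash every file with SHA-512, and report files that share a digest.
import hashlib
import os

CHUNK_SIZE = 64 * 1024  # read 64 KiB at a time to keep memory use constant


def file_digest(filename):
    """Return the SHA-512 hex digest of a file, or None if it cannot be read."""
    h = hashlib.sha512()
    try:
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(CHUNK_SIZE), b''):
                h.update(chunk)
    except OSError as err:
        print(err)
        return None
    return h.hexdigest()


def find_duplicates(path):
    """Print pairs of files under 'path' whose contents are identical."""
    seen = {}
    for root, _dirs, names in os.walk(path):
        for name in names:
            filename = os.path.join(root, name)
            digest = file_digest(filename)
            if digest is None:
                continue  # unreadable file, skip it
            if digest in seen:
                print("Duplicate files: %s <==> %s" % (filename, seen[digest]))
            else:
                seen[digest] = filename


if __name__ == '__main__':
    find_duplicates(input("Enter the directory to search for duplicate files: "))
</source>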