Skript, mis otsib üles duplikaatfailid - Python: Difference between revisions
From ICO wiki
Jump to navigation | Jump to search
No edit summary |
No edit summary |
||
Line 8: | Line 8: | ||
import hashlib | import hashlib | ||
import os | import os | ||
#Faili hashi lugemine | |||
def sha(filename): | def sha(filename): | ||
d = hashlib.sha512() | d = hashlib.sha512() | ||
Line 16: | Line 18: | ||
else: | else: | ||
return d.hexdigest() | return d.hexdigest() | ||
#Sõnastiku loomine | |||
s={} | s={} | ||
#Kus kohast faile otsitakse | |||
path = raw_input ("Sisesta koht, kus kohast duplikaatfaile otsitakse: ") | path = raw_input ("Sisesta koht, kus kohast duplikaatfaile otsitakse: ") | ||
print "Sisestasid ", path | print "Sisestasid ", path | ||
#"Jalutame läbi" etteantud pathil asuvad failid | |||
for r,d,f in os.walk(path): | for r,d,f in os.walk(path): | ||
for files in f: | for files in f: | ||
filename=os.path.join(r,files) | filename=os.path.join(r,files) | ||
#Koostame failist hashi | |||
digest=sha(filename) | digest=sha(filename) | ||
#Kontrollime, kas sellise sisuga/hashiga fail on sõnastikus | |||
if not s.has_key(digest): | if not s.has_key(digest): | ||
#Lisame hashi sõnastikku | |||
s[digest]=filename | s[digest]=filename | ||
else: | else: | ||
#Kuulutame leitud duplikaatsisuga failidest | |||
print "Järgnevad on duplikaat failid: %s <==> %s " %( filename, s[digest]) | print "Järgnevad on duplikaat failid: %s <==> %s " %( filename, s[digest]) | ||
</source> | </source> |
Revision as of 17:01, 30 January 2011
Näeb välja nagu copy-pasta: http://stackoverflow.com/questions/2399196/finding-duplicate-files-by-content-across-multiple-directories sellisel juhul oleks tore kui viidataks ka :)
#!/usr/bin/python
# -*- coding: utf-8 -*-
#Skript leidmaks duplikaatfailid
#Chris Sinihelm, AK41, 2010
import hashlib
import os
#Faili hashi lugemine
def sha(filename):
d = hashlib.sha512()
try:
d.update(open(filename).read())
except Exception,e:
print e
else:
return d.hexdigest()
#Sõnastiku loomine
s={}
#Kus kohast faile otsitakse
path = raw_input ("Sisesta koht, kus kohast duplikaatfaile otsitakse: ")
print "Sisestasid ", path
#"Jalutame läbi" etteantud pathil asuvad failid
for r,d,f in os.walk(path):
for files in f:
filename=os.path.join(r,files)
#Koostame failist hashi
digest=sha(filename)
#Kontrollime, kas sellise sisuga/hashiga fail on sõnastikus
if not s.has_key(digest):
#Lisame hashi sõnastikku
s[digest]=filename
else:
#Kuulutame leitud duplikaatsisuga failidest
print "Järgnevad on duplikaat failid: %s <==> %s " %( filename, s[digest])