import os
import hashlib
from tkinter import Tk, filedialog, Button, messagebox
class DuplicateFileManager:
    """Tk GUI for finding and deleting duplicate files under a chosen folder.

    Two files count as duplicates when their content hashes are equal. The
    result of the most recent scan is stored in ``duplicate_files``: a dict
    mapping a hex digest to the list of file paths sharing that digest. Only
    groups with two or more paths are kept.
    """

    def __init__(self, root):
        """Build the two-button window on the given Tk root widget."""
        self.root = root
        self.root.title("Duplicate File Manager")
        self.root.geometry("400x200")

        # Folder selection button
        Button(
            root, text="Select Folder", command=self.select_folder, width=20
        ).pack(pady=20)
        # Delete duplicate files button
        Button(
            root,
            text="Delete Duplicates",
            command=self.delete_duplicates,
            width=20,
        ).pack(pady=20)

        self.folder_path = None
        self.duplicate_files = {}

    def select_folder(self):
        """Ask the user for a folder and, if one is chosen, scan it."""
        selected = filedialog.askdirectory(title="Select a Folder")
        if not selected:
            # Dialog was cancelled: keep the previous folder so that
            # folder_path and duplicate_files keep describing the same scan.
            return
        self.folder_path = selected
        messagebox.showinfo(
            "Folder Selected", f"Selected folder: {self.folder_path}"
        )
        self.find_duplicates()

    def find_duplicates(self):
        """Scan ``folder_path`` for duplicate files and report the result.

        Replaces ``duplicate_files`` with the groups found by the scan and
        shows them (or a "none found" notice) in a message box. Files that
        could not be read are skipped and counted in the report.
        """
        if not self.folder_path:
            messagebox.showwarning("No Folder", "Please select a folder first.")
            return

        self.duplicate_files, skipped = self._scan_folder(self.folder_path)

        skipped_note = ""
        if skipped:
            skipped_note = f"\n\nSkipped {len(skipped)} unreadable file(s)."

        if not self.duplicate_files:
            messagebox.showinfo(
                "No Duplicates", "No duplicate files found." + skipped_note
            )
            return

        # One paragraph per duplicate group so the groups can be told apart.
        groups_text = "\n\n".join(
            "\n".join(paths) for paths in self.duplicate_files.values()
        )
        messagebox.showinfo(
            "Duplicate Files",
            "Duplicate Files Found:\n\n" + groups_text + skipped_note,
        )

    def _scan_folder(self, folder):
        """Group the regular files under *folder* by content hash.

        Returns a ``(duplicates, skipped)`` tuple. ``duplicates`` maps each
        digest shared by two or more files to the list of their paths, in
        sorted walk order. ``skipped`` lists paths that raised ``OSError``
        while being read.
        """
        groups = {}
        skipped = []
        for root_dir, dirs, files in os.walk(folder):
            # Sort in place so the traversal, and therefore which copy is
            # "first" in each group, is deterministic.
            dirs.sort()
            for file_name in sorted(files):
                file_path = os.path.join(root_dir, file_name)
                # A symlink hashes the same as its target; treating the pair
                # as duplicates could delete the real file and keep only a
                # dangling link, so links are ignored.
                if os.path.islink(file_path):
                    continue
                try:
                    file_hash = self.get_file_hash(file_path)
                except OSError:
                    # Unreadable file (permissions, vanished mid-scan, ...):
                    # skip it instead of aborting the whole scan.
                    skipped.append(file_path)
                    continue
                groups.setdefault(file_hash, []).append(file_path)

        duplicates = {
            digest: paths for digest, paths in groups.items() if len(paths) > 1
        }
        return duplicates, skipped

    def get_file_hash(self, file_path, hash_algorithm=hashlib.md5):
        """Return the hex digest of the file's contents.

        The file is read in binary chunks so large files are not loaded into
        memory at once. ``hash_algorithm`` is a zero-argument hashlib
        constructor (MD5 by default). Raises ``OSError`` if the file cannot
        be opened or read.
        """
        hash_obj = hash_algorithm()
        with open(file_path, "rb") as f:
            while chunk := f.read(65536):
                hash_obj.update(chunk)
        return hash_obj.hexdigest()

    def delete_duplicates(self):
        """Delete every duplicate except the first file of each group.

        Asks for confirmation first. Afterwards reports how many files were
        actually removed, lists any that could not be deleted, and clears
        ``duplicate_files`` because the scan result is no longer current.
        """
        if not self.duplicate_files:
            messagebox.showwarning(
                "No Duplicates", "No duplicate files found to delete."
            )
            return

        # Keep the first file of each group and delete the rest.
        files_to_delete = [
            path
            for paths in self.duplicate_files.values()
            for path in paths[1:]
        ]
        if not files_to_delete:
            messagebox.showinfo("No Duplicates", "No duplicates to delete.")
            return

        confirm = messagebox.askyesno(
            "Confirm Deletion",
            f"Are you sure you want to delete {len(files_to_delete)} "
            "duplicate file(s)?",
        )
        if not confirm:
            messagebox.showinfo("Deletion Cancelled", "No files were deleted.")
            return

        deleted_count = 0
        failures = []
        for file in files_to_delete:
            try:
                os.remove(file)
            except OSError as e:
                print(f"Error deleting file {file}: {e}")
                failures.append(f"{file}: {e}")
            else:
                print(f"Deleted duplicate file: {file}")
                deleted_count += 1

        # The stored groups now reference removed files; force a fresh scan
        # before any further deletion.
        self.duplicate_files = {}

        if failures:
            messagebox.showwarning(
                "Deletion Incomplete",
                f"Deleted {deleted_count} duplicate file(s); "
                f"{len(failures)} could not be deleted:\n\n"
                + "\n".join(failures),
            )
        else:
            messagebox.showinfo(
                "Deletion Complete",
                f"Successfully deleted {deleted_count} duplicate file(s).",
            )
# Script entry point: create the Tk root window, attach the manager UI to it,
# and run the event loop until the window is closed.
if __name__ == "__main__":
    root = Tk()
    app = DuplicateFileManager(root)
    root.mainloop()