忙起来的时候,你是否也会在不经意间把同一个文件保存好几份?看着爆红的磁盘,却找不到重复文件在哪里,其中甚至还有内容相同但名字不同的文件。该如何清理?这个程序可以帮到你。
import os
import hashlib
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from threading import Thread
from datetime import datetime
class DuplicateFileCleaner:
    """Tkinter GUI that finds files with identical content and deletes selected copies.

    Files below a chosen directory are grouped by the MD5 digest of their
    content, so duplicates are found even when their names differ.

    The scan runs in a background thread.  That thread never touches a
    widget: it only writes plain attributes (``_pending_progress``,
    ``_results_ready``, ``_scan_finished``).  ``monitor_scan_progress`` polls
    those attributes from the Tk main loop and performs every UI update.
    """

    def __init__(self, root):
        """Build the window inside *root* (a ``tk.Tk`` instance)."""
        self.root = root
        self.root.title("重复文件清理工具")
        self.root.geometry("1000x700")

        # Scan state shared between the UI thread and the worker thread.
        self.scanning = False          # True while a scan may keep running
        self.file_hashes = {}          # digest -> list of file paths
        self.duplicate_groups = []     # lists of paths sharing one digest

        # Worker -> UI hand-off (written by the worker, read by the poller).
        self._pending_progress = None  # latest (message, value) tuple posted
        self._applied_progress = None  # last tuple already shown in the UI
        self._results_ready = False    # worker finished hashing every file
        self._scan_finished = False    # worker thread is done (any reason)
        self._stop_requested = False   # user pressed the stop button

        self.setup_style()
        self.create_widgets()
        self.setup_layout()

    def setup_style(self):
        """Select the 'clam' theme and register the custom ttk styles."""
        self.style = ttk.Style()
        self.style.theme_use('clam')
        # Custom styles used by the result groups.
        self.style.configure('Title.TLabel', font=('Helvetica', 12, 'bold'))
        self.style.configure('Group.TFrame', background='#f0f0f0', relief=tk.RAISED, borderwidth=1)
        self.style.configure('GroupTitle.TLabel', font=('Helvetica', 10, 'bold'), background='#f0f0f0')

    def create_widgets(self):
        """Create every widget; geometry management happens in setup_layout."""
        # Top control panel: directory chooser, options, start/stop buttons.
        self.control_frame = ttk.Frame(self.root)
        self.path_label = ttk.Label(self.control_frame, text="扫描目录:")
        self.path_entry = ttk.Entry(self.control_frame, width=50)
        self.browse_button = ttk.Button(self.control_frame, text="浏览...", command=self.browse_directory)
        self.options_frame = ttk.LabelFrame(self.control_frame, text="扫描选项")
        self.recursive_var = tk.BooleanVar(value=True)
        self.recursive_check = ttk.Checkbutton(
            self.options_frame, text="包含子目录", variable=self.recursive_var
        )
        self.min_size_label = ttk.Label(self.options_frame, text="最小文件大小(KB):")
        self.min_size_entry = ttk.Entry(self.options_frame, width=8)
        self.min_size_entry.insert(0, "10")
        self.scan_button = ttk.Button(
            self.control_frame, text="开始扫描",
            command=self.start_scan, style='Accent.TButton'
        )
        self.stop_button = ttk.Button(
            self.control_frame, text="停止扫描",
            state=tk.DISABLED, command=self.stop_scan
        )

        # Progress display.
        self.progress_frame = ttk.Frame(self.root)
        self.progress_label = ttk.Label(self.progress_frame, text="准备就绪")
        self.progress_bar = ttk.Progressbar(
            self.progress_frame, orient=tk.HORIZONTAL,
            mode='determinate', length=500
        )

        # Scrollable result area: a frame embedded in a canvas.
        self.result_canvas = tk.Canvas(self.root, bg='white')
        self.scrollbar = ttk.Scrollbar(
            self.root, orient=tk.VERTICAL,
            command=self.result_canvas.yview
        )
        self.result_canvas.configure(yscrollcommand=self.scrollbar.set)
        self.result_frame = ttk.Frame(self.result_canvas)
        self.result_canvas.create_window(
            (0, 0), window=self.result_frame,
            anchor='nw', tags='result_frame'
        )

        # Bottom action bar.
        self.action_frame = ttk.Frame(self.root)
        self.delete_button = ttk.Button(
            self.action_frame, text="删除选中文件",
            state=tk.DISABLED, command=self.delete_selected_files
        )
        self.stats_label = ttk.Label(self.action_frame, text="")

    def setup_layout(self):
        """Place the widgets created by create_widgets."""
        # Control panel.
        self.control_frame.pack(fill=tk.X, padx=10, pady=10)
        self.path_label.grid(row=0, column=0, padx=5, pady=5, sticky=tk.W)
        self.path_entry.grid(row=0, column=1, padx=5, pady=5, sticky=tk.EW)
        self.browse_button.grid(row=0, column=2, padx=5, pady=5)
        self.options_frame.grid(row=1, column=0, columnspan=3, padx=5, pady=5, sticky=tk.EW)
        self.recursive_check.grid(row=0, column=0, padx=5, pady=2, sticky=tk.W)
        self.min_size_label.grid(row=0, column=1, padx=5, pady=2, sticky=tk.W)
        self.min_size_entry.grid(row=0, column=2, padx=5, pady=2, sticky=tk.W)
        self.scan_button.grid(row=2, column=0, padx=5, pady=5)
        self.stop_button.grid(row=2, column=1, padx=5, pady=5)

        # Progress bar.
        self.progress_frame.pack(fill=tk.X, padx=10, pady=5)
        self.progress_label.pack(side=tk.LEFT)
        self.progress_bar.pack(side=tk.LEFT, expand=True, fill=tk.X, padx=5)

        # Result area.
        self.result_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y, pady=5)

        # Bottom buttons.
        self.action_frame.pack(fill=tk.X, padx=10, pady=10)
        self.delete_button.pack(side=tk.LEFT, padx=5)
        self.stats_label.pack(side=tk.LEFT, padx=10)

        # Let the path entry absorb extra horizontal space.
        self.control_frame.columnconfigure(1, weight=1)

        # Keep the canvas scroll region in sync with the embedded frame size.
        self.result_frame.bind(
            "<Configure>",
            lambda e: self.result_canvas.configure(
                scrollregion=self.result_canvas.bbox("all")
            )
        )

    def browse_directory(self):
        """Ask the user for a directory and put it into the path entry."""
        directory = filedialog.askdirectory()
        if directory:
            self.path_entry.delete(0, tk.END)
            self.path_entry.insert(0, directory)

    def start_scan(self):
        """Validate the inputs, reset state and start the background scan."""
        directory = self.path_entry.get()
        if not directory or not os.path.isdir(directory):
            messagebox.showerror("错误", "请选择有效的目录!")
            return
        try:
            min_size = int(self.min_size_entry.get())
        except ValueError:
            messagebox.showerror("错误", "最小文件大小必须是整数!")
            return

        # Reset results and the worker -> UI hand-off before the thread starts.
        self.clear_results()
        self.file_hashes = {}
        self.duplicate_groups = []
        self._pending_progress = None
        self._applied_progress = None
        self._results_ready = False
        self._stop_requested = False
        self._scan_finished = False
        self.scanning = True

        # Update the UI state.
        self.scan_button.config(state=tk.DISABLED)
        self.stop_button.config(state=tk.NORMAL)
        self.delete_button.config(state=tk.DISABLED)
        self.progress_bar['value'] = 0
        self.progress_label.config(text="扫描中...")

        # Run the scan in a daemon thread so the window stays responsive.
        scan_thread = Thread(
            target=self.scan_directory,
            args=(directory, self.recursive_var.get(), min_size),
            daemon=True
        )
        scan_thread.start()

        # Start polling the worker from the main loop.
        self.monitor_scan_progress()

    def stop_scan(self):
        """Ask the worker to stop; it notices at its next file boundary."""
        self.scanning = False
        self._stop_requested = True
        self.progress_label.config(text="扫描已停止")

    def scan_directory(self, directory, recursive, min_size):
        """Hash the files under *directory* and group them by digest.

        Runs in the worker thread and therefore must not touch any widget.
        Progress is published through ``_post_progress``; completion is
        signalled by ``_scan_finished`` and ``_results_ready``, which
        ``monitor_scan_progress`` picks up on the main thread.

        Args:
            directory: Root directory to scan.
            recursive: When false, only files directly in *directory* are used.
            min_size: Minimum file size in KB for a file to be considered.
        """
        results_ready = False
        try:
            file_list = self._collect_files(directory, recursive, min_size)
            if file_list is None:  # cancelled while collecting
                return
            if not file_list:
                self._post_progress("没有找到符合条件的文件", 100)
                return

            total_files = len(file_list)
            for index, file_path in enumerate(file_list, start=1):
                if not self.scanning:
                    return
                try:
                    file_hash = self.calculate_hash(file_path)
                except OSError:
                    # Unreadable or vanished file: skip it (PermissionError
                    # is a subclass of OSError).
                    file_hash = None
                if file_hash is not None:
                    self.file_hashes.setdefault(file_hash, []).append(file_path)
                progress = index / total_files * 100
                self._post_progress(
                    f"扫描中... {index}/{total_files} ({progress:.1f}%)",
                    progress
                )

            # Only digests shared by more than one file are duplicates.
            self.duplicate_groups = [
                files for files in self.file_hashes.values() if len(files) > 1
            ]
            results_ready = True
        except Exception as e:
            # Top-level boundary of the worker thread: report, don't crash.
            self._post_progress(f"扫描出错: {str(e)}", 0)
        finally:
            self.scanning = False
            self._results_ready = results_ready
            # Set last: once the poller sees this flag, everything above is
            # already in place.
            self._scan_finished = True

    def _collect_files(self, directory, recursive, min_size):
        """Return paths of at least *min_size* KB, or None if the scan was stopped."""
        candidates = []
        for current_dir, _subdirs, names in os.walk(directory):
            for name in names:
                if not self.scanning:
                    return None
                path = os.path.join(current_dir, name)
                try:
                    size_kb = os.path.getsize(path) / 1024
                except OSError:
                    continue
                if size_kb >= min_size:
                    candidates.append(path)
            if not recursive:
                # os.walk yields the top directory first; stop after it.
                break
        return candidates

    def _post_progress(self, message, value):
        """Publish a progress update from the worker thread (no Tk calls)."""
        self._pending_progress = (message, value)

    def monitor_scan_progress(self):
        """Apply worker progress on the main thread and finalise the scan.

        Reschedules itself every 100 ms until the worker has finished, then
        re-enables the buttons and shows the results.
        """
        # Read the finished flag BEFORE the progress tuple: if the worker is
        # done, the tuple read afterwards is guaranteed to be its last one.
        finished = self._scan_finished
        pending = self._pending_progress
        if pending is not None and pending is not self._applied_progress:
            self._applied_progress = pending
            if not self._stop_requested:
                # After a stop request keep the "stopped" label visible.
                self.update_progress(*pending)

        if not finished:
            self.root.after(100, self.monitor_scan_progress)
            return

        self.scan_button.config(state=tk.NORMAL)
        self.stop_button.config(state=tk.DISABLED)
        if self._results_ready:
            self.display_results()
        elif self._stop_requested:
            self.progress_label.config(text="扫描已停止")

    def calculate_hash(self, file_path, block_size=65536):
        """Return the hex MD5 digest of *file_path*, read in *block_size* chunks.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        hasher = hashlib.md5()
        with open(file_path, 'rb') as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(block_size), b''):
                hasher.update(chunk)
        return hasher.hexdigest()

    def clear_results(self):
        """Remove every result widget and reset the scroll position and stats."""
        for widget in self.result_frame.winfo_children():
            widget.destroy()
        self.result_canvas.yview_moveto(0)
        self.stats_label.config(text="")

    def display_results(self):
        """Render one titled table per duplicate group (main thread only)."""
        self.clear_results()
        if not self.duplicate_groups:
            self.update_progress("没有找到重复文件", 100)
            return

        total_files = sum(len(group) for group in self.duplicate_groups)
        total_groups = len(self.duplicate_groups)
        self.stats_label.config(
            text=f"找到 {total_groups} 组重复文件 (共 {total_files} 个文件)"
        )
        self.update_progress(f"找到 {total_groups} 组重复文件", 100)

        for group_idx, file_group in enumerate(self.duplicate_groups):
            group_frame = ttk.Frame(
                self.result_frame,
                style='Group.TFrame',
                padding=5
            )
            group_frame.pack(fill=tk.X, pady=5, padx=5)

            # Group title.
            group_title = ttk.Label(
                group_frame,
                text=f"重复文件组 #{group_idx + 1} (共 {len(file_group)} 个)",
                style='GroupTitle.TLabel'
            )
            group_title.pack(anchor=tk.W)

            # Table with the file details; at most five rows are visible.
            tree = ttk.Treeview(
                group_frame,
                columns=('size', 'modified', 'path'),
                selectmode='extended',
                height=min(len(file_group), 5)
            )
            tree.heading('#0', text='文件名')
            tree.heading('size', text='大小 (KB)')
            tree.heading('modified', text='修改日期')
            tree.heading('path', text='路径')
            tree.column('#0', width=200, anchor=tk.W)
            tree.column('size', width=80, anchor=tk.E)
            tree.column('modified', width=120, anchor=tk.W)
            tree.column('path', width=400, anchor=tk.W)

            for file_path in file_group:
                try:
                    file_name = os.path.basename(file_path)
                    file_size = os.path.getsize(file_path) / 1024  # KB
                    modified = os.path.getmtime(file_path)
                except OSError:
                    # The file disappeared or became unreadable since the scan.
                    continue
                modified_str = datetime.fromtimestamp(modified).strftime('%Y-%m-%d %H:%M')
                tree.insert(
                    '', 'end',
                    text=file_name,
                    values=(f"{file_size:.1f}", modified_str, file_path),
                    tags=('file',)
                )

            # Per-group vertical scrollbar.
            scrollbar = ttk.Scrollbar(group_frame, orient=tk.VERTICAL, command=tree.yview)
            tree.configure(yscrollcommand=scrollbar.set)
            tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.delete_button.config(state=tk.NORMAL)
        self.result_frame.update_idletasks()
        self.result_canvas.config(scrollregion=self.result_canvas.bbox("all"))

    def delete_selected_files(self):
        """Delete the files selected in any group table, after confirmation."""
        # Collect (table, row id) pairs once, across every group.
        selected = [
            (widget, item)
            for group_frame in self.result_frame.winfo_children()
            for widget in group_frame.winfo_children()
            if isinstance(widget, ttk.Treeview)
            for item in widget.selection()
        ]
        if not selected:
            messagebox.showwarning("警告", "请先选择要删除的文件!")
            return

        confirm = messagebox.askyesno(
            "确认删除",
            f"确定要删除选中的 {len(selected)} 个文件吗?\n此操作不可恢复!"
        )
        if not confirm:
            return

        deleted_count = 0
        errors = []
        for tree, item in selected:
            # The third value column holds the full path (see display_results).
            file_path = tree.item(item, 'values')[2]
            try:
                os.remove(file_path)
            except Exception as e:
                errors.append(f"{file_path}: {str(e)}")
            else:
                tree.delete(item)
                deleted_count += 1

        # Report the outcome; list at most five failures.
        message = f"成功删除 {deleted_count} 个文件"
        if errors:
            message += "\n\n删除失败的文件:\n" + "\n".join(errors[:5])
            if len(errors) > 5:
                message += f"\n...以及其他 {len(errors) - 5} 个错误"
        messagebox.showinfo("删除结果", message)
        self.stats_label.config(text=f"已删除 {deleted_count} 个文件")

    def update_progress(self, message, value):
        """Show *message* and set the progress bar to *value* (main thread only)."""
        self.progress_label.config(text=message)
        self.progress_bar['value'] = value
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, and run the event loop until the window is closed.
    root = tk.Tk()
    app = DuplicateFileCleaner(root)
    root.mainloop()
UI 界面如下图所示。
程序会把重复文件按组分开显示,避免你面对一堆名字各异的文件,却分不清哪些是重复的、哪些是原版。
目前的缺陷是还不能批量多选,仍需要用户自己在每一组里逐一选择要删除的文件;开发人员正在制作清重宝 2.0。
感谢使用和阅读!