Python 重复文件清理程序:清重宝 1.0

忙碌的时候,你是否也会不经意间把同一个文件保存好几份?看着爆红的磁盘,却找不到重复文件在哪里,有些内容相同的文件甚至连名字都不一样。如何清除它们?这个程序可以帮到你。

import os
import hashlib
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from threading import Thread
from datetime import datetime


class DuplicateFileCleaner:
    def __init__(self, root):
        """Set up the application state and build the whole user interface.

        Args:
            root: The Tk root window that hosts the application.
        """
        # Scan bookkeeping: running flag, digest -> paths map, duplicate groups.
        self.scanning = False
        self.file_hashes = {}
        self.duplicate_groups = []

        # Window title and initial size.
        self.root = root
        root.title("重复文件清理工具")
        root.geometry("1000x700")

        # Theme first, then the widgets, then their placement.
        for build_step in (self.setup_style, self.create_widgets, self.setup_layout):
            build_step()

    def setup_style(self):
        """Switch ttk to the 'clam' theme and register the custom styles."""
        style = ttk.Style()
        self.style = style
        style.theme_use('clam')

        # Style name -> options handed to Style.configure.
        panel_background = '#f0f0f0'
        custom_styles = {
            'Title.TLabel': {'font': ('Helvetica', 12, 'bold')},
            'Group.TFrame': {
                'background': panel_background,
                'relief': tk.RAISED,
                'borderwidth': 1,
            },
            'GroupTitle.TLabel': {
                'font': ('Helvetica', 10, 'bold'),
                'background': panel_background,
            },
        }
        for style_name, options in custom_styles.items():
            style.configure(style_name, **options)

    def create_widgets(self):
        """Create every widget of the main window and store it on ``self``.

        No pack/grid call is made here; setup_layout does the geometry
        management. The only placement performed in this method is embedding
        ``result_frame`` inside ``result_canvas`` as a canvas window item.
        """
        # Top control panel
        self.control_frame = ttk.Frame(self.root)

        self.path_label = ttk.Label(self.control_frame, text="扫描目录:")
        self.path_entry = ttk.Entry(self.control_frame, width=50)
        self.browse_button = ttk.Button(self.control_frame, text="浏览...", command=self.browse_directory)

        # Scan options: recurse into sub-directories (on by default) and the
        # minimum file size in KB (pre-filled with "10").
        self.options_frame = ttk.LabelFrame(self.control_frame, text="扫描选项")
        self.recursive_var = tk.BooleanVar(value=True)
        self.recursive_check = ttk.Checkbutton(
            self.options_frame, text="包含子目录", variable=self.recursive_var
        )
        self.min_size_label = ttk.Label(self.options_frame, text="最小文件大小(KB):")
        self.min_size_entry = ttk.Entry(self.options_frame, width=8)
        self.min_size_entry.insert(0, "10")

        # NOTE(review): 'Accent.TButton' is not configured in setup_style;
        # presumably it falls back to the plain TButton look — confirm.
        self.scan_button = ttk.Button(
            self.control_frame, text="开始扫描",
            command=self.start_scan, style='Accent.TButton'
        )
        # The stop button starts disabled; start_scan enables it.
        self.stop_button = ttk.Button(
            self.control_frame, text="停止扫描",
            state=tk.DISABLED, command=self.stop_scan
        )

        # Progress display
        self.progress_frame = ttk.Frame(self.root)
        self.progress_label = ttk.Label(self.progress_frame, text="准备就绪")
        self.progress_bar = ttk.Progressbar(
            self.progress_frame, orient=tk.HORIZONTAL,
            mode='determinate', length=500
        )

        # Results area: a canvas with a vertical scrollbar
        self.result_canvas = tk.Canvas(self.root, bg='white')
        self.scrollbar = ttk.Scrollbar(
            self.root, orient=tk.VERTICAL,
            command=self.result_canvas.yview
        )
        self.result_canvas.configure(yscrollcommand=self.scrollbar.set)

        # The frame that holds the per-group result widgets lives inside the
        # canvas as a window item tagged 'result_frame'.
        self.result_frame = ttk.Frame(self.result_canvas)
        self.result_canvas.create_window(
            (0, 0), window=self.result_frame,
            anchor='nw', tags='result_frame'
        )

        # Bottom action buttons; the delete button starts disabled.
        self.action_frame = ttk.Frame(self.root)
        self.delete_button = ttk.Button(
            self.action_frame, text="删除选中文件",
            state=tk.DISABLED, command=self.delete_selected_files
        )
        self.stats_label = ttk.Label(self.action_frame, text="")

    def setup_layout(self):
        """Place the widgets built by create_widgets inside the root window.

        The root window uses ``pack``; the control panel and the options
        frame use ``grid`` internally. With ``pack`` the packing order
        decides which widget claims which edge, so the bottom action bar is
        packed before the expanding results canvas. Packing it afterwards
        would leave it only the leftover cavity between the canvas and the
        scrollbar.
        """
        # Control panel
        self.control_frame.pack(fill=tk.X, padx=10, pady=10)
        self.path_label.grid(row=0, column=0, padx=5, pady=5, sticky=tk.W)
        self.path_entry.grid(row=0, column=1, padx=5, pady=5, sticky=tk.EW)
        self.browse_button.grid(row=0, column=2, padx=5, pady=5)

        self.options_frame.grid(row=1, column=0, columnspan=3, padx=5, pady=5, sticky=tk.EW)
        self.recursive_check.grid(row=0, column=0, padx=5, pady=2, sticky=tk.W)
        self.min_size_label.grid(row=0, column=1, padx=5, pady=2, sticky=tk.W)
        self.min_size_entry.grid(row=0, column=2, padx=5, pady=2, sticky=tk.W)

        self.scan_button.grid(row=2, column=0, padx=5, pady=5)
        self.stop_button.grid(row=2, column=1, padx=5, pady=5)

        # Let the path entry column absorb extra horizontal space.
        self.control_frame.columnconfigure(1, weight=1)

        # Progress row
        self.progress_frame.pack(fill=tk.X, padx=10, pady=5)
        self.progress_label.pack(side=tk.LEFT)
        self.progress_bar.pack(side=tk.LEFT, expand=True, fill=tk.X, padx=5)

        # Bottom action bar: claim the bottom edge BEFORE the canvas is packed
        # so the bar spans the full window width.
        self.action_frame.pack(side=tk.BOTTOM, fill=tk.X, padx=10, pady=10)
        self.delete_button.pack(side=tk.LEFT, padx=5)
        self.stats_label.pack(side=tk.LEFT, padx=10)

        # Results area takes whatever space is left.
        self.result_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y, pady=5)

        # Keep the scroll region in sync with the size of the result frame.
        self.result_frame.bind(
            "<Configure>",
            lambda e: self.result_canvas.configure(
                scrollregion=self.result_canvas.bbox("all")
            )
        )

        # Make the embedded result frame as wide as the canvas so the group
        # frames (packed with fill=X) actually stretch across the results area.
        self.result_canvas.bind(
            "<Configure>",
            lambda e: self.result_canvas.itemconfigure('result_frame', width=e.width)
        )

    def browse_directory(self):
        """Let the user pick a folder and copy the choice into the path entry.

        The entry is left untouched when the dialog returns an empty value.
        """
        chosen = filedialog.askdirectory()
        if not chosen:
            return

        entry = self.path_entry
        entry.delete(0, tk.END)
        entry.insert(0, chosen)

    def start_scan(self):
        """Validate the inputs, reset state and launch a scan on a daemon thread.

        Shows an error dialog and returns without scanning when the path entry
        does not name an existing directory or the minimum size is not an
        integer.
        """
        directory = self.path_entry.get()
        if not (directory and os.path.isdir(directory)):
            messagebox.showerror("错误", "请选择有效的目录!")
            return

        raw_min_size = self.min_size_entry.get()
        try:
            min_size = int(raw_min_size)
        except ValueError:
            messagebox.showerror("错误", "最小文件大小必须是整数!")
            return

        # Drop everything left over from a previous scan.
        self.clear_results()
        self.file_hashes, self.duplicate_groups = {}, []
        self.scanning = True

        # Only "stop" is clickable while a scan is running.
        button_states = (
            (self.scan_button, tk.DISABLED),
            (self.stop_button, tk.NORMAL),
            (self.delete_button, tk.DISABLED),
        )
        for button, state in button_states:
            button.config(state=state)
        self.progress_bar['value'] = 0
        self.progress_label.config(text="扫描中...")

        # The scan runs on a daemon thread.
        Thread(
            target=self.scan_directory,
            args=(directory, self.recursive_var.get(), min_size),
            daemon=True,
        ).start()

        # Start the periodic scan-state check.
        self.monitor_scan_progress()

    def stop_scan(self):
        self.scanning = False
        self.progress_label.config(text="扫描已停止")

    def scan_directory(self, directory, recursive, min_size):
        try:
            # 收集所有符合条件的文件
            file_list = []
            for root, dirs, files in os.walk(directory):
                for file in files:
                    if not self.scanning:
                        return

                    file_path = os.path.join(root, file)
                    try:
                        file_size = os.path.getsize(file_path) / 1024  # KB
                        if file_size >= min_size:
                            file_list.append(file_path)
                    except (OSError, PermissionError):
                        continue

                if not recursive:
                    break

            total_files = len(file_list)
            if total_files == 0:
                self.update_progress("没有找到符合条件的文件", 100)
                return

            # 计算文件哈希值
            for i, file_path in enumerate(file_list):
                if not self.scanning:
                    return

                try:
                    file_hash = self.calculate_hash(file_path)
                    if file_hash in self.file_hashes:
                        self.file_hashes[file_hash].append(file_path)
                    else:
                        self.file_hashes[file_hash] = [file_path]
                except (OSError, PermissionError):
                    continue

                # 更新进度
                progress = (i + 1) / total_files * 100
                self.update_progress(
                    f"扫描中... {i + 1}/{total_files} ({progress:.1f}%)",
                    progress
                )

            # 收集重复文件组
            self.duplicate_groups = [
                files for files in self.file_hashes.values() if len(files) > 1
            ]

            # 显示结果
            self.display_results()

        except Exception as e:
            self.update_progress(f"扫描出错: {str(e)}", 0)
        finally:
            self.scanning = False
            self.scan_button.config(state=tk.NORMAL)
            self.stop_button.config(state=tk.DISABLED)

    def monitor_scan_progress(self):
        if self.scanning:
            self.root.after(100, self.monitor_scan_progress)

    def calculate_hash(self, file_path, block_size=65536):
        hasher = hashlib.md5()
        with open(file_path, 'rb') as f:
            buf = f.read(block_size)
            while len(buf) > 0:
                hasher.update(buf)
                buf = f.read(block_size)
        return hasher.hexdigest()

    def clear_results(self):
        # 清除所有结果组件
        for widget in self.result_frame.winfo_children():
            widget.destroy()

        self.result_canvas.yview_moveto(0)
        self.stats_label.config(text="")

    def display_results(self):
        """Render ``self.duplicate_groups`` as one framed table per group.

        Clears any earlier results first. With no groups, it only reports
        that nothing was found. Otherwise it updates the stats label and the
        progress display, builds a titled Treeview (with its own scrollbar)
        for every group, enables the delete button and refreshes the canvas
        scroll region. Files whose metadata cannot be read are left out of
        their table.
        """
        self.clear_results()

        groups = self.duplicate_groups
        if not groups:
            self.update_progress("没有找到重复文件", 100)
            return

        total_groups = len(groups)
        total_files = sum(map(len, groups))

        self.stats_label.config(
            text=f"找到 {total_groups} 组重复文件 (共 {total_files} 个文件)"
        )
        self.update_progress(f"找到 {total_groups} 组重复文件", 100)

        # (column id, heading text, width in pixels, anchor)
        column_specs = (
            ('#0', '文件名', 200, tk.W),
            ('size', '大小 (KB)', 80, tk.E),
            ('modified', '修改日期', 120, tk.W),
            ('path', '路径', 400, tk.W),
        )

        for group_number, file_group in enumerate(groups, start=1):
            member_count = len(file_group)

            group_frame = ttk.Frame(self.result_frame, style='Group.TFrame', padding=5)
            group_frame.pack(fill=tk.X, pady=5, padx=5)

            # Group heading
            heading_label = ttk.Label(
                group_frame,
                text=f"重复文件组 #{group_number} (共 {member_count} 个)",
                style='GroupTitle.TLabel',
            )
            heading_label.pack(anchor=tk.W)

            # Table with one row per file, at most five rows tall
            tree = ttk.Treeview(
                group_frame,
                columns=('size', 'modified', 'path'),
                selectmode='extended',
                height=min(member_count, 5),
            )
            for column_id, heading_text, width, anchor in column_specs:
                tree.heading(column_id, text=heading_text)
                tree.column(column_id, width=width, anchor=anchor)

            for file_path in file_group:
                try:
                    display_name = os.path.basename(file_path)
                    size_kb = os.path.getsize(file_path) / 1024
                    modified_at = datetime.fromtimestamp(os.path.getmtime(file_path))
                    row_values = (
                        f"{size_kb:.1f}",
                        modified_at.strftime('%Y-%m-%d %H:%M'),
                        file_path,
                    )
                    tree.insert('', 'end', text=display_name, values=row_values, tags=('file',))
                except OSError:
                    continue

            # Per-table vertical scrollbar
            tree_scrollbar = ttk.Scrollbar(group_frame, orient=tk.VERTICAL, command=tree.yview)
            tree.configure(yscrollcommand=tree_scrollbar.set)

            tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
            tree_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.delete_button.config(state=tk.NORMAL)
        self.result_frame.update_idletasks()
        self.result_canvas.config(scrollregion=self.result_canvas.bbox("all"))

    def delete_selected_files(self):
        """Delete the files whose rows are selected in the result tables.

        Walks the result widgets once to collect the selected rows, asks for
        confirmation, then removes each file from disk. The confirmation
        prompt carries an extra warning when every row of some group is
        selected, because deleting them leaves no copy of that file. A row is
        removed from its table only after its file was deleted. Failures are
        collected and the first five are listed in the final dialog.
        """
        # Collect (tree, row id) pairs once so the count shown in the prompt
        # and the rows actually deleted cannot drift apart.
        selections = []
        fully_selected_groups = 0
        for group_frame in self.result_frame.winfo_children():
            for widget in group_frame.winfo_children():
                if not isinstance(widget, ttk.Treeview):
                    continue
                picked = widget.selection()
                selections.extend((widget, item) for item in picked)
                if picked and len(picked) == len(widget.get_children()):
                    fully_selected_groups += 1

        if not selections:
            messagebox.showwarning("警告", "请先选择要删除的文件!")
            return

        prompt = f"确定要删除选中的 {len(selections)} 个文件吗?\n此操作不可恢复!"
        if fully_selected_groups:
            # Deleting every copy defeats the purpose of de-duplication.
            prompt += (
                f"\n\n注意: 有 {fully_selected_groups} 组文件被全部选中,"
                "删除后将不会保留任何副本!"
            )
        if not messagebox.askyesno("确认删除", prompt):
            return

        deleted_count = 0
        errors = []
        for tree, item in selections:
            # The third value column holds the full path.
            file_path = tree.item(item, 'values')[2]
            try:
                os.remove(file_path)
            except OSError as e:
                errors.append(f"{file_path}: {e}")
            else:
                tree.delete(item)
                deleted_count += 1

        # Report the outcome.
        message = f"成功删除 {deleted_count} 个文件"
        if errors:
            message += "\n\n删除失败的文件:\n" + "\n".join(errors[:5])
            if len(errors) > 5:
                message += f"\n...以及其他 {len(errors) - 5} 个错误"

        messagebox.showinfo("删除结果", message)
        self.stats_label.config(text=f"已删除 {deleted_count} 个文件")

    def update_progress(self, message, value):
        self.progress_label.config(text=message)
        self.progress_bar['value'] = value


if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the application
    # on top of it and run the event loop.
    main_window = tk.Tk()
    cleaner = DuplicateFileCleaner(main_window)
    main_window.mainloop()

UI界面长这样

程序会把重复文件按组分开显示,避免你面对一堆名字不同的文件时,分不清哪些是重复的、哪些是原版。

目前的缺陷是不能多选,仍需要用户自己在每组里选择要删除的文件;开发人员正在制作清重宝 2.0。

感谢使用和阅读

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值