DeepSeek R1 给我写了一个基于清晰度排序的图片视频人类检查工具

Deng_Xian_Sheng

已于 2025-01-22 11:05:59 修改

阅读量680

点赞数 5

文章标签：音视频深度学习 python tkinter DeepSeek R1 AI编程

于 2025-01-21 19:33:07 首次发布

本文链接：https://blog.csdn.net/Deng_Xian_Sheng/article/details/145289293

版权

用于制作深度学习数据集时，快速地浏览图片与视频并勾选需要的文件。

我用了，没问题！

在这里插入图片描述

项目文档
ps：有些部分优化过，代码与文档不符

图片、视频检查工具，主要用于深度学习数据集制作。

它能辅助人类快速地对图片、视频进行筛选，勾选需要的内容。

具体：
它是一个从命令行启动的脚本，它接受参数：

给定目录，如果此参数不传递，该工具会退出。
工作目录，如果此参数不传递，会使用当前目录下的photo_video_check_work_dir目录。
工作目录会存放一些json文件，用于程序因逻辑需要，存储图片路径、哈希和保存勾选的图片路径。

它运行后，首先会检查给定目录是否存在，如果存在则递归遍历给定目录下的：

.jpg
.png
.jpeg
.mp4
.avi
.mov
.mkv
.flv
.wmv
.tiff
.webp

后缀的文件，以及没有后缀的文件，并尝试通过文件头部确定是否符合以上格式，然后尝试打开，如果失败不会报错。

它会在遍历文件的同时：

验证文件的哈希sha256
读取视频文件的码率&分辨率&拉普拉斯算子&Sobel梯度
读取图片文件的分辨率

如果文件已经遍历过则不存入file_list.json中。

依据图片文件的分辨率，视频文件的码率、分辨率、拉普拉斯算子、Sobel梯度，对文件进行排序。
图片文件与视频文件交错排列，图片只与视频比较分辨率，当图片的分辨率比视频的分辨率高时，图片在上面。

排序结果体现在file_list.json中。

以上预处理工作进行时，它会在命令行界面显示一个进度条。
当以上预处理工作完成后，它会弹出一个tkinter的默认全屏窗口，用于人类检查。

窗口界面如下：

窗口标题：基于清晰度排序的图片、视频检查工具
窗口标题下红字温馨提示：排除：“水印、高斯&运动模糊、昏暗、显示不全、扭曲”内容
窗口温馨提示下方勾选状态：红字已勾选/什么都不显示
窗口如果显示图片：以“适合”的缩放形式展示图片一张，位于屏幕中心。
窗口如果显示视频: 以“适合”的缩放形式展示从视频中依照长度均匀提取的关键帧4张，按顺序上下左右排列，4张图片的中心点位于屏幕中心。

操作：
通过键盘左右键来切换文件，通过键盘上下键来勾选/取消勾选文件。

其他的特性：

如果第二次运行，它在遍历文件时，首先会判断当前路径是否在file_list.json中存在，其次判断哈希是否存在，两者有一者存在则忽略此文件。
如果第二次运行，它会从最后一个被勾选的文件的下一个文件继续进行人类检查。

import os
import sys
import json
import argparse
import hashlib
import io
import cv2
import numpy as np
import tkinter as tk
from PIL import Image, ImageTk
from tqdm import tqdm
import filetype
from functools import cmp_to_key
import multiprocessing
import ffmpeg
import fnmatch

def _scan_source_files(source_dir, exclude_patterns):
    """Return the paths of all files under *source_dir* not hit by an exclude pattern.

    Patterns are fnmatch-style and are matched against '/'-separated paths
    relative to *source_dir*. A matching directory is pruned together with
    everything below it.
    """
    def excluded(rel_path):
        return any(fnmatch.fnmatch(rel_path, pattern) for pattern in exclude_patterns)

    found = []
    for root, dirs, files in os.walk(source_dir, topdown=True):
        # Normalise to POSIX separators so patterns behave the same on Windows.
        rel_root = os.path.relpath(root, source_dir).replace(os.sep, '/')

        if excluded(rel_root):
            # Emptying dirs in place stops os.walk from descending further.
            dirs[:] = []
            continue

        dirs[:] = [
            d for d in dirs
            if not excluded(os.path.join(rel_root, d).replace(os.sep, '/'))
        ]

        for name in files:
            file_path = os.path.join(root, name)
            rel_path = os.path.relpath(file_path, source_dir).replace(os.sep, '/')
            if not excluded(rel_path):
                found.append(file_path)
    return found


def main():
    """Command-line entry point.

    Scans the source directory, analyses new or changed files in a process
    pool, merges the results with the entries already stored in
    ``file_list.json`` inside the work directory, sorts everything with
    ``compare_items``, writes the list back and finally opens the review GUI.

    Exits with status 1 when the source directory does not exist. Returns
    without opening the GUI when there is nothing to review.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_dir', help='源目录')
    parser.add_argument('--work_dir', help='工作目录', default='photo_video_check_work_dir')
    parser.add_argument('--exclude', action='append', default=[],
                        help='排除的文件或目录模式（支持通配符）')
    parser.add_argument('--type', choices=['image', 'video'],
                        help='仅处理图片或视频')
    args = parser.parse_args()

    if not os.path.isdir(args.source_dir):
        print("错误：源目录不存在")
        sys.exit(1)

    work_dir = os.path.abspath(args.work_dir)
    os.makedirs(work_dir, exist_ok=True)
    file_list_path = os.path.join(work_dir, 'file_list.json')

    print("扫描文件中...")
    all_files = _scan_source_files(args.source_dir, args.exclude)

    existing_files = []
    if os.path.exists(file_list_path):
        with open(file_list_path, 'r', encoding='utf-8') as f:
            existing_files = json.load(f)
    existing_hashes = {item['path']: item['hash'] for item in existing_files}

    print("并行处理文件中...")
    # Each worker only looks up its own path, so ship a one-entry dict per
    # task instead of pickling the complete path->hash map for every file.
    args_list = [
        (
            path,
            {path: existing_hashes[path]} if path in existing_hashes else {},
            work_dir,
            args.type,
        )
        for path in all_files
    ]
    results = []
    ctx = multiprocessing.get_context('spawn')
    with ctx.Pool() as pool:
        for result in tqdm(pool.imap(process_file_wrapper, args_list),
                           total=len(args_list), desc='处理文件'):
            if result is not None:
                results.append(result)

    # Freshly analysed entries replace stored ones that share the same path.
    combined = {item['path']: item for item in existing_files}
    for item in results:
        combined[item['path']] = item
    combined_list = list(combined.values())

    combined_list.sort(key=cmp_to_key(compare_items))

    with open(file_list_path, 'w', encoding='utf-8') as f:
        json.dump(combined_list, f, indent=2)

    if not combined_list:
        # The GUI indexes and takes modulo by the list length; with nothing
        # to show it would crash immediately, so stop here instead.
        print("没有找到可检查的图片或视频文件")
        return

    start_gui(combined_list, file_list_path)

def process_file_wrapper(args):
    """Pool-friendly adapter around ``process_single_file``.

    Args:
        args: tuple of positional arguments forwarded to
            ``process_single_file``.

    Returns:
        Whatever ``process_single_file`` returns, or ``None`` when it raises,
        so one unreadable file never aborts the whole batch.
    """
    try:
        return process_single_file(*args)
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate so workers can be stopped.
        return None

def process_single_file(path, existing_hashes, work_dir, type_filter):
    """Analyse one file and return its list entry, or ``None`` to skip it.

    Args:
        path: file to analyse.
        existing_hashes: mapping of already-recorded path -> SHA-256 hex
            digest; a file whose path and digest both match is skipped.
        work_dir: work directory, forwarded to ``process_video``.
        type_filter: ``'image'``, ``'video'`` or a falsy value for no filter.

    Returns:
        A dict with 'path', 'hash', 'type', the metadata returned by
        ``process_image`` / ``process_video`` and ``'selected': False``;
        ``None`` when the file is not an image or video, is filtered out,
        is unchanged, or cannot be analysed.
    """
    # Sniff the type first: it only needs the file header, whereas hashing
    # reads the whole file. Non-media files are rejected without being hashed.
    kind = filetype.guess(path)
    if not kind:
        return None
    mime_type = kind.mime
    is_image = mime_type.startswith('image/')
    is_video = mime_type.startswith('video/')
    if not is_image and not is_video:
        return None

    # Optional restriction to a single media type.
    if type_filter == 'image' and not is_image:
        return None
    if type_filter == 'video' and not is_video:
        return None

    file_hash = compute_sha256(path)
    if existing_hashes.get(path) == file_hash:
        # Same path and same content as last run: nothing new to record.
        return None

    try:
        if is_image:
            meta = process_image(path)
        else:
            meta = process_video(path, file_hash, work_dir)
    except Exception:
        # Best-effort: a file that cannot be analysed is simply left out.
        return None

    if not meta:
        return None

    return {
        'path': path,
        'hash': file_hash,
        'type': 'image' if is_image else 'video',
        **meta,
        'selected': False
    }

def compute_sha256(path):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 64 KiB pieces so large videos are never held in
    memory all at once.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

def process_image(path):
    """Measure resolution and sharpness of the image at *path*.

    Returns:
        A dict with 'resolution' (width, height), 'laplacian' (variance of
        the Laplacian) and 'sobel' (mean of the variances of the horizontal
        and vertical Sobel responses), or ``None`` when the file cannot be
        opened or decoded.
    """
    try:
        # First pass only checks file integrity.
        with Image.open(path) as img:
            img.verify()
        # Re-open for the actual pixel data (the image is opened a second
        # time after verify() rather than reusing the verified handle).
        with Image.open(path) as img:
            resolution = img.size
            # Normalise to 3-channel RGB first: grayscale, palette, 1-bit and
            # LA images do not yield an RGB array and would make cvtColor
            # raise, silently dropping the file.
            rgb = np.array(img.convert('RGB'))

        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        laplacian = cv2.Laplacian(gray, cv2.CV_64F).var()
        sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0).var()
        sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1).var()
        return {
            'resolution': resolution,
            # Plain floats keep the entry trivially JSON-serialisable.
            'laplacian': float(laplacian),
            'sobel': float((sobel_x + sobel_y) / 2)
        }
    except Exception:
        # Best-effort: unreadable or unsupported images are skipped.
        return None

def _frame_sharpness(thumb_path):
    """Return (laplacian variance, mean Sobel variance) of one thumbnail."""
    with Image.open(thumb_path) as img:
        # Normalise to RGB so grayscale frames do not break cvtColor.
        rgb = np.array(img.convert('RGB'))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    lap = cv2.Laplacian(gray, cv2.CV_64F).var()
    sob = (cv2.Sobel(gray, cv2.CV_64F, 1, 0).var() +
           cv2.Sobel(gray, cv2.CV_64F, 0, 1).var()) / 2
    return lap, sob


def process_video(path, file_hash, work_dir):
    """Probe a video, extract four preview frames and measure their sharpness.

    Frames are taken at 1/5, 2/5, 3/5 and 4/5 of the duration and stored as
    PNG files named ``<file_hash>_<seconds>.png`` under
    ``<work_dir>/thumbnails``; frames that already exist are reused.

    Args:
        path: video file to analyse.
        file_hash: SHA-256 hex digest of the file, used in thumbnail names.
        work_dir: work directory that holds the ``thumbnails`` folder.

    Returns:
        A dict with 'resolution' (width, height), 'bitrate', 'laplacian',
        'sobel' (both averaged over the readable frames) and 'thumbnails'
        (list of PNG paths), or ``None`` when the video has no positive
        duration, no video stream, no extractable frame, or probing fails.
    """
    thumbnail_dir = os.path.join(work_dir, 'thumbnails')
    os.makedirs(thumbnail_dir, exist_ok=True)

    try:
        format_probe = ffmpeg.probe(path)
        duration = float(format_probe['format'].get('duration', 0))
        if duration <= 0:
            return None

        # Four evenly spaced timestamps that avoid the very first/last frame.
        selected_times = [duration * i / 5 for i in range(1, 5)]

        stream_probe = ffmpeg.probe(
            path,
            select_streams='v:0',
            show_entries='stream=width,height,bit_rate'
        )
        streams = stream_probe.get('streams', [{}])
        if not streams:
            return None
        stream = streams[0]
        w = int(stream.get('width', 0))
        h = int(stream.get('height', 0))
        # A missing or empty bit_rate is recorded as 0.
        bitrate = int(stream.get('bit_rate') or 0)

        thumb_paths = []
        for t in selected_times:
            t_str = f"{t:.3f}".replace('.', '_')
            thumb_name = f"{file_hash}_{t_str}.png"
            thumb_path = os.path.join(thumbnail_dir, thumb_name)

            if os.path.exists(thumb_path):
                thumb_paths.append(thumb_path)
                continue

            try:
                out, _ = (
                    ffmpeg
                    .input(path, ss=str(t))
                    .output('pipe:', vframes=1, format='image2', vcodec='png', **{'qscale:v': '0'})
                    .run(capture_stdout=True, quiet=True)
                )
                if out:
                    with Image.open(io.BytesIO(out)) as img:
                        img.save(thumb_path, format='PNG', compress_level=0, optimize=True)
                        thumb_paths.append(thumb_path)
            except Exception:
                # One failed seek/decode should not discard the other frames.
                continue

        if not thumb_paths:
            return None

        laps, sobs = [], []
        for thumb_path in thumb_paths:
            try:
                lap, sob = _frame_sharpness(thumb_path)
            except Exception:
                # Unreadable thumbnail: leave it out of the averages.
                continue
            laps.append(lap)
            sobs.append(sob)

        return {
            'resolution': (w, h),
            'bitrate': bitrate,
            'laplacian': sum(laps)/len(laps) if laps else 0,
            'sobel': sum(sobs)/len(sobs) if sobs else 0,
            'thumbnails': thumb_paths
        }
    except Exception:
        # Best-effort: any probing or decoding failure skips the video.
        return None

def compare_items(a, b):
    """cmp-style comparator that orders items from highest to lowest quality.

    The score is a weighted sum of pixel count, Sobel variance and Laplacian
    variance. Bitrate is added only when both items are videos; an image and
    a video are compared without it.

    Args:
        a, b: item dicts with 'type' and 'resolution' (width, height) and
            optional 'sobel', 'laplacian' and 'bitrate'.

    Returns:
        -1 if *a* should come before *b* (a scores higher), 1 if after,
        0 if the scores are equal.
    """
    W_RESOLUTION = 1e8
    W_BITRATE = 1e5
    W_SOBEL = 1e3
    W_LAPLACIAN = 1e1

    both_videos = a['type'] == 'video' and b['type'] == 'video'

    def calculate_score(item):
        score = item['resolution'][0] * item['resolution'][1] * W_RESOLUTION
        score += item.get('sobel', 0) * W_SOBEL
        score += item.get('laplacian', 0) * W_LAPLACIAN
        if both_videos:
            score += item.get('bitrate', 0) * W_BITRATE
        return score

    a_score = calculate_score(a)
    b_score = calculate_score(b)

    # Return a strict sign. Truncating the float difference with int() would
    # turn any gap smaller than 1 into 0 and report different items as equal.
    return (b_score > a_score) - (b_score < a_score)

def start_gui(file_list, list_path):
    """Open the Tk review window for *file_list* and block until it closes.

    Args:
        file_list: list of item dicts. Keys read here: 'path', 'type',
            'resolution', 'selected' and, for videos, 'bitrate' and
            'thumbnails'. The 'selected' flag is mutated in place.
        list_path: JSON file the list is re-written to after every toggle
            and when the window is closed.

    Returns immediately, without creating a window, when *file_list* is empty.
    """
    if not file_list:
        # Navigation takes modulo len(files) and indexes the list, so an
        # empty list would crash as soon as the window opened.
        print("没有可检查的文件")
        return

    class App:
        """Review window: arrow keys navigate and toggle, F11 toggles fullscreen."""

        def __init__(self, root, files, list_path):
            self.root = root
            self.files = files
            self.list_path = list_path
            self.current_idx = 0
            # Start at False: setup_ui() calls toggle_fullscreen() once, and
            # that flip is what puts the window into fullscreen on startup.
            self.fullscreen = False
            self.setup_ui()
            self.load_state()
            # Process pending events so the content frame can report a real
            # size before the first item is scaled to fit it.
            self.root.update()
            self.show_item()

        def setup_ui(self):
            """Create the static widgets and register the key bindings."""
            self.root.title("基于清晰度排序的图片、视频检查工具")
            self.toggle_fullscreen()

            self.tip_label = tk.Label(self.root,
                text="排除：“水印、高斯&运动模糊、昏暗、显示不全、扭曲”内容",
                fg='red', font=('Arial', 14))
            self.tip_label.pack(pady=10)

            self.status_var = tk.StringVar()
            self.status_label = tk.Label(self.root, textvariable=self.status_var,
                                       fg='red', font=('Arial', 12))
            self.status_label.pack()

            self.info_var = tk.StringVar()
            self.info_label = tk.Label(self.root, textvariable=self.info_var,
                                     font=('Arial', 12), bg='white')
            self.info_label.place(relx=0.99, rely=0.03, anchor='ne')

            # Content area; its children are rebuilt for every item.
            self.frame = tk.Frame(self.root)
            self.frame.pack(expand=True, fill='both')

            self.root.bind('<Left>', lambda e: self.navigate(-1))
            self.root.bind('<Right>', lambda e: self.navigate(1))
            self.root.bind('<Up>', lambda e: self.toggle_select())
            self.root.bind('<Down>', lambda e: self.toggle_select())
            self.root.bind('<F11>', lambda e: self.toggle_fullscreen())
            # Escape goes through the same save-then-destroy path as the
            # window-manager close button.
            self.root.bind('<Escape>', lambda e: self.on_close())
            self.root.protocol("WM_DELETE_WINDOW", self.on_close)

        def toggle_fullscreen(self, event=None):
            """Flip between fullscreen and an 800x600 window."""
            self.fullscreen = not self.fullscreen
            self.root.attributes('-fullscreen', self.fullscreen)
            if not self.fullscreen:
                self.root.geometry('800x600')

        def on_close(self):
            """Persist the list, then close the window."""
            self.save_list()
            self.root.destroy()

        def load_state(self):
            """Resume right after the last selected item, wrapping at the end."""
            selected = [i for i, f in enumerate(self.files) if f.get('selected', False)]
            if selected:
                self.current_idx = (max(selected) + 1) % len(self.files)

        def navigate(self, delta):
            """Move *delta* items forward/backward with wrap-around."""
            self.current_idx = (self.current_idx + delta) % len(self.files)
            self.show_item()

        def toggle_select(self):
            """Toggle the current item's 'selected' flag and save immediately."""
            item = self.files[self.current_idx]
            item['selected'] = not item.get('selected', False)
            self.update_status()
            self.save_list()

        def update_status(self):
            """Refresh the selection marker and the position/metadata label."""
            item = self.files[self.current_idx]
            self.status_var.set("已勾选" if item.get('selected', False) else "")

            w, h = item['resolution']
            info = f"{self.current_idx+1}/{len(self.files)}"
            info += f" | 分辨率: {w}x{h}"
            if item['type'] != 'image':
                br = item.get('bitrate', 0)//1000
                info += f" | 码率: {br}kbps"
            self.info_var.set(info)

        def save_list(self):
            """Write the complete item list to the JSON file."""
            with open(self.list_path, 'w') as f:
                json.dump(self.files, f, indent=2)

        def viewport_size(self):
            """Return the content frame's (width, height), or 800x600 if unknown."""
            self.root.update_idletasks()
            width = self.frame.winfo_width()
            height = self.frame.winfo_height()
            if width <= 1 or height <= 1:
                # The frame still reports its 1x1 placeholder size.
                width, height = 800, 600
            return width, height

        @staticmethod
        def fit(img, box_width, box_height):
            """Return *img* resized to fit the box, keeping its aspect ratio."""
            img_width, img_height = img.size
            ratio = min(box_width / img_width, box_height / img_height)
            # Never ask for a 0-pixel side; resize() rejects it.
            new_size = (max(1, int(img_width * ratio)), max(1, int(img_height * ratio)))
            return img.resize(new_size, Image.Resampling.LANCZOS)

        def show_error(self, message):
            """Replace the frame content with a text message."""
            # Clear first: pack cannot share a container with gridded children.
            for widget in self.frame.winfo_children():
                widget.destroy()
            tk.Label(self.frame, text=message).pack()

        def show_item(self):
            """Rebuild the content frame for the current item."""
            for widget in self.frame.winfo_children():
                widget.destroy()

            item = self.files[self.current_idx]
            if item['type'] == 'image':
                self.show_image(item['path'])
            else:
                self.show_video()
            self.update_status()

        def show_image(self, path):
            """Show one image, scaled to fit and centred in the frame."""
            try:
                frame_width, frame_height = self.viewport_size()
                # The context manager closes the file once the scaled copy exists.
                with Image.open(path) as img:
                    photo = ImageTk.PhotoImage(self.fit(img, frame_width, frame_height))
                label = tk.Label(self.frame, image=photo)
                # Keep a reference, otherwise Tk's image is garbage-collected.
                label.image = photo
                label.place(relx=0.5, rely=0.5, anchor='center')
            except Exception as e:
                self.show_error(f"无法加载图片：{str(e)}")

        def show_video(self):
            """Show up to four stored key frames of the current video in a 2x2 grid."""
            try:
                item = self.files[self.current_idx]
                thumb_paths = item.get('thumbnails', [])
                if not thumb_paths:
                    raise ValueError("没有找到关键帧图片")

                rows, cols = 2, 2
                frame_width, frame_height = self.viewport_size()
                cell_width = frame_width // cols
                cell_height = frame_height // rows

                photos = []
                for thumb_path in thumb_paths[:rows * cols]:
                    try:
                        with Image.open(thumb_path) as img:
                            if 0 in img.size:
                                continue
                            photos.append(
                                ImageTk.PhotoImage(self.fit(img, cell_width, cell_height)))
                    except Exception:
                        # A missing or corrupt thumbnail is skipped.
                        continue

                if not photos:
                    raise ValueError("无法加载关键帧图片")

                for i, photo in enumerate(photos):
                    label = tk.Label(self.frame, image=photo)
                    label.image = photo
                    label.grid(row=i//cols, column=i%cols, sticky='nsew')

                # Equal weights keep the four cells the same size.
                for i in range(rows):
                    self.frame.rowconfigure(i, weight=1)
                for j in range(cols):
                    self.frame.columnconfigure(j, weight=1)
            except Exception as e:
                self.show_error(f"无法加载视频：{str(e)}")

    root = tk.Tk()
    root.resizable(True, True)
    app = App(root, file_list, list_path)
    root.mainloop()

if __name__ == '__main__':
    multiprocessing.freeze_support()
    main()

If this helps you, and if you have enough money, can you give me 1$? I am facing a financial crisis.
If you do this, I will pass on the kindness.
This is my bank card number:5592921230414708
Thank you!!