用于制作深度学习数据集时,快速地浏览图片与视频并勾选需要的文件。
我用了,没问题!
项目文档
ps:有些部分优化过,代码与文档不符
图片、视频检查工具,主要用于深度学习数据集制作。
它能辅助人类快速地对图片、视频进行筛选,勾选需要的内容。
具体:
它是一个从命令行启动的脚本,它接受参数:
- 给定目录,如果此参数不传递,该工具会退出。
- 工作目录,如果此参数不传递,会使用当前目录下的photo_video_check_work_dir目录。
工作目录会存放一些json文件,用于存储程序逻辑所需的图片路径、哈希,以及已勾选的图片路径。
它运行后,首先会检查给定目录是否存在,如果存在则递归遍历给定目录下的:
- .jpg
- .png
- .jpeg
- .mp4
- .avi
- .mov
- .mkv
- .flv
- .wmv
- .tiff
- .webp
后缀的文件,以及没有后缀的文件,并尝试通过文件头部确定是否符合以上格式,然后尝试打开,如果失败不会报错。
它会在遍历文件的同时:
- 验证文件的哈希sha256
- 读取视频文件的码率&分辨率&拉普拉斯算子&Sobel梯度
- 读取图片文件的分辨率
如果文件已经遍历过则不存入file_list.json中。
依据图片文件的分辨率,以及视频文件的码率、分辨率、拉普拉斯算子、Sobel梯度,对文件进行排序。
图片文件与视频文件交错排列,图片只与视频比较分辨率,当图片的分辨率比视频的分辨率高时,图片在上面。
排序结果体现在file_list.json中。
以上预处理工作进行时,它会在命令行界面显示一个进度条。
当以上预处理工作完成后,它会弹出一个tkinter的默认全屏窗口,用于人类检查。
窗口界面如下:
- 窗口标题:基于清晰度排序的图片、视频检查工具
- 窗口标题下红字温馨提示:排除:“水印、高斯&运动模糊、昏暗、显示不全、扭曲”内容
- 窗口温馨提示下方勾选状态:红字已勾选/什么都不显示
- 窗口如果显示图片:以“适合”的缩放形式展示图片一张,位于屏幕中心。
- 窗口如果显示视频: 以“适合”的缩放形式展示从视频中依照长度均匀提取的关键帧4张,按顺序上下左右排列,4张图片的中心点位于屏幕中心。
操作:
通过键盘左右键来切换文件,通过键盘上下键来勾选/取消勾选文件。
其他的特性:
- 如果第二次运行,它在遍历文件时,首先会判断当前路径是否在file_list.json中存在,其次判断哈希是否存在,两者有一者存在则忽略此文件。
- 如果第二次运行,它会从最后一个被勾选文件的下一个文件开始,继续进行人类检查。
import os
import sys
import json
import argparse
import hashlib
import io
import cv2
import numpy as np
import tkinter as tk
from PIL import Image, ImageTk
from tqdm import tqdm
import filetype
from functools import cmp_to_key
import multiprocessing
import ffmpeg
import fnmatch
def _collect_source_files(source_dir, exclude_patterns):
    """Return the paths of all files under *source_dir* that no pattern excludes."""
    collected = []
    for root, dirs, files in os.walk(source_dir, topdown=True):
        # Patterns are matched against '/'-separated paths relative to source_dir.
        rel_root = os.path.relpath(root, source_dir).replace(os.sep, '/')

        # The source directory itself has the relative path '.', which a
        # pattern such as '.*' (hidden files) would match and thereby discard
        # the whole tree.  The root is therefore never tested for exclusion.
        if rel_root != '.' and any(fnmatch.fnmatch(rel_root, p) for p in exclude_patterns):
            dirs[:] = []  # prune: os.walk must not descend any further
            continue

        # Prune excluded sub-directories in place so os.walk skips them.
        # (For the root, the tested path keeps its historical './name' form.)
        dirs[:] = [
            d for d in dirs
            if not any(
                fnmatch.fnmatch(os.path.join(rel_root, d).replace(os.sep, '/'), p)
                for p in exclude_patterns
            )
        ]

        for name in files:
            file_path = os.path.join(root, name)
            rel_path = os.path.relpath(file_path, source_dir).replace(os.sep, '/')
            if not any(fnmatch.fnmatch(rel_path, p) for p in exclude_patterns):
                collected.append(file_path)
    return collected


def main():
    """Command-line entry point: scan, analyse, sort, then open the review GUI.

    Steps:
      1. Parse arguments and make sure the source directory exists
         (exits with status 1 otherwise).
      2. Walk the source directory, honouring the ``--exclude`` patterns.
      3. Analyse the files in a process pool; files whose path and hash are
         already recorded in ``file_list.json`` are skipped by the workers.
      4. Merge new results into the stored list, sort it with
         ``compare_items`` and write it back to ``file_list.json``.
      5. Start the GUI, unless there is nothing to review.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_dir', help='源目录')
    parser.add_argument('--work_dir', help='工作目录', default='photo_video_check_work_dir')
    parser.add_argument('--exclude', action='append', default=[],
                        help='排除的文件或目录模式(支持通配符)')
    parser.add_argument('--type', choices=['image', 'video'],
                        help='仅处理图片或视频')
    args = parser.parse_args()

    if not os.path.isdir(args.source_dir):
        print("错误:源目录不存在")
        sys.exit(1)

    work_dir = os.path.abspath(args.work_dir)
    os.makedirs(work_dir, exist_ok=True)
    file_list_path = os.path.join(work_dir, 'file_list.json')

    print("扫描文件中...")
    all_files = _collect_source_files(args.source_dir, args.exclude)

    # Entries recorded by a previous run (empty on the first run).
    existing_files = []
    if os.path.exists(file_list_path):
        with open(file_list_path, 'r') as f:
            existing_files = json.load(f)
    existing_hashes = {item['path']: item['hash'] for item in existing_files}

    print("并行处理文件中...")
    # Each task only carries the stored hash of its own path.  Shipping the
    # whole path->hash mapping with every task would pickle it once per file.
    args_list = [
        (
            path,
            {path: existing_hashes[path]} if path in existing_hashes else {},
            work_dir,
            args.type,
        )
        for path in all_files
    ]
    results = []
    ctx = multiprocessing.get_context('spawn')
    with ctx.Pool() as pool:
        for result in tqdm(pool.imap(process_file_wrapper, args_list),
                           total=len(args_list), desc='处理文件'):
            if result is not None:
                results.append(result)

    # New results replace stored entries that share the same path.
    combined = {item['path']: item for item in existing_files}
    for item in results:
        combined[item['path']] = item
    combined_list = list(combined.values())
    combined_list.sort(key=cmp_to_key(compare_items))

    with open(file_list_path, 'w') as f:
        json.dump(combined_list, f, indent=2)

    if not combined_list:
        # The GUI indexes into the list right away and cannot show an empty one.
        print("没有找到可检查的图片或视频")
        return

    start_gui(combined_list, file_list_path)
def process_file_wrapper(args):
    """Run ``process_single_file`` on one task tuple from the process pool.

    Args:
        args: Tuple ``(path, existing_hashes, work_dir, type_filter)`` that is
            unpacked into the positional arguments of ``process_single_file``.

    Returns:
        The metadata dict produced by ``process_single_file``, or ``None``
        when the file was skipped or processing raised an error.
    """
    try:
        return process_single_file(*args)
    except Exception:
        # Best effort: one unreadable file must not abort the whole scan.
        # Only ``Exception`` is caught so KeyboardInterrupt / SystemExit still
        # propagate and the worker can be stopped.
        return None
def process_single_file(path, existing_hashes, work_dir, type_filter):
    """Analyse one file and return its list entry, or ``None`` to skip it.

    Args:
        path: Path of the file to analyse.
        existing_hashes: Mapping of already recorded paths to their SHA-256
            hex digest; a file whose path and digest both match is skipped.
        work_dir: Working directory, passed on to ``process_video``.
        type_filter: ``'image'``, ``'video'`` or a falsy value for no filter.

    Returns:
        A dict with ``path``, ``hash``, ``type``, the metadata returned by
        ``process_image`` / ``process_video`` and ``selected`` set to False;
        ``None`` if the file is not an image or video, is filtered out, is
        unchanged since the last run, or could not be analysed.
    """
    # Type detection and filtering come first: they are much cheaper than
    # hashing, so files that are skipped anyway are never read in full.
    kind = filetype.guess(path)
    if not kind:
        return None
    mime_type = kind.mime
    is_image = mime_type.startswith('image/')
    is_video = mime_type.startswith('video/')
    if not (is_image or is_video):
        return None
    if type_filter == 'image' and not is_image:
        return None
    if type_filter == 'video' and not is_video:
        return None

    file_hash = compute_sha256(path)
    # Same path with an unchanged digest: already recorded by a previous run.
    if existing_hashes.get(path) == file_hash:
        return None

    try:
        if is_image:
            meta = process_image(path)
        else:
            meta = process_video(path, file_hash, work_dir)
    except Exception:
        return None
    if not meta:
        return None

    return {
        'path': path,
        'hash': file_hash,
        'type': 'image' if is_image else 'video',
        **meta,
        'selected': False,
    }
def compute_sha256(path):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in 64 KiB blocks so it is never loaded into memory
    as a whole.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
def process_image(path):
    """Measure the resolution and sharpness of the image at *path*.

    Returns:
        A dict with ``resolution`` (the PIL ``(width, height)`` size),
        ``laplacian`` (variance of the Laplacian) and ``sobel`` (mean of the
        variances of the horizontal and vertical Sobel responses), or
        ``None`` if the file cannot be opened, verified or analysed.
    """
    try:
        with Image.open(path) as img:
            img.verify()
        # Pillow requires the file to be reopened after verify().
        with Image.open(path) as img:
            size = img.size
            # Normalise to 3-channel RGB first.  Grayscale, palette, 1-bit and
            # similar modes yield arrays the colour conversion cannot handle,
            # which used to drop those images silently.
            rgb = np.array(img.convert('RGB'))
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        laplacian = cv2.Laplacian(gray, cv2.CV_64F).var()
        sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0).var()
        sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1).var()
        return {
            'resolution': size,
            # Plain floats keep the entry trivially JSON/pickle friendly.
            'laplacian': float(laplacian),
            'sobel': float((sobel_x + sobel_y) / 2),
        }
    except Exception:
        # Best effort: an unreadable image is skipped, not reported.
        return None
def process_video(path, file_hash, work_dir):
    """Probe a video, extract up to four thumbnails and measure sharpness.

    Thumbnails are taken at 1/5, 2/5, 3/5 and 4/5 of the duration and stored
    as ``<work_dir>/thumbnails/<file_hash>_<time>.png``; thumbnails that
    already exist on disk are reused.

    Args:
        path: Path of the video file.
        file_hash: Digest of the file, used to name the thumbnails.
        work_dir: Working directory that holds the ``thumbnails`` folder.

    Returns:
        A dict with ``resolution`` ``(width, height)``, ``bitrate`` (0 when
        the stream reports none), ``laplacian`` and ``sobel`` averaged over
        the thumbnails that could be analysed (0 if none), and ``thumbnails``
        (list of thumbnail paths); ``None`` if the video has no positive
        duration, no video stream, no thumbnail, or probing failed.
    """
    thumbnail_dir = os.path.join(work_dir, 'thumbnails')
    os.makedirs(thumbnail_dir, exist_ok=True)
    try:
        format_probe = ffmpeg.probe(path)
        duration = float(format_probe['format'].get('duration', 0))
        if duration <= 0:
            return None
        selected_times = [duration * i / 5 for i in range(1, 5)]

        stream_probe = ffmpeg.probe(
            path,
            select_streams='v:0',
            show_entries='stream=width,height,bit_rate'
        )
        streams = stream_probe.get('streams', [{}])
        if not streams:
            return None
        stream = streams[0]
        w = int(stream.get('width', 0))
        h = int(stream.get('height', 0))
        bitrate = int(stream.get('bit_rate') or 0)

        thumb_paths = []
        for t in selected_times:
            t_str = f"{t:.3f}".replace('.', '_')
            thumb_name = f"{file_hash}_{t_str}.png"
            thumb_path = os.path.join(thumbnail_dir, thumb_name)
            if os.path.exists(thumb_path):
                # Reuse the thumbnail written by an earlier run.
                thumb_paths.append(thumb_path)
                continue
            try:
                out, _ = (
                    ffmpeg
                    .input(path, ss=str(t))
                    .output('pipe:', vframes=1, format='image2', vcodec='png', **{'qscale:v': '0'})
                    .run(capture_stdout=True, quiet=True)
                )
                if out:
                    with Image.open(io.BytesIO(out)) as img:
                        # optimize=True makes Pillow ignore compress_level, so
                        # the former compress_level=0 argument had no effect.
                        img.save(thumb_path, format='PNG', optimize=True)
                    thumb_paths.append(thumb_path)
            except Exception:
                # A frame that cannot be extracted is skipped.
                continue
        if not thumb_paths:
            return None

        laps, sobs = [], []
        for thumb_path in thumb_paths:
            try:
                with Image.open(thumb_path) as img:
                    # Normalise to RGB so grayscale or palette PNG frames can
                    # be analysed instead of being skipped.
                    rgb = np.array(img.convert('RGB'))
                gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
                lap = cv2.Laplacian(gray, cv2.CV_64F).var()
                sob = (cv2.Sobel(gray, cv2.CV_64F, 1, 0).var() +
                       cv2.Sobel(gray, cv2.CV_64F, 0, 1).var()) / 2
                laps.append(float(lap))
                sobs.append(float(sob))
            except Exception:
                # An unreadable thumbnail does not contribute to the averages.
                continue

        return {
            'resolution': (w, h),
            'bitrate': bitrate,
            'laplacian': sum(laps) / len(laps) if laps else 0,
            'sobel': sum(sobs) / len(sobs) if sobs else 0,
            'thumbnails': thumb_paths
        }
    except Exception:
        # Best effort: a video that cannot be probed is skipped.
        return None
def compare_items(a, b):
    """Comparison function that orders file entries from best to worst.

    Each entry gets a weighted score built from its pixel count, Sobel and
    Laplacian values.  The bitrate only contributes when both entries are
    videos; when the two entries have different types it is ignored.

    Args:
        a, b: Entry dicts with ``type`` and ``resolution`` and optionally
            ``sobel``, ``laplacian`` and ``bitrate``.

    Returns:
        -1 if *a* sorts before *b* (higher score), 1 if after, 0 on a tie.
        Only the sign is meaningful, as ``functools.cmp_to_key`` expects.
    """
    W_RESOLUTION = 1e8
    W_BITRATE = 1e5
    W_SOBEL = 1e3
    W_LAPLACIAN = 1e1

    def score(item, include_bitrate):
        total = item['resolution'][0] * item['resolution'][1] * W_RESOLUTION
        total += item.get('sobel', 0) * W_SOBEL
        total += item.get('laplacian', 0) * W_LAPLACIAN
        if include_bitrate and item['type'] == 'video':
            total += item.get('bitrate', 0) * W_BITRATE
        return total

    same_type = a['type'] == b['type']
    a_score = score(a, same_type)
    b_score = score(b, same_type)
    # Return the sign rather than int(b_score - a_score): truncation turned
    # any difference smaller than 1 into a tie, and int() raises on NaN/inf.
    return (b_score > a_score) - (b_score < a_score)
def start_gui(file_list, list_path):
    """Open the Tk review window for *file_list* and block until it closes.

    Keys: Left/Right switch files, Up/Down toggle the selection of the
    current file, F11 toggles fullscreen, Escape closes the window.

    Args:
        file_list: List of entry dicts (``path``, ``type``, ``resolution``,
            ``selected`` and, for videos, ``thumbnails`` / ``bitrate``).
            The ``selected`` flags are modified in place.
        list_path: JSON file the list is written back to on every selection
            change and when the window is closed.
    """
    if not file_list:
        # Navigation and display index into the list; nothing to review.
        print("没有可检查的文件")
        return

    class App:
        """Window state and event handlers of the review tool."""

        def __init__(self, root, files, list_path):
            self.root = root
            self.files = files
            self.list_path = list_path
            self.current_idx = 0
            self.fullscreen = True
            self.setup_ui()
            self.load_state()
            self.show_item()

        def setup_ui(self):
            """Create the labels, the display frame and the key bindings."""
            self.root.title("基于清晰度排序的图片、视频检查工具")
            # Apply the initial state directly.  Calling toggle_fullscreen()
            # here flipped the initial True to False, so the window never
            # started in fullscreen.
            self.apply_fullscreen()
            self.tip_label = tk.Label(self.root,
                                      text="排除:“水印、高斯&运动模糊、昏暗、显示不全、扭曲”内容",
                                      fg='red', font=('Arial', 14))
            self.tip_label.pack(pady=10)
            self.status_var = tk.StringVar()
            self.status_label = tk.Label(self.root, textvariable=self.status_var,
                                         fg='red', font=('Arial', 12))
            self.status_label.pack()
            self.info_var = tk.StringVar()
            self.info_label = tk.Label(self.root, textvariable=self.info_var,
                                       font=('Arial', 12), bg='white')
            self.info_label.place(relx=0.99, rely=0.03, anchor='ne')
            self.frame = tk.Frame(self.root)
            self.frame.pack(expand=True, fill='both')
            self.root.bind('<Left>', lambda e: self.navigate(-1))
            self.root.bind('<Right>', lambda e: self.navigate(1))
            self.root.bind('<Up>', lambda e: self.toggle_select())
            self.root.bind('<Down>', lambda e: self.toggle_select())
            self.root.bind('<F11>', lambda e: self.toggle_fullscreen())
            # Escape closes through on_close so the list is saved, exactly
            # like closing the window through the window manager.
            self.root.bind('<Escape>', lambda e: self.on_close())
            self.root.protocol("WM_DELETE_WINDOW", self.on_close)

        def apply_fullscreen(self):
            """Make the window match ``self.fullscreen``."""
            self.root.attributes('-fullscreen', self.fullscreen)
            if not self.fullscreen:
                self.root.geometry('800x600')

        def toggle_fullscreen(self, event=None):
            """Switch between fullscreen and an 800x600 window."""
            self.fullscreen = not self.fullscreen
            self.apply_fullscreen()

        def on_close(self):
            """Save the list and close the window."""
            self.save_list()
            self.root.destroy()

        def load_state(self):
            """Resume at the entry after the last selected one (wrapping)."""
            selected = [i for i, f in enumerate(self.files) if f.get('selected', False)]
            if selected:
                self.current_idx = (max(selected) + 1) % len(self.files)

        def navigate(self, delta):
            """Move *delta* entries forward or backward, wrapping around."""
            self.current_idx = (self.current_idx + delta) % len(self.files)
            self.show_item()

        def toggle_select(self):
            """Flip the selection of the current entry and save the list."""
            item = self.files[self.current_idx]
            # .get(): an entry without the key counts as not selected.
            item['selected'] = not item.get('selected', False)
            self.update_status()
            self.save_list()

        def update_status(self):
            """Refresh the selection label and the position/metadata label."""
            item = self.files[self.current_idx]
            self.status_var.set("已勾选" if item.get('selected', False) else "")
            w, h = item['resolution']
            info = f"{self.current_idx+1}/{len(self.files)} | 分辨率: {w}x{h}"
            if item['type'] != 'image':
                br = item.get('bitrate', 0)//1000
                info += f" | 码率: {br}kbps"
            self.info_var.set(info)

        def save_list(self):
            """Write the complete list to ``self.list_path`` as JSON."""
            with open(self.list_path, 'w') as f:
                json.dump(self.files, f, indent=2)

        def clear_frame(self):
            """Remove everything currently shown in the display frame."""
            for widget in self.frame.winfo_children():
                widget.destroy()

        def show_error(self, message):
            """Replace the frame content with a text message."""
            # Clear first: pack() is rejected by Tk while the frame still
            # holds children managed by grid().
            self.clear_frame()
            tk.Label(self.frame, text=message).pack()

        def viewport_size(self):
            """Return the (width, height) available for display, in pixels."""
            # Let Tk compute pending geometry; a widget reports 1x1 before.
            self.root.update_idletasks()
            width = self.frame.winfo_width()
            height = self.frame.winfo_height()
            if width <= 1 or height <= 1:
                width = self.root.winfo_width()
                height = self.root.winfo_height()
            if width <= 1 or height <= 1:
                width, height = 800, 600
            return width, height

        @staticmethod
        def fit_size(image_size, box_size):
            """Scale *image_size* to fit inside *box_size*, keeping the aspect ratio."""
            img_width, img_height = image_size
            ratio = min(box_size[0] / img_width, box_size[1] / img_height)
            # At least one pixel per side: resize() rejects a zero dimension.
            return max(1, int(img_width * ratio)), max(1, int(img_height * ratio))

        def show_item(self):
            """Display the current entry and refresh the labels."""
            self.clear_frame()
            item = self.files[self.current_idx]
            if item['type'] == 'image':
                self.show_image(item['path'])
            else:
                self.show_video()
            self.update_status()

        def show_image(self, path):
            """Show the image at *path*, scaled to fit, centred in the frame."""
            try:
                box = self.viewport_size()
                # The context manager closes the file handle once the
                # resized copy has been made.
                with Image.open(path) as img:
                    resized_img = img.resize(self.fit_size(img.size, box),
                                             Image.Resampling.LANCZOS)
                photo = ImageTk.PhotoImage(resized_img)
            except Exception as e:
                self.show_error(f"无法加载图片:{str(e)}")
                return
            label = tk.Label(self.frame, image=photo)
            label.image = photo  # keep a reference so Tk does not lose the image
            label.place(relx=0.5, rely=0.5, anchor='center')

        def show_video(self):
            """Show up to four thumbnails of the current video in a 2x2 grid."""
            rows, cols = 2, 2
            item = self.files[self.current_idx]
            try:
                thumb_paths = item.get('thumbnails', [])
                if not thumb_paths:
                    raise ValueError("没有找到关键帧图片")
                frame_width, frame_height = self.viewport_size()
                cell = (max(1, frame_width // cols), max(1, frame_height // rows))
                # Build every picture before placing any of them, so a
                # failure never leaves a half-filled grid behind.
                photos = []
                for thumb_path in thumb_paths[:4]:
                    try:
                        with Image.open(thumb_path) as img:
                            if img.size[0] == 0 or img.size[1] == 0:
                                continue
                            resized_img = img.resize(self.fit_size(img.size, cell),
                                                     Image.Resampling.LANCZOS)
                    except Exception:
                        # An unreadable thumbnail is left out.
                        continue
                    photos.append(ImageTk.PhotoImage(resized_img))
                if not photos:
                    raise ValueError("无法加载关键帧图片")
            except Exception as e:
                self.show_error(f"无法加载视频:{str(e)}")
                return
            for i, photo in enumerate(photos):
                label = tk.Label(self.frame, image=photo)
                label.image = photo  # keep a reference so Tk does not lose the image
                label.grid(row=i//cols, column=i%cols, sticky='nsew')
            for i in range(rows):
                self.frame.rowconfigure(i, weight=1)
            for j in range(cols):
                self.frame.columnconfigure(j, weight=1)

    root = tk.Tk()
    root.resizable(True, True)
    app = App(root, file_list, list_path)
    root.mainloop()
# Run only when executed as a script, not when imported (e.g. by the
# spawned worker processes).
if __name__ == '__main__':
    # Must be called first in the main module; per the multiprocessing docs
    # it only matters when the program is frozen into a Windows executable.
    multiprocessing.freeze_support()
    main()
If this helps you, and if you have enough money, can you give me 1$? I am facing a financial crisis.
If you do this, I will pass on the kindness.
This is my bank card number:5592921230414708
Thank you!!