在使用YOLO前的数据处理（xml、json转txt，边界框可视化，标签映射，图片-标签文件对齐检查）-CSDN博客

本文链接：https://blog.csdn.net/weixin_64788227/article/details/148839278

1.xml、json转txt

有时我们拿到的数据集标注并不是txt格式，而是xml或者json格式。如果要用YOLO进行训练，就需要先将其转换为txt格式。

xml-txt

import os
import xml.etree.ElementTree as ET


def _xml_declared_size(root):
    """Return ``(width, height)`` from the ``<size>`` element of *root*, or ``None``."""
    size_elem = root.find('size')
    if size_elem is None:
        return None
    try:
        # findtext() yields None for a missing child, which float() rejects
        # with TypeError; empty or non-numeric text raises ValueError.
        return (int(float(size_elem.findtext('width'))),
                int(float(size_elem.findtext('height'))))
    except (TypeError, ValueError):
        return None


def _parse_voc_box(obj):
    """Return ``(xmin, ymin, xmax, ymax)`` of an ``<object>`` as floats, or ``None``."""
    bndbox = obj.find('bndbox')
    if bndbox is None:
        return None
    try:
        # float() instead of int(): some annotation tools write "12.0" or
        # sub-pixel values, which int() refuses to parse.
        return tuple(float(bndbox.findtext(tag))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
    except (TypeError, ValueError):
        return None


def convert_xml_to_yolo(xml_dir, img_dir, output_dir, classes):
    """
    Convert XML annotation files into the TXT label format used by YOLO.

    For every ``*.xml`` file in ``xml_dir`` the image with the same base name
    is looked up in ``img_dir`` (``.jpg``, ``.jpeg``, ``.png``, ``.bmp``) and a
    ``<base name>.txt`` file is written to ``output_dir``. Each written line is
    ``<class_id> <x_center> <y_center> <width> <height>``, where the four box
    values are divided by the image width/height.

    The image size is read with Pillow. When Pillow is not installed, the
    ``<size>`` element of the XML file is used instead. Files that cannot be
    parsed, have no matching image, or have no usable size are skipped with a
    printed message; objects with an unknown class or an unusable ``<bndbox>``
    are skipped individually.

    Args:
        xml_dir (str): Directory containing the XML annotation files.
        img_dir (str): Directory containing the matching images (used to
            obtain the image size).
        output_dir (str): Directory the TXT files are written to; created if
            it does not exist.
        classes (list): All class names of the dataset. The index of a name
            in this list becomes its class id, so the order must match the
            one used for YOLO training.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Resolve Pillow once instead of once per annotation file.
    try:
        from PIL import Image
    except ImportError:
        Image = None
    pillow_warning_shown = False

    for xml_file in os.listdir(xml_dir):
        if not xml_file.endswith('.xml'):
            continue

        xml_path = os.path.join(xml_dir, xml_file)
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError as e:
            # One broken file should not abort the whole conversion.
            print(f"错误：解析 {xml_file} 时发生错误：{e}，跳过此文件。")
            continue

        # The image is assumed to share the XML file's base name.
        img_name_without_ext = os.path.splitext(xml_file)[0]
        img_path = None
        for ext in ['.jpg', '.jpeg', '.png', '.bmp']:
            temp_img_path = os.path.join(img_dir, img_name_without_ext + ext)
            if os.path.exists(temp_img_path):
                img_path = temp_img_path
                break

        if img_path is None:
            print(f"警告：未找到与 {xml_file} 对应的图片文件，跳过此XML文件。")
            continue

        if Image is not None:
            try:
                with Image.open(img_path) as img:
                    img_width, img_height = img.size
            except Exception as e:
                print(f"错误：打开图片 {img_path} 时发生错误：{e}，跳过此文件。")
                continue
        else:
            if not pillow_warning_shown:
                print("警告：未安装Pillow库。请安装 'pip install Pillow' 以正确获取图片尺寸。")
                print("将尝试从XML中获取尺寸（如果存在），否则将跳过此文件。")
                pillow_warning_shown = True
            declared_size = _xml_declared_size(root)
            if declared_size is None:
                print(f"错误：无法获取 {xml_file} 的图片尺寸，跳过此文件。")
                continue
            img_width, img_height = declared_size

        # A zero or negative size would make the normalisation below divide by zero.
        if img_width <= 0 or img_height <= 0:
            print(f"错误：无法获取 {xml_file} 的图片尺寸，跳过此文件。")
            continue

        txt_output_path = os.path.join(output_dir, img_name_without_ext + '.txt')
        with open(txt_output_path, 'w', encoding='utf-8') as f:
            for obj in root.findall('object'):
                class_name = obj.findtext('name')
                if class_name not in classes:
                    print(f"警告：类别 '{class_name}' 不在预设的类别列表中，跳过此对象。")
                    continue

                class_id = classes.index(class_name)

                box = _parse_voc_box(obj)
                if box is None:
                    print(f"警告：{xml_file} 中类别 '{class_name}' 的边界框缺失或无效，跳过此对象。")
                    continue
                xmin, ymin, xmax, ymax = box

                # Corner coordinates -> centre/size, each divided by the image size.
                x_center = (xmin + xmax) / 2.0 / img_width
                y_center = (ymin + ymax) / 2.0 / img_height
                width = (xmax - xmin) / img_width
                height = (ymax - ymin) / img_height

                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

    print("转换完成！")


# --- Usage example ---
if __name__ == "__main__":
    # Adjust the three paths and the class list below to match your dataset.
    annotations_dir = 'C:/Users/Administrator/Desktop/data/VOC2020/Annotations'  # folder with the XML annotation files
    images_dir = 'C:/Users/Administrator/Desktop/data/VOC2020/JPEGImages'  # folder with the matching images
    labels_out_dir = 'output'  # folder the converted TXT files are written to

    # Every class name of the dataset, in the same order used for YOLO training.
    # The position in the list is the class id, so the five names below map to
    # the ids 0, 1, 2, 3 and 4.
    class_names = ['fire_car', 'car', 'at_car', 'person', 'police']

    convert_xml_to_yolo(
        xml_dir=annotations_dir,
        img_dir=images_dir,
        output_dir=labels_out_dir,
        classes=class_names,
    )

json-txt

import os
import json
from PIL import Image

def convert_json_to_yolo(json_dir, img_dir, output_dir, classes):
    """
    Convert JSON annotation files into the TXT label format used by YOLO.

    For every ``*.json`` file in ``json_dir`` the image with the same base
    name is looked up in ``img_dir`` (``.jpg``, ``.jpeg``, ``.png``, ``.bmp``)
    and a ``<base name>.txt`` file is written to ``output_dir``. Each written
    line is ``<class_id> <x_center> <y_center> <width> <height>``, where the
    four box values are divided by the image width/height.

    The JSON document is expected to hold a ``shapes`` list whose entries have
    a ``label`` and a ``points`` list of ``[x, y]`` pairs. The box of a shape
    is the axis-aligned bounding box of *all* its points, so both two-corner
    rectangles and polygons are handled. Adapt this if your JSON is laid out
    differently.

    The image size is read with Pillow. When Pillow is not installed, the
    ``imageWidth`` / ``imageHeight`` entries of the JSON document are used if
    present (NOTE(review): these key names follow the LabelMe layout - confirm
    for your data). Files that cannot be processed are skipped with a printed
    message and no TXT file is written for them.

    Args:
        json_dir (str): Directory containing the JSON annotation files.
        img_dir (str): Directory containing the matching images (used to
            obtain the image size).
        output_dir (str): Directory the TXT files are written to; created if
            it does not exist.
        classes (list): All class names of the dataset. The index of a name
            in this list becomes its class id, so the order must match the
            one used for YOLO training.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Resolve Pillow locally so the converter still works without it when the
    # JSON files carry the image size themselves.
    try:
        from PIL import Image
    except ImportError:
        Image = None

    for json_file in os.listdir(json_dir):
        if not json_file.endswith('.json'):
            continue

        json_path = os.path.join(json_dir, json_file)
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
            # one broken file should not abort the whole conversion.
            print(f"错误：解析 {json_file} 时发生错误：{e}，跳过此文件。")
            continue

        # The image is assumed to share the JSON file's base name.
        img_name_without_ext = os.path.splitext(json_file)[0]
        img_path = None
        for ext in ['.jpg', '.jpeg', '.png', '.bmp']:
            temp_img_path = os.path.join(img_dir, img_name_without_ext + ext)
            if os.path.exists(temp_img_path):
                img_path = temp_img_path
                break

        if img_path is None:
            print(f"警告：未找到与 {json_file} 对应的图片文件，跳过此JSON文件。")
            continue

        if Image is not None:
            try:
                with Image.open(img_path) as img:
                    img_width, img_height = img.size
            except Exception as e:
                print(f"错误：打开图片 {img_path} 时发生错误：{e}，跳过此文件。")
                continue
        else:
            img_width = data.get('imageWidth') if isinstance(data, dict) else None
            img_height = data.get('imageHeight') if isinstance(data, dict) else None

        size_is_usable = (
            isinstance(img_width, (int, float))
            and isinstance(img_height, (int, float))
            and img_width > 0
            and img_height > 0
        )
        if not size_is_usable:
            print(f"错误：无法获取 {json_file} 的图片尺寸，跳过此文件。")
            continue

        # Check the structure before opening the output file, so a skipped
        # JSON file does not leave an empty label file behind.
        if not isinstance(data, dict) or 'shapes' not in data:
            print(f"警告：JSON文件 {json_file} 中未找到 'shapes' 键，跳过此文件。请检查JSON结构。")
            continue

        txt_output_path = os.path.join(output_dir, img_name_without_ext + '.txt')
        with open(txt_output_path, 'w', encoding='utf-8') as f:
            for shape in data['shapes']:
                class_name = shape.get('label') if isinstance(shape, dict) else None

                if class_name not in classes:
                    print(f"警告：类别 '{class_name}' 不在预设的类别列表中，跳过此对象。")
                    continue

                class_id = classes.index(class_name)

                try:
                    points = shape.get('points') or []
                    xs = [float(point[0]) for point in points]
                    ys = [float(point[1]) for point in points]
                except (TypeError, ValueError, IndexError):
                    xs, ys = [], []

                # At least two points are needed to span a box.
                if len(xs) < 2:
                    print(f"警告：{json_file} 中类别 '{class_name}' 的标注点不足或无效，跳过此对象。")
                    continue

                # Bounding box over every point, not only the first two.
                xmin, xmax = min(xs), max(xs)
                ymin, ymax = min(ys), max(ys)

                # Corner coordinates -> centre/size, each divided by the image size.
                x_center = (xmin + xmax) / 2.0 / img_width
                y_center = (ymin + ymax) / 2.0 / img_height
                width = (xmax - xmin) / img_width
                height = (ymax - ymin) / img_height

                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

    print("转换完成！")


# --- Usage example ---
if __name__ == "__main__":
    # Adjust the three paths and the class list below to match your dataset.
    annotations_dir = 'C:/Users/Administrator/Desktop/data/VOC2020/Annotations'  # folder with the JSON annotation files
    images_dir = 'C:/Users/Administrator/Desktop/data/VOC2020/JPEGImages'  # folder with the matching images
    labels_out_dir = 'output_json_to_yolo'  # folder the converted TXT files are written to

    # Every class name of the dataset, in the same order used for YOLO training
    # (example values).
    class_names = ['fire_car', 'car', 'at_car', 'person', 'police']

    convert_json_to_yolo(
        json_dir=annotations_dir,
        img_dir=images_dir,
        output_dir=labels_out_dir,
        classes=class_names,
    )

2.边界框可视化

当我们拿到别人开源的目标检测任务的数据集时，在使用前我们应该进行边界框可视化，来判断标注的效果如何，这直接决定了我们模型训练的效果。下面是进行边界框可视化的代码，可以直接复制使用，只需更换一下txt文件与图片的地址即可。

import cv2
import numpy as np
import os


def draw_boxes_on_image(image_path, label_path, output_dir, img_size=(640, 640)):
    """
    Draw the boxes of a YOLO label file on an image and save the result.

    Each label line is expected to be
    ``<class_id> <x_center> <y_center> <width> <height>``, with the four box
    values normalised to 0-1 relative to the image's own width and height.
    The pixel box is therefore obtained by multiplying directly with the size
    of the loaded image; no intermediate network input size is involved.

    The annotated image is written to ``output_dir`` under the same file name.
    Problems are reported with ``print`` instead of being raised.

    Args:
        image_path (str): Full path of the input image.
        label_path (str): Full path of the matching YOLO label file.
        output_dir (str): Directory the annotated image is saved to; created
            if it does not exist.
        img_size (tuple): Ignored. Kept only so existing callers that pass a
            training size keep working; scaling through it and back cancels
            out mathematically and only added rounding error.
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            print(f"错误: 无法读取图片 {image_path}")
            return

        # Size of the image the labels refer to.
        h, w = img.shape[:2]

        os.makedirs(output_dir, exist_ok=True)

        with open(label_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        color = (0, 255, 0)  # green (BGR)
        thickness = 2
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.7
        font_thickness = 2

        for line in lines:
            parts = line.strip().split()
            if not parts:
                # Blank lines carry no annotation.
                continue
            if len(parts) < 5:
                print(f"警告: 标签文件 {label_path} 中的行格式不正确: {line.strip()}")
                continue

            try:
                class_id = int(parts[0])
                x_center, y_center, bbox_width, bbox_height = map(float, parts[1:5])
            except ValueError:
                # A single unparsable line should not discard the whole image.
                print(f"警告: 标签文件 {label_path} 中的行格式不正确: {line.strip()}")
                continue

            # Normalised centre/size -> pixel corners on this image.
            x_min = int(round((x_center - bbox_width / 2) * w))
            y_min = int(round((y_center - bbox_height / 2) * h))
            x_max = int(round((x_center + bbox_width / 2) * w))
            y_max = int(round((y_center + bbox_height / 2) * h))

            cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, thickness)

            # Place the caption above the box, or just inside it when the box
            # sits too close to the top edge for the text to be visible.
            text = f"Class: {class_id}"
            text_y = y_min - 10 if y_min >= 30 else y_min + 25
            cv2.putText(img, text, (x_min, text_y), font, font_scale, color, font_thickness)

        output_image_path = os.path.join(output_dir, os.path.basename(image_path))
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(output_image_path, img):
            print(f"错误: 无法保存图片 {output_image_path}")
            return
        print(f"已处理 {image_path} 并保存到 {output_image_path}")

    except Exception as e:
        print(f"处理 {image_path} 时发生错误: {e}")


def process_dataset(image_folder, label_folder, output_folder, img_size=(640, 640)):
    """
    Draw bounding boxes for every image in a folder that has a label file.

    Files in ``image_folder`` whose name ends (case-insensitively) in
    ``.png``, ``.jpg``, ``.jpeg``, ``.bmp`` or ``.tiff`` are paired with
    ``<base name>.txt`` in ``label_folder`` and handed to
    ``draw_boxes_on_image``. Images without a label file only produce a
    printed warning. If either folder does not exist, an error is printed and
    nothing is processed.

    Args:
        image_folder (str): Directory containing the images.
        label_folder (str): Directory containing the YOLO label files.
        output_folder (str): Directory the annotated images are saved to.
        img_size (tuple): Forwarded unchanged to ``draw_boxes_on_image``.
    """
    if not os.path.isdir(image_folder):
        print(f"错误: 图片目录不存在: {image_folder}")
        return
    if not os.path.isdir(label_folder):
        print(f"错误: 标签目录不存在: {label_folder}")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    for entry in os.listdir(image_folder):
        # Anything that is not an image is ignored.
        if not entry.lower().endswith(image_suffixes):
            continue

        picture = os.path.join(image_folder, entry)

        # The label file shares the image's base name.
        stem, _ = os.path.splitext(entry)
        annotation = os.path.join(label_folder, stem + '.txt')

        if not os.path.exists(annotation):
            print(f"警告: 未找到 {picture} 对应的标签文件 {annotation}")
            continue

        draw_boxes_on_image(picture, annotation, output_folder, img_size)


if __name__ == "__main__":
    # --- Configure your paths ---
    source_images = 'C:/Users/Administrator/Desktop/data/fire/VOCdevkit/train/images'  # your image folder
    source_labels = 'C:/Users/Administrator/Desktop/data/fire/VOCdevkit/train/labels'  # your label folder
    result_dir = 'fire'  # folder the annotated images are saved to

    # Size (width, height) passed to process_dataset as its img_size argument.
    size_argument = (640, 640)

    print("开始处理数据集...")
    process_dataset(
        image_folder=source_images,
        label_folder=source_labels,
        output_folder=result_dir,
        img_size=size_argument,
    )
    print(f"数据集处理完成。结果保存在 {result_dir}")

3.标签映射

在实际工作中，你可能想转换一下标签类别的索引，比如将0换为1、将2换为3。这里提供一个脚本，使用时只需修改标签文件所在文件夹的路径，以及转换前后的类别索引。

import os

def remap_yolo_labels(folder_path, mapping=None):
    """
    Rewrite the class ID (first column) of every YOLO label file in a folder.

    Every ``*.txt`` file directly inside ``folder_path`` is read, the first
    field of each non-empty line is replaced according to ``mapping``, and the
    file is written back in place. Class IDs that are not keys of ``mapping``
    are left untouched, and empty lines are kept. A file that fails to process
    (for example because a line does not start with an integer) is reported
    with ``print`` and left as it was, since writing only starts after the
    whole file has been read successfully.

    Args:
        folder_path (str): Path of the folder containing the YOLO label files.
        mapping (dict | None): ``{old_class_id: new_class_id}``. All entries
            are applied in one lookup per line, so swaps such as
            ``{0: 1, 1: 0}`` work as expected. Defaults to ``{0: 3}``, the
            remapping this function applied before the parameter existed.
    """
    if mapping is None:
        mapping = {0: 3}

    # Normalise once: integer keys for the lookup, string values for output.
    replacements = {int(old): str(int(new)) for old, new in mapping.items()}

    if not os.path.isdir(folder_path):
        print(f"Error: Folder not found at '{folder_path}'")
        return

    for filename in os.listdir(folder_path):
        # YOLO label files use the '.txt' extension.
        if not filename.endswith(".txt"):
            continue

        filepath = os.path.join(folder_path, filename)
        updated_lines = []

        try:
            with open(filepath, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if not parts:
                        updated_lines.append("")  # keep empty lines as they are
                        continue
                    class_id = int(parts[0])
                    # Unmapped IDs keep their original text.
                    parts[0] = replacements.get(class_id, parts[0])
                    updated_lines.append(" ".join(parts))

            # Only reached when every line parsed, so a bad file is never
            # half-rewritten.
            with open(filepath, 'w') as f:
                f.writelines(line + '\n' for line in updated_lines)
            print(f"Processed '{filename}': Class IDs remapped.")

        except Exception as e:
            print(f"Error processing file '{filename}': {e}")

# --- Usage example ---
if __name__ == "__main__":
    # IMPORTANT: point this at the folder that holds your YOLO label files, e.g.
    #   "C:/Users/YourUser/Desktop/yolo_labels"  or  "/home/user/data/yolo_labels"
    label_folder = "C:/Users/Administrator/Desktop/data/police/TrafficPolice/valid/labels"

    # If the folder is missing, create it together with two small sample
    # label files so the script can be tried out.
    if not os.path.exists(label_folder):
        os.makedirs(label_folder)
        print(f"Created dummy folder: {label_folder}")

        with open(os.path.join(label_folder, "image1.txt"), "w") as sample:
            sample.writelines([
                "0 0.1 0.2 0.3 0.4\n",
                "1 0.5 0.6 0.7 0.8\n",
                "2 0.9 0.8 0.7 0.6\n",
            ])
        print("Created dummy file: image1.txt")

        with open(os.path.join(label_folder, "image2.txt"), "w") as sample:
            sample.writelines([
                "1 0.1 0.2 0.3 0.4\n",
                "0 0.5 0.6 0.7 0.8\n",
            ])
        print("Created dummy file: image2.txt")

    print(f"\n--- Starting remapping in '{label_folder}' ---")
    remap_yolo_labels(label_folder)
    print("\n--- Remapping complete ---")

    # Optional: print every label file so the result can be checked by eye.
    print("\n--- Verifying changes ---")
    for entry in os.listdir(label_folder):
        if not entry.endswith(".txt"):
            continue
        print(f"\nContent of '{entry}':")
        with open(os.path.join(label_folder, entry), 'r') as handle:
            print(handle.read().strip())

4.图片-标签文件对齐检查

在使用 YOLO 进行训练时，如果出现图片（images）与标签文件（labels）数量不一致的情况，我们需要找出哪些图片缺少对应的标签文件。这个过程非常重要，因为 YOLO 模型在训练时需要每张图片都有一个对应的标签文件来指示图中目标的位置和类别。如果缺少标签文件，模型将无法正确学习，甚至可能导致训练失败。这里我们提供一个脚本来检测是否存在没有对应标签文件的图片。

import os

def find_image_without_txt(image_folder_path, txt_folder_path):
    """
    List the images in one folder that have no same-named TXT file in another.

    A file counts as an image when its name ends (case-insensitively) in
    ``.jpg``, ``.jpeg``, ``.png``, ``.gif``, ``.bmp``, ``.tiff`` or ``.webp``.
    An image is considered labelled when ``txt_folder_path`` contains a file
    with the same base name and a ``.txt`` extension (also matched
    case-insensitively).

    Args:
        image_folder_path (str): Path of the folder containing the images.
        txt_folder_path (str): Path of the folder containing the TXT label files.

    Returns:
        list: File names of the images that have no matching TXT file, in the
        order ``os.listdir`` returned them.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')

    image_entries = os.listdir(image_folder_path)

    # Base names of all label files, kept in a set for constant-time lookups.
    labelled_stems = set()
    for entry in os.listdir(txt_folder_path):
        if entry.lower().endswith('.txt'):
            stem, _ = os.path.splitext(entry)
            labelled_stems.add(stem)

    return [
        name
        for name in image_entries
        if name.lower().endswith(image_suffixes)
        and os.path.splitext(name)[0] not in labelled_stems
    ]


# --- Usage example ---
if __name__ == "__main__":
    # Replace both paths with your own folders.
    image_folder_to_scan = 'C:/Users/Administrator/Desktop/data/val/img'  # folder with the images
    txt_folder_to_scan = 'C:/Users/Administrator/Desktop/data/val/label'  # folder with the TXT label files

    # Both folders must exist before they can be compared.
    if not os.path.isdir(image_folder_to_scan):
        print(f"错误：图片文件夹 '{image_folder_to_scan}' 不存在。")
    elif not os.path.isdir(txt_folder_to_scan):
        print(f"错误：TXT标签文件夹 '{txt_folder_to_scan}' 不存在。")
    else:
        unlabeled_images = find_image_without_txt(image_folder_to_scan, txt_folder_to_scan)

        if not unlabeled_images:
            print("所有图片都有对应的TXT标签文件。")
        else:
            print("以下图片没有对应的TXT标签文件：")
            # One file name per line.
            print(*unlabeled_images, sep="\n")

实际使用时，只需更换存放图片与标签文件的文件夹路径即可。若存在异常，则会提示哪些图片没有对应的txt标签文件，若没有异常，则提示所有图片都有标签文件。若对大家有帮助，请点赞收藏一下，以免用到的时候找不到哦。