5.12-5.16随笔-CSDN博客

本文链接：https://blog.csdn.net/qq_40725313/article/details/147954750

conda 创建新的环境
windows的cmake3.28安装
OrbbecSDK
windows 7z windows cannot create symbolic link : 客户端没有所需的特权。
Cmake-gui发生pybind11配置错误
编译生成的是py312版本的sdk，目前看来只有创建新的py环境了
vscode launch.json 配置
奥比中光深度相机的对齐
Cornell数据集的标注

conda 创建新的环境

conda create -n ssj python=3.9

windows的cmake3.28安装

OrbbecSDK

windows 下的环境配置

安装驱动
配置opencv
opencv 官网

点击Library，进入选择对应的linux版本

直接解压到对应的位置即可
在这里插入图片描述

元数据注册

在这里插入图片描述

奥比中光深度相机（一）：环境配置
 pyorbbecsdk奥比中光python版本SDK在Windows下环境配置笔记
 pyorbbecsdk源码编译 (Windows)

windows 7z windows cannot create symbolic link : 客户端没有所需的特权。

在浏览器中下载zip文件,用7.zip进行解压出现如标题问题。
现象分析：
download直接下载到c盘中，由于所在文件夹有权限限制。无法进行正常解压。
解决方法：
7.zip解压时使用管理员权限进行解压，解压时使用管理员权限。

Cmake-gui发生pybind11配置错误

在这里插入图片描述

CMake Error at CMakeLists.txt:38 (find_package):
  By not providing "Findpybind11.cmake" in CMAKE_MODULE_PATH this project has
  asked CMake to find a package configuration file provided by "pybind11",
  but CMake did not find one.

  Could not find a package configuration file provided by "pybind11" with any
  of the following names:

    pybind11Config.cmake
    pybind11-config.cmake

  Add the installation prefix of "pybind11" to CMAKE_PREFIX_PATH or set
  "pybind11_DIR" to a directory containing one of the above files.  If
  "pybind11" provides a separate development package or SDK, be sure it has
  been installed.

解决方案：Cmake-gui发生pybind11配置错误

编译生成的是py312版本的sdk，目前看来只有创建新的py环境了

在这里插入图片描述

vscode launch.json 配置

{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": false,
             "args": [],
 
            "env":  {"CUDA_VISIBLE_DEVICES":"0"}
        }
    ]
}

奥比中光深度相机的对齐

在这里插入图片描述

深度图默认是640x400的，RGB图默认大小是640x480的，如何对齐呢？

from ObTypes import *
from Property import *
import Pipeline
import cv2
import numpy as np
import os
from datetime import datetime

class GeminiCamera:
    def __init__(self):
        # Start and configure
        self.pipeline = Pipeline.Pipeline(None, None)
        self.config = Pipeline.Config()
        self.profiles = None
        self.videoProfile = None
        self.colorProfile = None
        self.depthProfile = None
        
        # 创建保存图像的目录
        self.rgb_save_dir = r"F:\dataset\rgb_image"
        self.depth_save_dir = r"F:\dataset\depth_image"
        
        # 确保目录存在
        os.makedirs(self.rgb_save_dir, exist_ok=True)
        os.makedirs(self.depth_save_dir, exist_ok=True)

    def connect(self):
        # Color
        self.profiles = self.pipeline.getStreamProfileList(OB_PY_SENSOR_COLOR)
        self.videoProfile = self.profiles.getProfile(0)
        self.colorProfile = self.videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
        self.config.enableStream(self.colorProfile)
        # Depth
        self.profiles = self.pipeline.getStreamProfileList(OB_PY_SENSOR_DEPTH)
        self.videoProfile = self.profiles.getProfile(0)
        self.depthProfile = self.videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
        self.config.enableStream(self.depthProfile)
        
        # Configure alignment mode to software D2C alignment
        self.config.setAlignMode(OB_PY_ALIGN_D2C_SW_MODE)

        self.pipeline.start(self.config, None)
        # Set Mirror
        if self.pipeline.getDevice().isPropertySupported(OB_PY_PROP_COLOR_MIRROR_BOOL, OB_PY_PERMISSION_WRITE):
            self.pipeline.getDevice().setBoolProperty(OB_PY_PROP_COLOR_MIRROR_BOOL, False)
        if self.pipeline.getDevice().isPropertySupported(OB_PY_PROP_DEPTH_MIRROR_BOOL, OB_PY_PERMISSION_WRITE):
            self.pipeline.getDevice().setBoolProperty(OB_PY_PROP_DEPTH_MIRROR_BOOL, False)

    def get_image_bundle(self):
        frameSet = self.pipeline.waitForFrames(100)
        if frameSet == None:
            return None
        colorFrame = frameSet.colorFrame()
        depthFrame = frameSet.depthFrame()
        if colorFrame == None or depthFrame == None:
            return None
        
        # 获取帧的大小、数据、宽高
        colorData = colorFrame.data()
        depthData = depthFrame.data()
        colorWidth = colorFrame.width()
        colorHeight = colorFrame.height()
        colorFormat = colorFrame.format()
        depthWidth = depthFrame.width()
        depthHeight = depthFrame.height()
        valueScale = depthFrame.getValueScale()

        newColorData = colorData
        # Resize the color frame data to (height,width,3)
        if colorFormat == OB_PY_FORMAT_MJPG:
            # 将数据帧MJPG解码为RGB格式
            newColorData = cv2.imdecode(newColorData,1)
            # Decode data frame MJPG to RGB format
            if newColorData is not None:
                newColorData = np.resize(newColorData,(colorHeight, colorWidth, 3))
        elif colorFormat == OB_PY_FORMAT_RGB888:
            # Decode data frame MJPG to RGB format
            newColorData = np.resize(newColorData,(colorHeight, colorWidth, 3))
            # Convert frame data RGB to BGR
            newColorData = cv2.cvtColor(newColorData, cv2.COLOR_RGB2BGR)
        elif colorFormat == OB_PY_FORMAT_YUYV:
            newColorData = np.resize(newColorData,(colorHeight, colorWidth, 2))
            # Converting data frame YUYV to RGB data
            newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_YUYV)
        elif colorFormat == OB_PY_FORMAT_UYVY:
            newColorData = np.resize(newColorData,(colorHeight, colorWidth, 2))
            # Converting data frame YUYV to RGB data
            newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_UYVY)
        elif colorFormat == OB_PY_FORMAT_I420:
            newColorData = newColorData.reshape((colorHeight * 3 // 2, colorWidth))
            newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_I420)
            newColorData = cv2.resize(newColorData, (colorWidth, colorHeight))

        # Resize the depth frame data to (height,width,2)
        depthData = np.resize(depthData,(depthHeight, depthWidth, 2))
        # Convert frame data from 8bit to 16bit
        newDepthData = depthData[:,:,0]+depthData[:,:,1]*256          
        # Convert frame data to 1mm units
        newDepthData = (newDepthData * valueScale).astype('uint16')

        # 归一化到uint8并扩展通道
        normalized_image = (newDepthData / 32).astype('uint8')
        
        # Convert depth frame data GRAY to RGB
        outputDepthImage = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2RGB) 
        
        # Need to scale to the same resolution when the depth is not the same as the resolution of the color
        if colorHeight != depthHeight:
            outputDepthImage = cv2.resize(outputDepthImage,(colorWidth,colorHeight))

        return {
            'rgb': newColorData,
            'aligned_depth': outputDepthImage,
            'raw_depth': newDepthData  # 新增原始深度数据，用于保存为TIFF
        }

    def plot_image_bundle(self):
        images = self.get_image_bundle()
        if images == None:
            return None
        
        rgb = images['rgb']
        depth = images['aligned_depth']
        newData = cv2.addWeighted(rgb, 0.5, depth, 0.5, 0)

        cv2.namedWindow("SyncAlignViewer", cv2.WINDOW_NORMAL)
        cv2.imshow("SyncAlignViewer", newData)
        return images  # 返回图像数据，供保存使用

    def save_images(self, images):
        """保存RGB和深度图像到指定目录"""
        if images is None:
            print("没有可用的图像数据")
            return
        
        # 生成基于时间的文件名
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        rgb_filename = os.path.join(self.rgb_save_dir, f"{timestamp}.jpg")
        depth_filename = os.path.join(self.depth_save_dir, f"{timestamp}.tiff")
        
        # 保存RGB图像
        cv2.imwrite(rgb_filename, images['rgb'])
        
        # 保存原始深度图像为TIFF格式（16位无符号整数）
        # 注意：OpenCV保存TIFF时会自动处理16位格式
        cv2.imwrite(depth_filename, images['raw_depth'])
        
        print(f"已保存: {rgb_filename} 和 {depth_filename}")

if __name__ == '__main__':
    q = 113
    esc = 27
    x_key = 120  # X键的ASCII码
    cam = GeminiCamera()
    cam.connect()
    
    while True:
        key = cv2.waitKey(1)
        images = cam.plot_image_bundle()
        
        # 检测按键事件
        if key == esc or key == q:
            break  # 退出循环
        elif key == x_key:
            # 按下X键，保存图像
            cam.save_images(images)
    
    cv2.destroyAllWindows()
    cam.pipeline.stop()

上述代码能够将深度图和RGB图对齐，都保存为600x480的大小，且深度图保存为tiff格式

from ObTypes import *
from Property import *
import Pipeline
import StreamProfile
import Device
from Error import ObException
import cv2
import numpy as np
import sys
import math
import os
from datetime import datetime  # 添加时间模块

q = 113
ESC = 27
X_KEY = 120  # X键的ASCII码值

alpha = 0.6  # 设定深度图的透明程度，一般0.8以下都可以

frameSet = None
colorFrame = None
depthFrame = None

# 定义保存路径
rgb_save_dir = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\rgb_image"
depth_save_dir = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\depth_image"

# 确保保存目录存在
for dir_path in [rgb_save_dir, depth_save_dir]:
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print(f"创建目录: {dir_path}")

pipe = Pipeline.Pipeline(None, None)
config = Pipeline.Config()

# 彩色流配置
profiles = pipe.getStreamProfileList(OB_PY_SENSOR_COLOR)
videoProfile = profiles.getProfile(0)
colorProfile = videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
config.enableStream(colorProfile)
color_windowsWidth = colorProfile.width()
color_windowsHeight = colorProfile.height()
print(f"彩色图尺寸: {color_windowsWidth}x{color_windowsHeight}")

# 深度流配置
profiles = pipe.getStreamProfileList(OB_PY_SENSOR_DEPTH)
videoProfile = profiles.getProfile(0)
depthProfile = videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
config.enableStream(depthProfile)
depth_windowsWidth = depthProfile.width()
depth_windowsHeight = depthProfile.height()
print(f"深度图尺寸: {depth_windowsWidth}x{depth_windowsHeight}")

# 设置对齐模式
config.setAlignMode(OB_PY_ALIGN_D2C_SW_MODE)
pipe.start(config, None)

while True:
    key = cv2.waitKey(1)
    frameSet = pipe.waitForFrames(100)
    
    if frameSet is None:
        continue

    colorFrame = frameSet.colorFrame()
    depthFrame = frameSet.depthFrame()
    
    if colorFrame and depthFrame:
        # 解析彩色帧
        colorData = colorFrame.data()
        colorFormat = colorFrame.format()
        if colorFormat == OB_PY_FORMAT_MJPG:
            colorData = cv2.imdecode(colorData, 1)
            if colorData is not None:
                colorData = np.resize(colorData, (color_windowsHeight, color_windowsWidth, 3))
        
        # 解析深度帧
        depthData = depthFrame.data()
        depthData = np.resize(depthData, (depth_windowsHeight, depth_windowsWidth, 2))
        newDepthData = (depthData[:, :, 0] + depthData[:, :, 1] * 256) * depthFrame.getValueScale()
        newDepthData = newDepthData.astype('uint16')  # 保持16位深度数据
        
        # 分辨率对齐
        if (color_windowsHeight, color_windowsWidth) != (depth_windowsHeight, depth_windowsWidth):
            depthDataRGB = cv2.cvtColor((newDepthData / 32).astype('uint8'), cv2.COLOR_GRAY2RGB)
            depthDataRGB = cv2.resize(depthDataRGB, (color_windowsWidth, color_windowsHeight))
        else:
            depthDataRGB = cv2.cvtColor((newDepthData / 32).astype('uint8'), cv2.COLOR_GRAY2RGB)
        
        # 融合显示
        if colorData is not None and depthDataRGB is not None:
            fused = cv2.addWeighted(colorData, 1 - alpha, depthDataRGB, alpha, 0)
            cv2.imshow("SyncAlignViewer", fused)
        
        # 处理保存逻辑
        if key == X_KEY:
            # 生成当前时间文件名
            now = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            rgb_filename = f"{now}_rgb.jpg"
            depth_filename = f"{now}_depth.tiff"
            
            # 保存彩色图
            rgb_path = os.path.join(rgb_save_dir, rgb_filename)
            if cv2.imwrite(rgb_path, colorData):
                print(f"✅ 彩色图保存成功：{rgb_path}")
            
            # 保存深度图（16位TIFF）
            depth_path = os.path.join(depth_save_dir, depth_filename)
            if cv2.imwrite(depth_path, newDepthData):
                print(f"✅ 深度图保存成功：{depth_path}")
    
    if key in (ESC, q):
        cv2.destroyAllWindows()
        break

pipe.stop()

上述代码是在参考文章的基础上增加了保存图像的功能

参考文章：怎么使用orbbec gemini调用深度流和彩色流并对齐（官方版本和自己版本）

Cornell数据集的标注

在这里插入图片描述
roLabelImg 旋转框标注

然后通过以下脚本将xml文件转为所需格式的txt

import xml.etree.ElementTree as ET
import math
import os

def xml_to_vertices(xml_path, output_suffix="cpos"):
    """
    将单个XML文件转换为顶点坐标TXT文件（原始顺序）
    """
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        vertices_list = []
        
        for obj in root.findall('object'):
            robndbox = obj.find('robndbox')
            cx = float(robndbox.find('cx').text)
            cy = float(robndbox.find('cy').text)
            w = float(robndbox.find('w').text)
            h = float(robndbox.find('h').text)
            angle = float(robndbox.find('angle').text)  # 弧度
            
            half_w = w / 2
            half_h = h / 2
            points = [
                (-half_w, -half_h),  # 左上
                (half_w, -half_h),   # 右上
                (half_w, half_h),    # 右下
                (-half_w, half_h)    # 左下
            ]
            
            cos_theta = math.cos(angle)
            sin_theta = math.sin(angle)
            vertices = []
            for (dx, dy) in points:
                x = cx + dx * cos_theta - dy * sin_theta
                y = cy + dx * sin_theta + dy * cos_theta
                vertices.append((x, y))
            
            vertices_list.extend(vertices)
        
        # 生成输出内容
        output_lines = [f"{x:.6f} {y:.6f}" for (x, y) in vertices_list]
        output_text = '\n'.join(output_lines)
        
        # 保存文件
        base_name = os.path.splitext(xml_path)[0]
        output_path = f"{base_name}{output_suffix}.txt"
        with open(output_path, 'w') as f:
            f.write(output_text)
        
        print(f"处理完成: {xml_path} -> {output_path}")
        return True
    except Exception as e:
        print(f"处理失败 {xml_path}: {str(e)}")
        return False

def batch_process_xml(folder_path):
    """
    批量处理文件夹中的所有XML文件
    """
    # 检查文件夹是否存在
    if not os.path.exists(folder_path):
        print(f"错误：文件夹 {folder_path} 不存在")
        return
    
    # 遍历文件夹中的文件
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".xml"):
            xml_path = os.path.join(folder_path, file_name)
            xml_to_vertices(xml_path)

if __name__ == "__main__":
    # 指定要处理的文件夹路径
    target_folder = r"F:\dataset\pos_label"
    batch_process_xml(target_folder)
    print("批量处理完成！")

转换后的txt格式如下
在这里插入图片描述
每一行都是一个点的x y坐标，每4个点是一组
可以通过下面的程序进行标签的可视化

import matplotlib.pyplot as plt
from PIL import Image

plt.rcParams["font.family"] = ["SimHei", "sans-serif"]

def read_label_file(file_path):
    """读取标签文件并返回坐标点列表"""
    with open(file_path, 'r') as f:
        lines = f.readlines()
    
    points = []
    for line in lines:
        line = line.strip().replace(',', ' ').split()
        if len(line) >= 2:
            x = float(line[0])
            y = float(line[1])
            points.append((x, y))
    return points

def visualize_labels(image_path, label_path):
    """读取图像并根据标签文件绘制线条"""
    image = Image.open(image_path)
    width, height = image.size
    points = read_label_file(label_path)
    
    print(f"图像尺寸: {width}x{height}")
    print(f"标签坐标 (前10个): {points[:10]}")
    
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    
    # 按每4行一组处理
    for i in range(0, len(points), 4):
        group = points[i:i+4]
        if len(group) < 4:
            continue  # 跳过不足4点的组
        
        # 解包4个点
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = group
        
        # 定义线条颜色顺序：蓝→绿→蓝→绿
        colors = ['b', 'g', 'b', 'g']
        points_order = [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)]  # 闭环
        
        # 按顺序绘制线条
        for j in range(4):
            x_start, y_start = points_order[j]
            x_end, y_end = points_order[j+1]
            plt.plot([x_start, x_end], [y_start, y_end], colors[j], linewidth=2)
    
    plt.title('标签可视化')
    # plt.axis('on')  # 显示坐标轴便于调试
    # plt.grid(True)
    # plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # image_path = r"F:\XionganRoboticArm\dataset\Cornell Grasp Dataset\image\image\pcd0100r.png"
    # label_path = r"F:\XionganRoboticArm\dataset\Cornell Grasp Dataset\pos_label\pos_label\pcd0100cpos.txt"
    image_path = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\image\pcd20250516_093445r.jpg"
    label_path = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\pos_label\pcd20250516_093445cpos.txt"
    visualize_labels(image_path, label_path)

在这里插入图片描述