目录
conda 创建新的环境
conda create -n ssj python=3.9
windows的cmake3.28安装
OrbbecSDK
- 安装驱动
- 配置opencv
opencv 官网
点击Library,进入选择对应的linux版本
直接解压到对应的位置即可
- 元数据注册
奥比中光深度相机(一):环境配置
pyorbbecsdk奥比中光python版本SDK在Windows下环境配置笔记
pyorbbecsdk源码编译 (Windows)
windows 7z windows cannot create symbolic link : 客户端没有所需的特权。
在浏览器中下载zip文件,用7.zip进行解压出现如标题问题。
现象分析:
download直接下载到c盘中,由于所在文件夹有权限限制。无法进行正常解压。
解决方法:
7.zip解压时使用管理员权限进行解压,解压时使用管理员权限。
Cmake-gui发生pybind11配置错误
CMake Error at CMakeLists.txt:38 (find_package):
By not providing "Findpybind11.cmake" in CMAKE_MODULE_PATH this project has
asked CMake to find a package configuration file provided by "pybind11",
but CMake did not find one.
Could not find a package configuration file provided by "pybind11" with any
of the following names:
pybind11Config.cmake
pybind11-config.cmake
Add the installation prefix of "pybind11" to CMAKE_PREFIX_PATH or set
"pybind11_DIR" to a directory containing one of the above files. If
"pybind11" provides a separate development package or SDK, be sure it has
been installed.
编译生成的是py312版本的sdk,目前看来只有创建新的py环境了
vscode launch.json 配置
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"args": [],
"env": {"CUDA_VISIBLE_DEVICES":"0"}
}
]
}
奥比中光深度相机的对齐
深度图默认是640x400的,RGB图默认大小是640x480的,如何对齐呢?
from ObTypes import *
from Property import *
import Pipeline
import cv2
import numpy as np
import os
from datetime import datetime
class GeminiCamera:
def __init__(self):
# Start and configure
self.pipeline = Pipeline.Pipeline(None, None)
self.config = Pipeline.Config()
self.profiles = None
self.videoProfile = None
self.colorProfile = None
self.depthProfile = None
# 创建保存图像的目录
self.rgb_save_dir = r"F:\dataset\rgb_image"
self.depth_save_dir = r"F:\dataset\depth_image"
# 确保目录存在
os.makedirs(self.rgb_save_dir, exist_ok=True)
os.makedirs(self.depth_save_dir, exist_ok=True)
def connect(self):
# Color
self.profiles = self.pipeline.getStreamProfileList(OB_PY_SENSOR_COLOR)
self.videoProfile = self.profiles.getProfile(0)
self.colorProfile = self.videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
self.config.enableStream(self.colorProfile)
# Depth
self.profiles = self.pipeline.getStreamProfileList(OB_PY_SENSOR_DEPTH)
self.videoProfile = self.profiles.getProfile(0)
self.depthProfile = self.videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
self.config.enableStream(self.depthProfile)
# Configure alignment mode to software D2C alignment
self.config.setAlignMode(OB_PY_ALIGN_D2C_SW_MODE)
self.pipeline.start(self.config, None)
# Set Mirror
if self.pipeline.getDevice().isPropertySupported(OB_PY_PROP_COLOR_MIRROR_BOOL, OB_PY_PERMISSION_WRITE):
self.pipeline.getDevice().setBoolProperty(OB_PY_PROP_COLOR_MIRROR_BOOL, False)
if self.pipeline.getDevice().isPropertySupported(OB_PY_PROP_DEPTH_MIRROR_BOOL, OB_PY_PERMISSION_WRITE):
self.pipeline.getDevice().setBoolProperty(OB_PY_PROP_DEPTH_MIRROR_BOOL, False)
def get_image_bundle(self):
frameSet = self.pipeline.waitForFrames(100)
if frameSet == None:
return None
colorFrame = frameSet.colorFrame()
depthFrame = frameSet.depthFrame()
if colorFrame == None or depthFrame == None:
return None
# 获取帧的大小、数据、宽高
colorData = colorFrame.data()
depthData = depthFrame.data()
colorWidth = colorFrame.width()
colorHeight = colorFrame.height()
colorFormat = colorFrame.format()
depthWidth = depthFrame.width()
depthHeight = depthFrame.height()
valueScale = depthFrame.getValueScale()
newColorData = colorData
# Resize the color frame data to (height,width,3)
if colorFormat == OB_PY_FORMAT_MJPG:
# 将数据帧MJPG解码为RGB格式
newColorData = cv2.imdecode(newColorData,1)
# Decode data frame MJPG to RGB format
if newColorData is not None:
newColorData = np.resize(newColorData,(colorHeight, colorWidth, 3))
elif colorFormat == OB_PY_FORMAT_RGB888:
# Decode data frame MJPG to RGB format
newColorData = np.resize(newColorData,(colorHeight, colorWidth, 3))
# Convert frame data RGB to BGR
newColorData = cv2.cvtColor(newColorData, cv2.COLOR_RGB2BGR)
elif colorFormat == OB_PY_FORMAT_YUYV:
newColorData = np.resize(newColorData,(colorHeight, colorWidth, 2))
# Converting data frame YUYV to RGB data
newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_YUYV)
elif colorFormat == OB_PY_FORMAT_UYVY:
newColorData = np.resize(newColorData,(colorHeight, colorWidth, 2))
# Converting data frame YUYV to RGB data
newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_UYVY)
elif colorFormat == OB_PY_FORMAT_I420:
newColorData = newColorData.reshape((colorHeight * 3 // 2, colorWidth))
newColorData = cv2.cvtColor(newColorData, cv2.COLOR_YUV2BGR_I420)
newColorData = cv2.resize(newColorData, (colorWidth, colorHeight))
# Resize the depth frame data to (height,width,2)
depthData = np.resize(depthData,(depthHeight, depthWidth, 2))
# Convert frame data from 8bit to 16bit
newDepthData = depthData[:,:,0]+depthData[:,:,1]*256
# Convert frame data to 1mm units
newDepthData = (newDepthData * valueScale).astype('uint16')
# 归一化到uint8并扩展通道
normalized_image = (newDepthData / 32).astype('uint8')
# Convert depth frame data GRAY to RGB
outputDepthImage = cv2.cvtColor(normalized_image, cv2.COLOR_GRAY2RGB)
# Need to scale to the same resolution when the depth is not the same as the resolution of the color
if colorHeight != depthHeight:
outputDepthImage = cv2.resize(outputDepthImage,(colorWidth,colorHeight))
return {
'rgb': newColorData,
'aligned_depth': outputDepthImage,
'raw_depth': newDepthData # 新增原始深度数据,用于保存为TIFF
}
def plot_image_bundle(self):
images = self.get_image_bundle()
if images == None:
return None
rgb = images['rgb']
depth = images['aligned_depth']
newData = cv2.addWeighted(rgb, 0.5, depth, 0.5, 0)
cv2.namedWindow("SyncAlignViewer", cv2.WINDOW_NORMAL)
cv2.imshow("SyncAlignViewer", newData)
return images # 返回图像数据,供保存使用
def save_images(self, images):
"""保存RGB和深度图像到指定目录"""
if images is None:
print("没有可用的图像数据")
return
# 生成基于时间的文件名
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
rgb_filename = os.path.join(self.rgb_save_dir, f"{timestamp}.jpg")
depth_filename = os.path.join(self.depth_save_dir, f"{timestamp}.tiff")
# 保存RGB图像
cv2.imwrite(rgb_filename, images['rgb'])
# 保存原始深度图像为TIFF格式(16位无符号整数)
# 注意:OpenCV保存TIFF时会自动处理16位格式
cv2.imwrite(depth_filename, images['raw_depth'])
print(f"已保存: {rgb_filename} 和 {depth_filename}")
if __name__ == '__main__':
q = 113
esc = 27
x_key = 120 # X键的ASCII码
cam = GeminiCamera()
cam.connect()
while True:
key = cv2.waitKey(1)
images = cam.plot_image_bundle()
# 检测按键事件
if key == esc or key == q:
break # 退出循环
elif key == x_key:
# 按下X键,保存图像
cam.save_images(images)
cv2.destroyAllWindows()
cam.pipeline.stop()
上述代码能够将深度图和RGB图对齐,都保存为600x480的大小,且深度图保存为tiff格式
from ObTypes import *
from Property import *
import Pipeline
import StreamProfile
import Device
from Error import ObException
import cv2
import numpy as np
import sys
import math
import os
from datetime import datetime # 添加时间模块
q = 113
ESC = 27
X_KEY = 120 # X键的ASCII码值
alpha = 0.6 # 设定深度图的透明程度,一般0.8以下都可以
frameSet = None
colorFrame = None
depthFrame = None
# 定义保存路径
rgb_save_dir = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\rgb_image"
depth_save_dir = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\depth_image"
# 确保保存目录存在
for dir_path in [rgb_save_dir, depth_save_dir]:
if not os.path.exists(dir_path):
os.makedirs(dir_path)
print(f"创建目录: {dir_path}")
pipe = Pipeline.Pipeline(None, None)
config = Pipeline.Config()
# 彩色流配置
profiles = pipe.getStreamProfileList(OB_PY_SENSOR_COLOR)
videoProfile = profiles.getProfile(0)
colorProfile = videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
config.enableStream(colorProfile)
color_windowsWidth = colorProfile.width()
color_windowsHeight = colorProfile.height()
print(f"彩色图尺寸: {color_windowsWidth}x{color_windowsHeight}")
# 深度流配置
profiles = pipe.getStreamProfileList(OB_PY_SENSOR_DEPTH)
videoProfile = profiles.getProfile(0)
depthProfile = videoProfile.toConcreteStreamProfile(OB_PY_STREAM_VIDEO)
config.enableStream(depthProfile)
depth_windowsWidth = depthProfile.width()
depth_windowsHeight = depthProfile.height()
print(f"深度图尺寸: {depth_windowsWidth}x{depth_windowsHeight}")
# 设置对齐模式
config.setAlignMode(OB_PY_ALIGN_D2C_SW_MODE)
pipe.start(config, None)
while True:
key = cv2.waitKey(1)
frameSet = pipe.waitForFrames(100)
if frameSet is None:
continue
colorFrame = frameSet.colorFrame()
depthFrame = frameSet.depthFrame()
if colorFrame and depthFrame:
# 解析彩色帧
colorData = colorFrame.data()
colorFormat = colorFrame.format()
if colorFormat == OB_PY_FORMAT_MJPG:
colorData = cv2.imdecode(colorData, 1)
if colorData is not None:
colorData = np.resize(colorData, (color_windowsHeight, color_windowsWidth, 3))
# 解析深度帧
depthData = depthFrame.data()
depthData = np.resize(depthData, (depth_windowsHeight, depth_windowsWidth, 2))
newDepthData = (depthData[:, :, 0] + depthData[:, :, 1] * 256) * depthFrame.getValueScale()
newDepthData = newDepthData.astype('uint16') # 保持16位深度数据
# 分辨率对齐
if (color_windowsHeight, color_windowsWidth) != (depth_windowsHeight, depth_windowsWidth):
depthDataRGB = cv2.cvtColor((newDepthData / 32).astype('uint8'), cv2.COLOR_GRAY2RGB)
depthDataRGB = cv2.resize(depthDataRGB, (color_windowsWidth, color_windowsHeight))
else:
depthDataRGB = cv2.cvtColor((newDepthData / 32).astype('uint8'), cv2.COLOR_GRAY2RGB)
# 融合显示
if colorData is not None and depthDataRGB is not None:
fused = cv2.addWeighted(colorData, 1 - alpha, depthDataRGB, alpha, 0)
cv2.imshow("SyncAlignViewer", fused)
# 处理保存逻辑
if key == X_KEY:
# 生成当前时间文件名
now = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
rgb_filename = f"{now}_rgb.jpg"
depth_filename = f"{now}_depth.tiff"
# 保存彩色图
rgb_path = os.path.join(rgb_save_dir, rgb_filename)
if cv2.imwrite(rgb_path, colorData):
print(f"✅ 彩色图保存成功:{rgb_path}")
# 保存深度图(16位TIFF)
depth_path = os.path.join(depth_save_dir, depth_filename)
if cv2.imwrite(depth_path, newDepthData):
print(f"✅ 深度图保存成功:{depth_path}")
if key in (ESC, q):
cv2.destroyAllWindows()
break
pipe.stop()
上述代码是在参考文章的基础上增加了保存图像的功能
参考文章:怎么使用orbbec gemini调用深度流和彩色流并对齐(官方版本和自己版本)
Cornell数据集的标注
roLabelImg 旋转框标注
然后通过以下脚本将xml文件转为所需格式的txt
import xml.etree.ElementTree as ET
import math
import os
def xml_to_vertices(xml_path, output_suffix="cpos"):
"""
将单个XML文件转换为顶点坐标TXT文件(原始顺序)
"""
try:
tree = ET.parse(xml_path)
root = tree.getroot()
vertices_list = []
for obj in root.findall('object'):
robndbox = obj.find('robndbox')
cx = float(robndbox.find('cx').text)
cy = float(robndbox.find('cy').text)
w = float(robndbox.find('w').text)
h = float(robndbox.find('h').text)
angle = float(robndbox.find('angle').text) # 弧度
half_w = w / 2
half_h = h / 2
points = [
(-half_w, -half_h), # 左上
(half_w, -half_h), # 右上
(half_w, half_h), # 右下
(-half_w, half_h) # 左下
]
cos_theta = math.cos(angle)
sin_theta = math.sin(angle)
vertices = []
for (dx, dy) in points:
x = cx + dx * cos_theta - dy * sin_theta
y = cy + dx * sin_theta + dy * cos_theta
vertices.append((x, y))
vertices_list.extend(vertices)
# 生成输出内容
output_lines = [f"{x:.6f} {y:.6f}" for (x, y) in vertices_list]
output_text = '\n'.join(output_lines)
# 保存文件
base_name = os.path.splitext(xml_path)[0]
output_path = f"{base_name}{output_suffix}.txt"
with open(output_path, 'w') as f:
f.write(output_text)
print(f"处理完成: {xml_path} -> {output_path}")
return True
except Exception as e:
print(f"处理失败 {xml_path}: {str(e)}")
return False
def batch_process_xml(folder_path):
"""
批量处理文件夹中的所有XML文件
"""
# 检查文件夹是否存在
if not os.path.exists(folder_path):
print(f"错误:文件夹 {folder_path} 不存在")
return
# 遍历文件夹中的文件
for file_name in os.listdir(folder_path):
if file_name.endswith(".xml"):
xml_path = os.path.join(folder_path, file_name)
xml_to_vertices(xml_path)
if __name__ == "__main__":
# 指定要处理的文件夹路径
target_folder = r"F:\dataset\pos_label"
batch_process_xml(target_folder)
print("批量处理完成!")
转换后的txt格式如下
每一行都是一个点的x y坐标,每4个点是一组
可以通过下面的程序进行标签的可视化
import matplotlib.pyplot as plt
from PIL import Image
plt.rcParams["font.family"] = ["SimHei", "sans-serif"]
def read_label_file(file_path):
"""读取标签文件并返回坐标点列表"""
with open(file_path, 'r') as f:
lines = f.readlines()
points = []
for line in lines:
line = line.strip().replace(',', ' ').split()
if len(line) >= 2:
x = float(line[0])
y = float(line[1])
points.append((x, y))
return points
def visualize_labels(image_path, label_path):
"""读取图像并根据标签文件绘制线条"""
image = Image.open(image_path)
width, height = image.size
points = read_label_file(label_path)
print(f"图像尺寸: {width}x{height}")
print(f"标签坐标 (前10个): {points[:10]}")
plt.figure(figsize=(10, 10))
plt.imshow(image)
# 按每4行一组处理
for i in range(0, len(points), 4):
group = points[i:i+4]
if len(group) < 4:
continue # 跳过不足4点的组
# 解包4个点
(x1, y1), (x2, y2), (x3, y3), (x4, y4) = group
# 定义线条颜色顺序:蓝→绿→蓝→绿
colors = ['b', 'g', 'b', 'g']
points_order = [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)] # 闭环
# 按顺序绘制线条
for j in range(4):
x_start, y_start = points_order[j]
x_end, y_end = points_order[j+1]
plt.plot([x_start, x_end], [y_start, y_end], colors[j], linewidth=2)
plt.title('标签可视化')
# plt.axis('on') # 显示坐标轴便于调试
# plt.grid(True)
# plt.tight_layout()
plt.show()
if __name__ == "__main__":
# image_path = r"F:\XionganRoboticArm\dataset\Cornell Grasp Dataset\image\image\pcd0100r.png"
# label_path = r"F:\XionganRoboticArm\dataset\Cornell Grasp Dataset\pos_label\pos_label\pcd0100cpos.txt"
image_path = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\image\pcd20250516_093445r.jpg"
label_path = r"F:\XionganRoboticArm\lishaokun\SE-ResUNet\SE-ResUNet\dataset\pos_label\pcd20250516_093445cpos.txt"
visualize_labels(image_path, label_path)