Original code (decord):
import torch


def load_video_frames_from_video_file(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    compute_device=torch.device("cuda"),
):
    """Load the video frames from a video file."""
    import decord

    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    # Get the original video height and width
    decord.bridge.set_bridge("torch")
    video_height, video_width, _ = decord.VideoReader(video_path).next().shape
    # Iterate over all frames in the video, resized to (image_size, image_size)
    images = []
    for frame in decord.VideoReader(video_path, width=image_size, height=image_size):
        images.append(frame.permute(2, 0, 1))
    images = torch.stack(images, dim=0).float() / 255.0
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images -= img_mean
    images /= img_std
    return images, video_height, video_width
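A minimal usage sketch (not from the original post; the path and settings are placeholders): passing offload_video_to_cpu=True keeps the frame tensor on the CPU, which avoids filling GPU memory for long videos.
frames, h, w = load_video_frames_from_video_file(
    "example.mp4",               # hypothetical path, replace with a real video file
    image_size=1024,
    offload_video_to_cpu=True,   # keep frames on the CPU instead of compute_device
)
print(frames.shape, frames.device, h, w)  # e.g. torch.Size([N, 3, 1024, 1024]) cpu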
New code (OpenCV):
import cv2
import torch
import numpy as np


def load_video_frames_from_video_file(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    compute_device=torch.device("cuda"),
):
    """Use OpenCV to load video frames and return a normalized torch tensor."""
    # Convert to tensors of shape (3, 1, 1) so they broadcast over (3, H, W) frames
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {video_path}")
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    images = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Resize to (image_size, image_size)
        frame = cv2.resize(frame, (image_size, image_size))
        # BGR -> RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Convert to a torch tensor, shape (H, W, 3), scaled to [0, 1]
        frame_tensor = torch.from_numpy(frame).float() / 255.0
        # (H, W, 3) -> (3, H, W)
        frame_tensor = frame_tensor.permute(2, 0, 1)
        images.append(frame_tensor)
    cap.release()
    images = torch.stack(images, dim=0)  # (N, 3, H, W)
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images = (images - img_mean) / img_std
    return images, video_height, video_width
decord and OpenCV produce different results:
import cv2
import torch
import numpy as np
import decord


def load_video_frames_from_video_file(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    compute_device=torch.device("cuda"),
):
    """Load the video frames from a video file."""
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    # Get the original video height and width
    decord.bridge.set_bridge("torch")
    video_height, video_width, _ = decord.VideoReader(video_path).next().shape
    # Iterate over all frames in the video, resized to (image_size, image_size)
    images = []
    for frame in decord.VideoReader(video_path, width=image_size, height=image_size):
        images.append(frame.permute(2, 0, 1))
    images = torch.stack(images, dim=0).float() / 255.0
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images -= img_mean
    images /= img_std
    return images, video_height, video_width


def load_video_frames_from_video_file_cv(
    video_path,
    image_size,
    offload_video_to_cpu,
    img_mean=(0.485, 0.456, 0.406),
    img_std=(0.229, 0.224, 0.225),
    compute_device=torch.device("cuda"),
):
    """Use OpenCV to load video frames and return a normalized torch tensor."""
    # Convert to tensors of shape (3, 1, 1) so they broadcast over (3, H, W) frames
    img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
    img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video: {video_path}")
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    images = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # BGR -> RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Resize to (image_size, image_size)
        frame = cv2.resize(frame, (image_size, image_size))
        # Convert to a torch tensor, shape (H, W, 3), scaled to [0, 1]
        frame_tensor = torch.from_numpy(frame).float() / 255.0
        # (H, W, 3) -> (3, H, W)
        frame_tensor = frame_tensor.permute(2, 0, 1)
        images.append(frame_tensor)
    cap.release()
    images = torch.stack(images, dim=0)  # (N, 3, H, W)
    if not offload_video_to_cpu:
        images = images.to(compute_device)
        img_mean = img_mean.to(compute_device)
        img_std = img_std.to(compute_device)
    # normalize by mean and std
    images = (images - img_mean) / img_std
    return images, video_height, video_width
if __name__ == '__main__':
    path_mp4 = r"F:\data\0624_key\record(3)\record\0624_1429_0.mp4"
    image_size = 1024
    offload_video_to_cpu = False
    images, video_height, video_width = load_video_frames_from_video_file(path_mp4, image_size, offload_video_to_cpu)
    images1, video_height1, video_width1 = load_video_frames_from_video_file_cv(path_mp4, image_size, offload_video_to_cpu)
    print(torch.equal(images[0], images1[0]))
    diff = images[0] - images1[0]
    print(diff.max(), diff.min())
    # 1. Convert the (normalized) first frames back to uint8 images for visualization
    frame_decord = (images[0].clamp(0, 1).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
    frame_opencv = (images1[0].clamp(0, 1).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
    # 2. Compute the per-pixel absolute difference
    diff = cv2.absdiff(frame_decord, frame_opencv)
    # 3. Concatenate the images side by side for comparison
    vis = np.concatenate([frame_decord, frame_opencv, diff], axis=1)
    # 4. Show the result
    cv2.imshow("Decord | OpenCV | Diff", vis)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
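One plausible (unverified) source of the mismatch is the resize step rather than the normalization. A quick diagnostic sketch, reusing path_mp4, image_size and the decord result images from the script above, compares a few OpenCV interpolation modes against the first decord frame:
# Diagnostic sketch (assumption: the gap comes mainly from resizing/decoding differences).
# path_mp4, image_size and images are reused from the script above; mean/std are
# re-declared here so the check is self-contained.
mean = torch.tensor((0.485, 0.456, 0.406))[:, None, None]
std = torch.tensor((0.229, 0.224, 0.225))[:, None, None]
cap = cv2.VideoCapture(path_mp4)
ret, raw = cap.read()
cap.release()
if ret:
    raw = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
    ref = images[0].cpu()  # first decord frame, already normalized, shape (3, H, W)
    for name, interp in [("INTER_LINEAR", cv2.INTER_LINEAR),
                         ("INTER_AREA", cv2.INTER_AREA),
                         ("INTER_CUBIC", cv2.INTER_CUBIC)]:
        resized = cv2.resize(raw, (image_size, image_size), interpolation=interp)
        t = torch.from_numpy(resized).permute(2, 0, 1).float() / 255.0
        t = (t - mean) / std
        # mean absolute difference against the decord frame, per interpolation mode
        print(name, (t - ref).abs().mean().item())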