此处给出我在进行毕业设计过程中写的四份 Python 脚本和一个 C++ 订阅节点,作为 demo 展示模型的预处理、输出信息提取和 TFOD API 的应用。
Script1
加载本地的MNIST模型,对本地的手写数字进行推理
# test the validation of the saved file and the camera
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os  # needed below to expand "~" in the test-image path

# 1. Load the trained model
model = tf.keras.models.load_model("/home/ncut/models/myMNIST_Model.keras")

# Disabled alternative input: grab one frame from the default camera instead
# of reading a file. It is kept as an inert string literal so it can be
# re-enabled by removing the surrounding triple quotes.
'''
# 2. 从摄像头捕获一张图片
cap = cv2.VideoCapture(0) # 0 表示默认摄像头
if not cap.isOpened():
    print("无法打开摄像头")
    exit()
ret, frame = cap.read()
cap.release()
if not ret:
    print("无法捕获摄像头图像")
    exit()
'''

# 2. Read the test image from disk. cv2.imread does not expand "~" and returns
# None (without raising) when the file cannot be read, so expand the path
# explicitly and fail early instead of crashing later inside cvtColor.
image_path = os.path.expanduser("~/code_garden/testcase_folder/five.png")
frame = cv2.imread(image_path)
if frame is None:
    raise FileNotFoundError(f"cannot read image: {image_path}")

# 3. Preprocess the image
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # BGR -> single-channel grayscale
img_resized = cv2.resize(gray, (28, 28))  # resize to 28x28
# Scale to [0, 1] and invert; presumably the model was trained on MNIST-style
# light-digit-on-dark-background images while the photo is dark-on-light.
img_normalized = 1.0 - (img_resized.astype("float32") / 255.0)
# Add batch and channel axes -> (1, 28, 28, 1). This is not a flatten.
img_input = img_normalized.reshape(1, 28, 28, 1)

# Show exactly what is fed to the model
plt.imshow(img_input[0, :, :, 0], cmap='gray')
plt.title("image after preprocess")
plt.axis("off")
plt.show()

# 4. Run inference
predictions = model.predict(img_input)
print("预测概率:", predictions)
predicted_class = np.argmax(predictions)  # index of the largest score
print("预测类别:", predicted_class)
confidence = np.max(predictions)  # the largest score itself
print("confidence value:", confidence)

# 5. Visualise the result in a three-panel figure
plt.figure(figsize=(10, 4))

# Left: the original colour image
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # OpenCV loads BGR; matplotlib expects RGB
plt.title("The original color photo")
plt.axis("off")

# Middle: the preprocessed 28x28 grayscale image
plt.subplot(1, 3, 2)
plt.imshow(img_normalized, cmap="gray")
plt.title("gray map(28x28)")
plt.axis("off")

# Right: the inference result as text
plt.subplot(1, 3, 3)
plt.title("inference result")
plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)
plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)
#plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)
plt.axis("off")

plt.tight_layout()
plt.show()
Script2
实际使用的脚本,用类进行封装,同样是对本地图片的读取。这里使用了 @staticmethod 装饰器,类似 C++ 中的 static method,不依赖类的实例即可调用。在调用主函数之前,脚本会先睡眠三秒,方便使用者为接下来的摄像头数据读取做准备。
import argparse
import cv2
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import time
class MNISTProcessor:
    """Run a saved Keras MNIST classifier on an image file and save a summary figure."""

    def __init__(self, model_path="/home/ncut/models/myMNIST_Model.keras"):
        """Load the Keras model stored at ``model_path``."""
        self.model = tf.keras.models.load_model(model_path)

    def process_image(self, input_path="/home/ncut/Pictures/five.png", output_path=None):
        """Run the full pipeline: load the image, classify it, save the figure.

        :param input_path: path of the image to classify
        :param output_path: where to save the result figure; when ``None`` a
            timestamped PNG under ``/dev/shm`` is used
        :return: ``True`` once the result figure has been written
        :raises FileNotFoundError: if ``input_path`` is missing or unreadable
        """
        # 1. Load the image
        image = self._load_image(input_path)
        if image is None:
            raise FileNotFoundError(f"图像文件 {input_path} 不存在或无法读取")

        # 2. Run inference
        predicted_class, confidence, processed_image = self.predict(image)

        if output_path is None:
            output_path = f"/dev/shm/mnist_result_{int(time.time())}.png"

        # 3. Draw and save the result figure
        self.draw_results_save(image.copy(), predicted_class, confidence, processed_image, output_path)
        return True

    def predict(self, image):
        """Preprocess ``image`` and classify it with the loaded model.

        :param image: BGR image as returned by ``cv2.imread``; a 2-D
            (already grayscale) array is accepted as well
        :return: ``(predicted_class, confidence, img_normalized)`` where
            ``img_normalized`` is the inverted 28x28 float32 model input
        """
        # A 2-D array has no channel axis to convert, so use it as-is.
        if image.ndim == 2:
            gray = image
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_resized = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_CUBIC)
        # Scale to [0, 1] and invert the intensities.
        img_normalized = 1.0 - (img_resized.astype("float32") / 255.0)
        # Add batch and channel axes -> (1, 28, 28, 1).
        img_input = img_normalized.reshape(1, 28, 28, 1)

        predictions = self.model.predict(img_input)
        predicted_class = np.argmax(predictions)  # index of the largest score
        confidence = np.max(predictions)  # the largest score itself
        return predicted_class, confidence, img_normalized

    def draw_results_save(self, image, predicted_class, confidence, img_normalized, output_path):
        """Render a three-panel summary figure and save it to ``output_path``.

        Panels: original colour image, preprocessed 28x28 image, and the
        predicted class with its confidence as text.
        """
        fig = plt.figure(figsize=(10, 4))
        try:
            # Left: the original colour image
            plt.subplot(1, 3, 1)
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # OpenCV loads BGR; matplotlib expects RGB
            plt.title("The original color photo")
            plt.axis("off")

            # Middle: the preprocessed grayscale image
            plt.subplot(1, 3, 2)
            plt.imshow(img_normalized, cmap="gray")
            plt.title("gray map(28x28)")
            plt.axis("off")

            # Right: the inference result as text
            plt.subplot(1, 3, 3)
            plt.title("inference result")
            plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)
            plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)
            #plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)
            plt.axis("off")

            plt.tight_layout()
            #plt.show()
            fig.savefig(output_path, bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even when drawing or saving fails,
            # so repeated calls do not accumulate open figures.
            plt.close(fig)

    @staticmethod
    def _load_image(path):
        """Return the image at ``path`` as read by ``cv2.imread``, or ``None`` if the path does not exist."""
        if not os.path.exists(path):
            return None
        return cv2.imread(path)

    @staticmethod
    def _save_cvimage(image, path):
        """Write an OpenCV image to ``path``."""
        cv2.imwrite(path, image)
def main():
    """Parse the command line, classify the input image and save the result figure.

    Exits with status 1 if processing fails. When the ``DEBUG_SHOW``
    environment variable is set, the saved figure is displayed afterwards.
    """
    # Command-line arguments
    parser = argparse.ArgumentParser(description='MNIST 处理器')
    parser.add_argument('--input', required=True, help='输入图像路径')
    parser.add_argument('--output', help='输出图像路径 (可选)')
    args = parser.parse_args()
    print("parse succeed")

    # Resolve the output path here rather than inside process_image, so the
    # debug preview below knows where the figure was written even when
    # --output is omitted.
    output_path = args.output or f"/dev/shm/mnist_result_{int(time.time())}.png"

    processor = MNISTProcessor()
    try:
        processor.process_image(args.input, output_path)

        # Optional: show the saved figure (debugging aid)
        if os.environ.get('DEBUG_SHOW'):
            plt.imshow(cv2.cvtColor(cv2.imread(output_path), cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.show()
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit status.
        print(f"处理失败: {str(e)}")
        raise SystemExit(1)


if __name__ == "__main__":
    # Pause before doing any work, giving the user time to prepare.
    print("sleep for 3 seconds")
    time.sleep(3)
    main()
Script3
能在虚拟机资源受限环境下实现的模型训练和推理,简单的训练模型demo。
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# ==============================================
# 1. Load the MNIST dataset from a local file
# ==============================================
def load_local_mnist(path='./datasets/mnist.npz'):
    """Read the four MNIST arrays from a local ``.npz`` archive.

    :param path: location of the ``.npz`` file
    :return: ``((x_train, y_train), (x_test, y_test))``
    """
    # NOTE(review): allow_pickle=True lets the archive carry pickled objects,
    # which is unsafe for files from an untrusted source. Confirm whether it
    # is actually needed for this dataset.
    archive = np.load(path, allow_pickle=True)
    try:
        train_pair = (archive['x_train'], archive['y_train'])
        test_pair = (archive['x_test'], archive['y_test'])
    finally:
        archive.close()
    return train_pair, test_pair
# Read the dataset from a local copy of mnist.npz
local_mnist_path = '/home/ncut/.keras/datasets/mnist.npz'  # change to your actual path
(x_train, y_train), (x_test, y_test) = load_local_mnist(local_mnist_path)


# ==============================================
# 2. Preprocessing
# ==============================================
def _flatten_and_scale(images):
    """Flatten every image to 28*28 values, cast to float32 and divide by 255."""
    flat = images.reshape(-1, 28 * 28)
    return flat.astype('float32') / 255.0


x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)
# ==============================================
# 3. Visualise the first training images
# ==============================================
def plot_mnist_samples(images, labels, num_samples=25):
    """Show the first ``num_samples`` images in a 5-column grid, titled with their labels.

    :param images: indexable collection of images; each is reshaped to 28x28,
        so both flattened (784,) and (28, 28) inputs work
    :param labels: labels aligned with ``images``
    :param num_samples: how many images to show; clamped to the data available
    """
    count = min(num_samples, len(images), len(labels))
    if count <= 0:
        # Nothing to draw; avoid opening an empty window.
        return

    cols = 5
    # Keep the 5x5 layout for up to 25 samples and add rows beyond that, so
    # num_samples > 25 no longer asks for a subplot outside the grid.
    rows = max(5, (count + cols - 1) // cols)

    plt.figure(figsize=(10, 2 * rows))
    for i in range(count):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i].reshape(28, 28), cmap='gray')  # reshape handles flattened input
        plt.title(f"Label: {labels[i]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()
# Visualise samples using the original (un-flattened) training images.
# load_local_mnist returns (train, test), so the training images are the first
# element of the FIRST tuple; taking them from the second tuple would pair
# test images with the training labels passed below.
(x_train_original, _), _ = load_local_mnist(local_mnist_path)
plot_mnist_samples(x_train_original, y_train)

# ==============================================
# 4. Train the model (on the flattened data)
# ==============================================
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # labels are integer class ids
    metrics=['accuracy']
)

history = model.fit(
    x_train, y_train,
    epochs=5,
    validation_split=0.2,  # hold out 20% of the training data for validation
    verbose=1
)

# ==============================================
# 5. Evaluate on the test set
# ==============================================
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'\nTest accuracy: {test_acc:.4f}')
Script4
训练模型,此处使用了数据增强和缓存,是我自己在 Google Colab 上训练时使用的脚本。
import tensorflow as tf
import numpy as np
from sklearn.utils import class_weight
# 1. Load the MNIST data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# 2. Basic preprocessing: cast to float32, divide by 255, then append a
#    channel axis: (28, 28) -> (28, 28, 1)
x_train = (x_train.astype('float32') / 255.0)[..., np.newaxis]
x_test = (x_test.astype('float32') / 255.0)[..., np.newaxis]


# 3. Define the data-augmentation pipeline
def _random_brightness(x):
    """Shift brightness by a random delta of at most 0.1."""
    return tf.image.random_brightness(x, max_delta=0.1)


_augmentation_layers = [
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomTranslation(0.1, 0.1),
    tf.keras.layers.Lambda(_random_brightness),
    tf.keras.layers.RandomContrast(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_layers)
# 4. Per-example augmentation function
def augment(image, label):
    """Apply the augmentation pipeline to ``image`` and pass ``label`` through.

    ``training=True`` is passed explicitly so the random layers are active.
    """
    augmented = data_augmentation(image, training=True)
    return augmented, label
# 5. Build the training and validation datasets
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Cache BEFORE the random steps. Caching after shuffle/map/batch would store
# the first epoch's augmented batches and replay them unchanged in every later
# epoch, so augmentation and reshuffling would stop after epoch one.
train_dataset = train_dataset.cache()
train_dataset = train_dataset.shuffle(buffer_size=1024)
train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(64)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# The validation pipeline is deterministic, so caching whole batches is fine.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(64)
val_dataset = val_dataset.cache()
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)
# 6. Compute class weights from the original training labels
label_values = np.unique(y_train)
cw = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=label_values,
    y=y_train
)
# Key each weight by its actual label value instead of its position, so the
# mapping stays correct even if the labels are not exactly 0..K-1.
class_weights = {int(label): weight for label, weight in zip(label_values, cw)}
print("Class weights:", class_weights)
# 7. Build the CNN: one conv/pool stage followed by a dense classifier head
_cnn_layers = [
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(10, activation='softmax'),
]
model = tf.keras.Sequential(_cnn_layers)

# 8. Compile the model (integer labels -> sparse categorical cross-entropy)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# 9. Train the model, passing the class weights
history = model.fit(
    train_dataset,
    epochs=15,
    validation_data=val_dataset,
    class_weight=class_weights
)

# Save the model and load it back, to verify the saved file round-trips
model.save("myMNIST_Model.keras")
reconstructed_model = tf.keras.models.load_model("myMNIST_Model.keras")

# 10. Evaluate both the in-memory and the reloaded model on the test set
test_loss1, test_acc1 = model.evaluate(x_test, y_test)
test_loss2, test_acc2 = reconstructed_model.evaluate(x_test, y_test)
print("Test loss1:", test_loss1)
print(f"Test accuracy1: {test_acc1:.4f}")
print("Test loss2:", test_loss2)
# Accuracy is a fraction in [0, 1], so print it like accuracy1 rather than
# with a "%" suffix, which would read as e.g. 0.98 percent.
print(f"Test accuracy2: {test_acc2:.4f}")
C++ subscriber
#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <cv_bridge/cv_bridge.h>
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <ctime>
#include <string>
#include <sstream>
// Build a file-name suffix of the form "<unix time>_<n>", where n is a counter
// that increases by one on every call (stands in for generate_uuid()).
std::string generate_unique_id() {
    static int counter = 0;
    const std::time_t now = std::time(nullptr);
    std::ostringstream id;
    id << now << '_' << counter;
    ++counter;
    return id.str();
}
// Subscriber callback: convert the ROS image to an OpenCV BGR image, write it
// to /dev/shm, and run the Python MNIST script on it synchronously.
void imageCallback(const sensor_msgs::ImageConstPtr& msg) {
    ROS_INFO("process callback");
    try {
        // Convert the ROS image message to an OpenCV image
        cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, "bgr8");
        const cv::Mat& image = cv_ptr->image;

        // Unique names so successive frames do not overwrite each other
        const std::string uuid = generate_unique_id();
        const std::string temp_path = "/dev/shm/ros_MNIST_input_" + uuid + ".jpg";
        const std::string output_path = "/dev/shm/ros_MNIST_output_" + uuid + ".jpg";

        // Save the input image. imwrite reports failure through its return
        // value, so stop here rather than run the script on a missing file.
        if (!cv::imwrite(temp_path, image)) {
            ROS_ERROR("failed to write input image: %s", temp_path.c_str());
            return;
        }

        // Build the Python command line. No trailing "&" is appended, so
        // std::system blocks until the script has finished.
        const std::string command =
            "/home/ncut/miniconda3/envs/tf/bin/python /home/ncut/my_ws/src/graduation_design/scripts/MNIST_photo.py "
            "--input " + temp_path + " "
            "--output " + output_path + " ";

        // Run the Python script
        ROS_INFO("sleep for 3 seconds for u to prepare");
        // NOTE: ret is the raw value returned by std::system, logged as-is.
        int ret = std::system(command.c_str());
        if (ret != 0) {
            ROS_ERROR("Python脚本调用失败,返回码: %d", ret);
            return;
        }
        ROS_INFO("invoke python script sucessfully");

        // (Optional) poll for the output file and display the result;
        // asynchronous reading of output_path could be added here.
    } catch (const cv_bridge::Exception& e) {
        ROS_ERROR("cv_bridge异常: %s", e.what());
    } catch (const cv::Exception& e) {
        // Keep an OpenCV failure from propagating out of the callback.
        ROS_ERROR("OpenCV exception: %s", e.what());
    }
}
// Cleanup guard: when an instance is destroyed, the temporary MNIST input and
// output JPEG files under /dev/shm are deleted via a shell command.
class Cleaner {
public:
    ~Cleaner() {
        static const char* const kCleanupCommand =
            "rm -f /dev/shm/ros_MNIST_input_*.jpg /dev/shm/ros_MNIST_output_*.jpg";
        std::system(kCleanupCommand);
    }
};
int main(int argc, char** argv) {
ros::init(argc, argv, "MNIST_sub_photo");
ros::NodeHandle nh;
int count = 0;
// For my pc, the topic name is camera/image_raw
ros::Subscriber sub = nh.subscribe("/camera/rgb/image_raw", 1, imageCallback);
// for a test
//Cleaner cleaner;
//ros::spin();
/* a demo of ros::spinOnce, not that useful */
ros::Rate loop_rate(0.04);
while(ros::ok()) {
ros::spinOnce(); // asynchronous way
loop_rate.sleep();
}
ROS_INFO("out of while loop");
return 0;
}
实际使用的 C++ node:接收图片,使用 cv_bridge 功能包将 ROS 图片格式转为 OpenCV 格式,然后将图片保存到文件,并把保存地址作为参数传给 Python 脚本执行。/dev/shm/ 目录下的 .jpg 文件由 RAII 类 Cleaner 负责清理(需要在 main 中创建它的实例才会生效):只要 main 正常返回或因异常而栈展开,其析构函数就会执行清理命令,从而避免临时文件残留;但进程被强制终止(例如 SIGKILL 或调用 std::abort)时析构函数不会执行。
Addition
上述的四份脚本作为四份独立的脚本存储于我的虚拟机中,但它们的共通之处:模型的加载、数据的预处理、对模型检测结果的信息提取没有在此处体现。放任四份脚本中的共同之处不管而着急地对各个脚本进行处理,是一种放任思维的、不负责任的表现。
这份博客会在后面进行修改,提取出公共部分,对独立的部分进行描述说明。