MNIST 数据集与 TFOD API-CSDN博客

本文链接：https://blog.csdn.net/qq_64338302/article/details/147048082

此处给出我在进行毕业设计过程中写的四份 Python 脚本和一个 C++ 节点，作为 demo 展示模型的预处理、输出信息提取和 TFOD API 的应用。

Script1

加载本地的MNIST模型，对本地的手写数字进行推理

# Sanity-check the saved MNIST model against a local test image
# (optionally against a single camera frame, see the disabled step 2).
import os

import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the trained model.
model = tf.keras.models.load_model("/home/ncut/models/myMNIST_Model.keras")

# 2. (Disabled) Capture one frame from the camera instead of reading a file.
# cap = cv2.VideoCapture(0)  # 0 selects the default camera
# if not cap.isOpened():
#     print("无法打开摄像头")
#     exit()
#
# ret, frame = cap.read()
# cap.release()
# if not ret:
#     print("无法捕获摄像头图像")
#     exit()

# cv2.imread does not expand "~" and returns None (instead of raising) when
# the file cannot be read, so resolve the path and check the result here
# rather than failing later inside cvtColor with an obscure error.
image_path = os.path.expanduser("~/code_garden/testcase_folder/five.png")
frame = cv2.imread(image_path)
if frame is None:
    raise FileNotFoundError(f"cannot read image: {image_path}")

# 3. Preprocess the image.
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # BGR -> single-channel grayscale
img_resized = cv2.resize(gray, (28, 28))  # shrink to the 28x28 model input size
# Scale to [0, 1] and invert, so dark strokes on a light background become
# bright strokes on a dark background.
img_normalized = 1.0 - (img_resized.astype("float32") / 255.0)
# Add the batch and channel axes: (28, 28) -> (1, 28, 28, 1). Nothing is flattened.
img_input = img_normalized.reshape(1, 28, 28, 1)


# Show what the model will actually receive.
plt.imshow(img_input[0, :, :, 0], cmap='gray')
plt.title("image after preprocess")
plt.axis("off")
plt.show()

# 4. Run inference.
predictions = model.predict(img_input)
print("预测概率:", predictions)
predicted_class = np.argmax(predictions)  # index of the highest score
print("预测类别:", predicted_class)
confidence = np.max(predictions)  # the highest score itself
print("confidence value:", confidence)

# 5. Visualise the result in a three-panel figure.
plt.figure(figsize=(10, 4))

# Left: the original colour image.
plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # OpenCV loads BGR; matplotlib expects RGB
plt.title("The original color photo")
plt.axis("off")

# Middle: the preprocessed 28x28 grayscale image.
plt.subplot(1, 3, 2)
plt.imshow(img_normalized, cmap="gray")
plt.title("gray map(28x28)")
plt.axis("off")

# Right: the inference result as text.
plt.subplot(1, 3, 3)
plt.title("inference result")
plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)
plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)
#plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)
plt.axis("off")

plt.tight_layout()
plt.show()

Script2

实际使用的脚本，用类进行封装，同样是读取本地图片进行推理。这里使用了 @staticmethod 装饰器，类似 C++ 中的静态成员函数（static member function），不依赖类的实例即可调用。脚本启动时会先睡眠三秒再调用主函数，方便使用者为接下来的摄像头数据读取做准备。

import argparse
import cv2
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import time

class MNISTProcessor:
    """Classify an image with a saved Keras MNIST model and save a summary figure."""

    def __init__(self, model_path="/home/ncut/models/myMNIST_Model.keras"):
        """Load the Keras model stored at ``model_path``."""
        self.model = tf.keras.models.load_model(model_path)

    def process_image(self, input_path="/home/ncut/Pictures/five.png", output_path=None):
        """
        Entry point of the whole pipeline: load, classify, render, save.

        :param input_path: path of the image to classify
        :param output_path: where to save the result figure (optional); when
            ``None`` a timestamped file under ``/dev/shm`` is used
        :return: ``True`` once the result figure has been written
        :raises FileNotFoundError: if ``input_path`` is missing or unreadable
        """
        # 1. Load the image.
        image = self._load_image(input_path)
        if image is None:
            raise FileNotFoundError(f"图像文件 {input_path} 不存在或无法读取")

        # 2. Run inference.
        predicted_class, confidence, processed_image = self.predict(image)

        if output_path is None:
            output_path = f"/dev/shm/mnist_result_{int(time.time())}.png"

        # 3. Render the result figure and save it.
        self.draw_results_save(image.copy(), predicted_class, confidence, processed_image, output_path)

        return True

    def predict(self, image):
        """
        Preprocess a BGR image and classify it.

        :param image: BGR image as loaded by ``cv2.imread``
        :return: ``(predicted_class, confidence, img_normalized)`` where
            ``img_normalized`` is the inverted 28x28 float32 image in [0, 1]
            that was fed to the model
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_resized = cv2.resize(gray, (28, 28), interpolation=cv2.INTER_CUBIC)
        # Scale to [0, 1] and invert so dark strokes become bright ones.
        img_normalized = 1.0 - (img_resized.astype("float32") / 255.0)
        # Add batch and channel axes: (28, 28) -> (1, 28, 28, 1).
        img_input = img_normalized.reshape(1, 28, 28, 1)

        predictions = self.model.predict(img_input)
        predicted_class = np.argmax(predictions)  # index of the highest score
        confidence = np.max(predictions)  # the highest score itself
        return predicted_class, confidence, img_normalized

    def draw_results_save(self, image, predicted_class, confidence, img_normalized, output_path):
        """
        Render a three-panel figure (original, preprocessed, result text) and save it.

        :param image: original BGR image
        :param predicted_class: class index reported in the text panel
        :param confidence: score reported in the text panel
        :param img_normalized: preprocessed 28x28 image shown in the middle panel
        :param output_path: file the figure is written to
        """
        plt.figure(figsize=(10, 4))
        try:
            # Left: the original colour image.
            plt.subplot(1, 3, 1)
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # OpenCV loads BGR; matplotlib expects RGB
            plt.title("The original color photo")
            plt.axis("off")

            # Middle: the preprocessed grayscale image.
            plt.subplot(1, 3, 2)
            plt.imshow(img_normalized, cmap="gray")
            plt.title("gray map(28x28)")
            plt.axis("off")

            # Right: the inference result as text.
            plt.subplot(1, 3, 3)
            plt.title("inference result")
            plt.text(0.1, 0.6, f"predicition class: {predicted_class}", fontsize=14)
            plt.text(0.1, 0.4, f"confidence: {confidence:.4f}", fontsize=14)
            #plt.text(0.1, 0.2, f"Loss: {loss_value:.4f}", fontsize=14)
            plt.axis("off")

            plt.tight_layout()
            #plt.show()

            plt.savefig(output_path, bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even if rendering or saving failed.
            plt.close()

    @staticmethod
    def _load_image(path):
        """Return the image at ``path`` as read by ``cv2.imread``, or ``None`` if the path does not exist."""
        if not os.path.exists(path):
            return None
        return cv2.imread(path)

    @staticmethod
    def _save_cvimage(image, path):
        """Write an OpenCV image to ``path``."""
        cv2.imwrite(path, image)

def main():
    """Parse the command line, classify the input image and save the result figure.

    Exits with status 1 if processing raises.
    """
    # Command-line arguments.
    parser = argparse.ArgumentParser(description='MNIST 处理器')
    parser.add_argument('--input', required=True, help='输入图像路径')
    parser.add_argument('--output', help='输出图像路径 (可选)')
    args = parser.parse_args()
    print("parse succeed")

    # Resolve the output path here so the debug preview below knows where the
    # figure was written even when --output is omitted (args.output is None).
    output_path = args.output
    if output_path is None:
        output_path = f"/dev/shm/mnist_result_{int(time.time())}.png"

    # Create the processor (loads the model).
    processor = MNISTProcessor()

    try:
        # Run the pipeline.
        processor.process_image(args.input, output_path)

        # Optional preview of the saved figure, enabled via the DEBUG_SHOW env var.
        if os.environ.get('DEBUG_SHOW'):
            plt.imshow(cv2.cvtColor(cv2.imread(output_path), cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.show()

    except Exception as e:
        print(f"处理失败: {str(e)}")
        # SystemExit works without the interactive-only `exit` helper from `site`.
        raise SystemExit(1)

if __name__ == "__main__":
    # Announce and wait three seconds before starting, giving the user time to
    # get ready before the image is processed.
    print("sleep for 3 seconds")
    time.sleep(3)
    main()

Script3

在资源受限的虚拟机环境下也能完成训练和推理的简单模型训练 demo。

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# ==============================================
# 1. Load the MNIST dataset from a local file
# ==============================================
def load_local_mnist(path='./datasets/mnist.npz'):
    """Read an MNIST ``.npz`` archive.

    :param path: location of the archive; it must contain the arrays
        ``x_train``, ``y_train``, ``x_test`` and ``y_test``
    :return: ``((x_train, y_train), (x_test, y_test))``
    """
    keys = ('x_train', 'y_train', 'x_test', 'y_test')
    # NOTE(review): allow_pickle=True lets a crafted archive run code on load;
    # only point this at files you trust.
    with np.load(path, allow_pickle=True) as archive:
        # The tuple is fully unpacked before the archive is closed.
        x_train, y_train, x_test, y_test = (archive[key] for key in keys)
    return (x_train, y_train), (x_test, y_test)

# Load the dataset from a local path.
local_mnist_path = '/home/ncut/.keras/datasets/mnist.npz'  # change to your actual path
(x_train, y_train), (x_test, y_test) = load_local_mnist(local_mnist_path)

# ==============================================
# 2. Preprocessing
# ==============================================
# Flatten every image into a 784-element vector and scale pixels to [0, 1].
x_train = np.reshape(x_train, (-1, 28 * 28)).astype(np.float32) / 255.0
x_test = np.reshape(x_test, (-1, 28 * 28)).astype(np.float32) / 255.0

# ==============================================
# 3. Visualise the first training images
# ==============================================
def plot_mnist_samples(images, labels, num_samples=25):
    """Show the first ``num_samples`` images in a 5-column grid, titled with their labels.

    The number of rows grows with ``num_samples`` and the count is clamped to
    the data actually available, so any sample count works (the default 25
    yields the usual 5x5 grid on a 10x10 inch figure).

    :param images: indexable collection of images; each item must be reshapable to 28x28
    :param labels: indexable collection of labels aligned with ``images``
    :param num_samples: maximum number of images to draw
    """
    count = min(num_samples, len(images), len(labels))
    cols = 5
    rows = max(1, (count + cols - 1) // cols)  # ceiling division, at least one row
    plt.figure(figsize=(10, 2 * rows))
    for i in range(count):
        plt.subplot(rows, cols, i + 1)
        # reshape is a no-op for 28x28 input and restores flattened 784-vectors.
        plt.imshow(images[i].reshape(28, 28), cmap='gray')
        plt.title(f"Label: {labels[i]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Visualise the first training images using the raw (unflattened) data.
# load_local_mnist returns ((x_train, y_train), (x_test, y_test)), so the
# training images are the first element of the FIRST pair; unpacking the
# second pair would plot test images under training labels.
(x_train_original, _), _ = load_local_mnist(local_mnist_path)
plot_mnist_samples(x_train_original, y_train)

# ==============================================
# 4. Train the model (on the flattened data)
# ==============================================
# Small fully connected classifier: 784 -> 128 (ReLU) -> dropout -> 10 (softmax).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Integer labels, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# validation_split=0.2 sets aside 20% of the training data for validation.
history = model.fit(x_train, y_train, epochs=5, validation_split=0.2, verbose=1)

# ==============================================
# 5. Evaluate on the test set
# ==============================================
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'\nTest accuracy: {test_acc:.4f}')

Script4

训练模型的脚本，此处使用了数据增强和缓存，是我在 Google Colab 上训练时使用的脚本。

import tensorflow as tf
import numpy as np
from sklearn.utils import class_weight

# 1. Load MNIST through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# 2. Basic preprocessing: scale pixels to [0, 1] and append a channel axis,
#    so each image goes from (28, 28) to (28, 28, 1).
x_train = (x_train.astype('float32') / 255.0)[..., np.newaxis]
x_test = (x_test.astype('float32') / 255.0)[..., np.newaxis]

# 3. Augmentation pipeline: random rotation, zoom, translation, brightness and contrast.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomRotation(0.1))
data_augmentation.add(tf.keras.layers.RandomZoom(0.1))
data_augmentation.add(tf.keras.layers.RandomTranslation(0.1, 0.1))
data_augmentation.add(
    tf.keras.layers.Lambda(lambda x: tf.image.random_brightness(x, max_delta=0.1))
)
data_augmentation.add(tf.keras.layers.RandomContrast(0.1))


# 4. Per-sample augmentation function, intended for Dataset.map.
def augment(image, label):
    """Run one image through the augmentation pipeline; the label passes through unchanged.

    :param image: a single image; per the original note its shape is (28, 28, 1)
    :param label: the label belonging to ``image``
    :return: ``(augmented_image, label)``
    """
    # training=True forces the random layers to be active.
    augmented = data_augmentation(image, training=True)
    return augmented, label

# 5. Build the training and validation datasets.
# Order matters for the training pipeline: cache the raw samples FIRST, then
# shuffle and augment. Calling cache() after map(augment)/batch() would store
# the first epoch's augmented batches and replay exactly those in every later
# epoch, which defeats both the augmentation and the per-epoch reshuffle.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.cache()
# A buffer as large as the dataset gives a full uniform shuffle each epoch.
train_dataset = train_dataset.shuffle(buffer_size=len(x_train))
train_dataset = train_dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(64)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# The validation data is deterministic, so caching the finished batches is fine.
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(64)
val_dataset = val_dataset.cache()
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

# 6. Compute class weights from the original y_train labels.
cw = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
# Key each weight by its position in the weight array.
class_weights = {index: weight for index, weight in enumerate(cw)}
print("Class weights:", class_weights)

# 7. Build the CNN: conv -> max-pool -> dropout -> flatten -> dense -> dropout -> softmax.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# 8. Compile the model (integer labels, hence the sparse loss).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 9. Train the model, passing the class weights along.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=15,
    class_weight=class_weights,
)

# Save the model, then load it back to check that the saved file round-trips.
model.save("myMNIST_Model.keras")
reconstructed_model = tf.keras.models.load_model("myMNIST_Model.keras")

# 10. Evaluate the in-memory model and the reloaded copy on the test set.
test_loss1, test_acc1 = model.evaluate(x_test, y_test)
test_loss2, test_acc2 = reconstructed_model.evaluate(x_test, y_test)
print("Test loss1:", test_loss1)
print(f"Test accuracy1: {test_acc1:.4f}")
print("Test loss2:", test_loss2)
# Accuracy is a fraction in [0, 1], not a percentage: print it without a "%"
# sign and with the same precision as accuracy1 so the two lines compare directly.
print(f"Test accuracy2: {test_acc2:.4f}")

C++ subscriber

#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <cv_bridge/cv_bridge.h>
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <ctime>
#include <string>
#include <sstream>

// Build an identifier of the form "<unix-time>_<n>" (replacement for generate_uuid()).
// <n> is a counter that starts at 0 and grows by one on every call, so
// successive calls within this process never return the same string.
std::string generate_unique_id() {
    static int counter = 0;
    std::ostringstream id;
    id << time(nullptr);
    id << "_" << counter;
    ++counter;
    return id.str();
}

// Subscriber callback: converts the incoming ROS image to an OpenCV BGR image,
// writes it to /dev/shm, and runs the Python MNIST script on that file,
// blocking until the script has finished.
void imageCallback(const sensor_msgs::ImageConstPtr& msg) {
    ROS_INFO("process callback");
    try {
        // Convert the ROS image message to an OpenCV BGR image.
        cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, "bgr8");
        const cv::Mat& image = cv_ptr->image;

        // Per-frame file names, so successive frames do not overwrite each other.
        const std::string uuid = generate_unique_id();
        const std::string temp_path = "/dev/shm/ros_MNIST_input_" + uuid + ".jpg";
        const std::string output_path = "/dev/shm/ros_MNIST_output_" + uuid + ".jpg";

        // Save the input image; without it the script has nothing to read,
        // so stop here if the write fails.
        if (!cv::imwrite(temp_path, image)) {
            ROS_ERROR("failed to write input image to %s", temp_path.c_str());
            return;
        }

        // Build the Python command line. Appending "&" would run the script in
        // the background; it is left out so std::system blocks until it exits.
        const std::string command =
            "/home/ncut/miniconda3/envs/tf/bin/python  /home/ncut/my_ws/src/graduation_design/scripts/MNIST_photo.py "
            "--input " + temp_path + " "
            "--output " + output_path + " ";

        // Run the Python script.
        // NOTE(review): the message below presumably refers to a start-up
        // delay inside the script itself; this function does not sleep.
        ROS_INFO("sleep for 3 seconds for u to prepare");
        // std::system returns an implementation-defined status; zero means the
        // command succeeded, but a nonzero value is not necessarily the
        // script's plain exit code.
        int ret = std::system(command.c_str());
        if (ret != 0) {
            ROS_ERROR("Python脚本调用失败，返回码: %d", ret);
            return;
        }
        ROS_INFO("invoke python script sucessfully");

        // (Optional) poll for the output file and display the result;
        // asynchronous reading of output_path could be added here.

    } catch (const cv_bridge::Exception& e) {
        ROS_ERROR("cv_bridge异常: %s", e.what());
    } catch (const cv::Exception& e) {
        // cv::imwrite can throw (for example on an unsupported image);
        // do not let that escape the callback.
        ROS_ERROR("OpenCV exception: %s", e.what());
    }
}

// Deletes this node's temporary input/output JPEGs from /dev/shm when an
// instance is destroyed.
class Cleaner {
public:
    ~Cleaner() {
        // Shell command removing every input and output image this node wrote.
        static const char* const kCleanupCommand =
            "rm -f /dev/shm/ros_MNIST_input_*.jpg  /dev/shm/ros_MNIST_output_*.jpg";
        std::system(kCleanupCommand);
    }
};

// Node entry point: subscribes to the camera topic and dispatches callbacks
// from a rate-limited spinOnce() loop until ROS shuts down.
int main(int argc, char** argv) {
    ros::init(argc, argv, "MNIST_sub_photo");
    ros::NodeHandle nh;

    // For my pc, the topic name is camera/image_raw
    // Subscriber queue size is 1.
    ros::Subscriber sub = nh.subscribe("/camera/rgb/image_raw", 1, imageCallback);

    // for a test
    //Cleaner cleaner;

    //ros::spin();


    /* a demo of ros::spinOnce, not that useful */
    // ros::Rate takes a frequency in Hz, so 0.04 means one loop iteration
    // roughly every 25 seconds.
    // NOTE(review): if a 0.04 s period was intended this should be
    // ros::Rate(25) - confirm.
    ros::Rate loop_rate(0.04);
    while(ros::ok()) {
        ros::spinOnce();    // run the callbacks that are pending right now, on this thread
        loop_rate.sleep();
    }
    ROS_INFO("out of while loop");

    return 0;
}

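实际使用的 C++ 节点：接收图片，使用 cv_bridge 功能包将 ROS 图片消息转换为 OpenCV 格式，把图片保存为文件后调用 Python 脚本，并将保存路径作为参数传入，供脚本处理。/dev/shm/ 目录下的 .jpg 文件由 RAII 类 Cleaner 负责清理：只要在 main 中创建了 Cleaner 对象（上面代码中这一行被注释掉了，需要取消注释才会生效），它的析构函数就会在 main 正常返回、或因异常导致栈展开而退出时执行清理命令，从而避免临时文件残留；但如果进程被强制终止（例如 SIGKILL 或 abort），析构函数不会运行，文件需要手动清理。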