Introduction
The pre- and post-processing for text detection on the OpenVINO deployment side differ somewhat from those used inside PaddleOCR.
1. Preprocessing
After the text detection model is converted to ONNX (with fixed input and output sizes) and deployed on OpenVINO, the preprocessed input image must match the model's fixed size of 960*960. On the PaddleOCR deployment side, by contrast, the input image size is dynamic, so running PaddleOCR's preprocessing unchanged on OpenVINO makes inference fail and crash. The preprocessing here therefore scales the image while preserving its aspect ratio and pads the background out to 960*960; the normalization step itself is exactly the same as in PaddleOCR.
The original image is shown below (219*178):
The result after preprocessing is shown below (960*960):
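In code, this scale-and-pad step looks roughly like the following minimal sketch (function name padToSquareAndResize is illustrative; the complete preprocessing used in this article, including normalization and the NCHW permute, is the paddleOCRPreprocess function in section 3):

#include <algorithm>
#include <opencv2/opencv.hpp>

// Sketch: pad the image to a square with a black background, then resize to target*target.
// The returned ratio is used later to map detection boxes back to the original image.
cv::Mat padToSquareAndResize(const cv::Mat& image, int target, float& ratio) {
    int side = std::max(image.cols, image.rows);
    cv::Mat square = cv::Mat::zeros(side, side, image.type());
    image.copyTo(square(cv::Rect(0, 0, image.cols, image.rows)));
    ratio = static_cast<float>(target) / static_cast<float>(side);
    cv::Mat out;
    cv::resize(square, out, cv::Size(target, target), 0, 0, cv::INTER_LINEAR);
    return out;
}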
2. Postprocessing
OCR text detection algorithm: DBNet (reference: https://blog.csdn.net/weixin_42148389/article/details/133685422)
DBNet is a segmentation-based text detection algorithm. Segmentation-based methods generally work as follows: the network first outputs a probability map for the text regions, a preset threshold converts the probability map into a binary map, and postprocessing then turns the binary map into the detection results (text box coordinates). The weakness is that the choice of threshold is critical.
DBNet addresses this with differentiable binarization: each pixel is binarized adaptively, with its binarization threshold learned by the network. Folding the binarization step into training produces a more robust binary map and simplifies the postprocessing.
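For reference, the approximate (differentiable) binarization proposed in the DBNet paper is

\[ \hat{B}_{i,j} = \frac{1}{1 + e^{-k\,(P_{i,j} - T_{i,j})}} \]

where P is the probability map, T is the threshold map predicted by the network, and k is an amplifying factor (50 in the paper). At inference time the threshold branch can be dropped and a fixed score threshold applied to the probability map instead, which is essentially what the code below does.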
After OpenVINO inference, the output is likewise a 960*960 single-channel mask whose pixel values are scores. Thresholding the scores yields a binary image; contours are then extracted from it, the bounding rectangle of each contour is taken as a text detection box, and finally the boxes are scaled back to the original image by the preprocessing ratio.
The postprocessing here is a simplified version of my own rather than the real DB postprocessing; since only bounding-rectangle detection boxes are needed, this simple approach is enough for now, and curved-text detection and similar cases will be handled later.
PaddleOCR's DB postprocessing also operates on the mask image and ultimately outputs text boxes. The principle is essentially the same; its implementation is just more elaborate and its results more accurate. For more on the postprocessing, see:
https://gitee.com/paddlepaddle/PaddleOCR/blob/release/2.6/deploy/cpp_infer/src/postprocess_op.cpp
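As a rough sketch, the simplified postprocessing described above comes down to the following (function name simplePostProcess is illustrative; the actual version used here is the paddleOCRPostProcess function in section 3, which additionally computes a mean score per region):

#include <opencv2/opencv.hpp>
#include <vector>

// Sketch: threshold the 960*960 score map, find contours, take minimum-area
// rectangles and scale them back to the original image by 1/ratio.
std::vector<cv::RotatedRect> simplePostProcess(const cv::Mat& scoreMap, float ratio,
                                               float scoreThresh = 0.3f, double minArea = 100.0) {
    cv::Mat bin;
    cv::threshold(scoreMap, bin, scoreThresh, 255, cv::THRESH_BINARY);
    bin.convertTo(bin, CV_8U); // findContours requires an 8-bit single-channel image
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(bin, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
    std::vector<cv::RotatedRect> boxes;
    for (const auto& c : contours) {
        if (cv::contourArea(c) < minArea) continue; // drop tiny noise regions
        cv::RotatedRect box = cv::minAreaRect(c);
        box.center.x /= ratio;   box.center.y /= ratio;   // map back to the original image
        box.size.width /= ratio; box.size.height /= ratio;
        boxes.push_back(box);
    }
    return boxes;
}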
3. C++ deployment code
#include <iostream>
#include <string>
#include <vector>
#include <iterator>
#include <fstream>
#include <chrono>   // for std::chrono::system_clock
#include <cstdint>
using namespace std;
using namespace chrono; // bring in chrono after std, otherwise it fails to compile
#include <openvino/openvino.hpp> // OpenVINO runtime header
#include <opencv2/opencv.hpp>    // OpenCV header
/* --------- Please modify the paths of the text detection model and the test image -----------*/
std::string model_file = "model.onnx"; // path to the exported ONNX text detection model (or the IR .xml)
//std::string image_file = "E:/tupian/ocr2/fangxiangfenlei/zheng/Image_20231219110654464_1242_1072_389_164_0.jpg";
std::string image_file = "1.jpg";
// Per-channel normalization, matching PaddleOCR: x = (x / 255 - mean) * scale
void Normalize(cv::Mat* im, const std::vector<float>& mean, const std::vector<float>& scale, const bool is_scale) {
    double e = 1.0;
    if (is_scale) {
        e /= 255.0;
    }
    (*im).convertTo(*im, CV_32FC3, e); // to float and optionally scale into [0, 1]
    std::vector<cv::Mat> bgr_channels(3);
    cv::split(*im, bgr_channels);
    for (size_t i = 0; i < bgr_channels.size(); i++) {
        // (x - mean[i]) * scale[i], applied per channel
        bgr_channels[i].convertTo(bgr_channels[i], CV_32FC1, 1.0 * scale[i],
                                  (0.0 - mean[i]) * scale[i]);
    }
    cv::merge(bgr_channels, *im);
}
// HWC -> NCHW: copy each image's channels into the contiguous input buffer.
void PermuteBatch(const std::vector<cv::Mat>& imgs, float* data) {
    for (size_t j = 0; j < imgs.size(); j++) {
        int rh = imgs[j].rows;
        int rw = imgs[j].cols;
        int rc = imgs[j].channels();
        for (int i = 0; i < rc; ++i) {
            // extractChannel writes channel i directly into the destination plane
            cv::extractChannel(
                imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i);
        }
    }
}
void paddleOCRPreprocess(const cv::Mat& image, std::vector<float>& input, float& ratio,
                         const int batch, const int channel, const int targetHeight, const int targetWidth,
                         const std::vector<float>& mean, const std::vector<float>& scale)
{
    cv::Mat out;
    if (image.empty())
        throw "paddleOCRPreprocess: input image is empty\n";
    if (targetHeight <= 0 || targetWidth <= 0)
        throw "paddleOCRPreprocess: target size error, targetHeight <= 0 || targetWidth <= 0";
    // Resize and pad:
    // keep the aspect ratio by first padding the image to a square with a black background
    int col = image.cols;
    int row = image.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    image.copyTo(result(cv::Rect(0, 0, col, row)));
    ratio = float(targetHeight) / float(_max); // scaling ratio, used later to map boxes back
    // then resize the square to 960*960 (cv::Size takes width first)
    cv::resize(result, out, cv::Size(targetWidth, targetHeight), 0.f, 0.f, cv::INTER_LINEAR);
    // Normalization (identical to PaddleOCR)
    Normalize(&out, mean, scale, true);
    // Layout change to NCHW (identical to PaddleOCR)
    std::vector<cv::Mat> norm_img_batch;
    norm_img_batch.push_back(out);
    input.assign(batch * channel * out.rows * out.cols, 0.0f);
    PermuteBatch(norm_img_batch, input.data());
}
void paddleOCRPostProcess(cv::Mat& detect_buffer, float& ratio, std::vector<cv::RotatedRect>& boxes, std::vector<float>& scores)
{
    // The network output is a score map (one score per pixel); threshold it to get a binary mask.
    cv::Mat thresh_img;
    cv::threshold(detect_buffer, thresh_img, 0.3, 255, cv::THRESH_BINARY);
    // Convert to 8-bit, as required by findContours
    thresh_img.convertTo(thresh_img, CV_8U);
    // Simplified postprocess (not the full DB postprocess): find contours and take their
    // minimum-area bounding rectangles as the text detection boxes.
    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;
    findContours(thresh_img, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_NONE);
    for (size_t i = 0; i < contours.size(); i++) {
        // Filter out small regions first (area < 100) so that scores and boxes stay aligned
        double area = contourArea(contours[i]);
        std::cout << "area:" << area << std::endl;
        if (area < 100) {
            continue;
        }
        // Build a mask for this contour so its region can be cut out of the score map
        cv::Mat scoremask = cv::Mat::zeros(thresh_img.size(), thresh_img.type());
        drawContours(scoremask, contours, static_cast<int>(i), 255, -1);
        // Compute the region's mean score over its non-zero pixels
        cv::Mat scoreImg = cv::Mat::zeros(detect_buffer.size(), detect_buffer.type());
        detect_buffer.copyTo(scoreImg, scoremask); // copy only the masked region of the score map
        int no_zero_count = cv::countNonZero(scoreImg);
        if (no_zero_count > 0) {
            double score_sum = cv::sum(scoreImg)[0];
            scores.push_back(score_sum / no_zero_count);
        }
        else {
            scores.push_back(0);
        }
        // Minimum-area bounding rectangle of the contour
        cv::RotatedRect box = minAreaRect(contours[i]);
        box.size.width = box.size.width + 60;   // expand the box a little
        box.size.height = box.size.height + 60;
        // Map the box back to the original image coordinates
        box.center.x = box.center.x / ratio;
        box.center.y = box.center.y / ratio;
        box.size.width = box.size.width / ratio;
        box.size.height = box.size.height / ratio;
        boxes.push_back(box);
    }
}
int main(int argc, char* argv[]) {
// -------- Get OpenVINO runtime version --------
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
    // -------- Step 1. Initialize the OpenVINO Runtime Core --------
    ov::Core core;
    // -------- Step 2. Compile the model (load it and pick a device/plugin) --------
    auto compiled_model = core.compile_model(model_file, "AUTO"); // AUTO / GPU / CPU
    // -------- Step 3. Create an inference request --------
    ov::InferRequest infer_request = compiled_model.create_infer_request();
for (auto input : compiled_model.inputs()) {
std::cout << input.get_any_name() << std::endl;
std::cout << input.get_element_type() << std::endl;
std::cout << input.get_partial_shape() << std::endl;
}
    // Timing
    auto start = std::chrono::system_clock::now();
    // -------- Step 4. Read the image file and run the preprocessing --------
cv::Mat img = cv::imread(image_file); //Load a picture into memory
std::vector<float> input;
int batch = 1;
int channel = 3;
int h = 960;
int w = 960;
std::vector<float> mean = { 0.485f, 0.456f, 0.406f };
std::vector<float> scale = { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
bool is_scale_ = true;
float ratio;
paddleOCRPreprocess(img, input, ratio, batch, channel, h, w, mean, scale);
//cv::Mat input = cv::dnn::blobFromImage(img, 1 / 255.0, cv::Size(960, 960), cv::Scalar(0, 0, 0), true);
    // -------- Step 5. Feed the blob into the input node of the text detection model --------
    // Get the input port of the (single-input) model
auto input_port = compiled_model.input();
// Create tensor from external memory
ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), input.data());
// Set input tensor for model with one input
infer_request.set_input_tensor(input_tensor);
// -------- Step 6. Start inference --------
infer_request.infer();
// -------- Step 7. Get the inference result --------
auto detect = infer_request.get_output_tensor(0);
auto detect_shape = detect.get_shape();
std::cout << "The shape of Detection tensor:" << detect_shape << std::endl;
cv::Mat detect_buffer(detect_shape[2], detect_shape[3], CV_32F, detect.data());
    // The block below mirrors the map preparation in PaddleOCR's DB postprocess
    // (pred/bit maps); it is kept for reference but not used by the simplified
    // postprocess further down, which works on detect_buffer directly.
    int n2 = static_cast<int>(detect_shape[2]);
    int n3 = static_cast<int>(detect_shape[3]);
    int n = n2 * n3;
    std::vector<float> pred(n, 0.0f);
    std::vector<unsigned char> cbuf(n, ' ');
    // Layout change (single-channel output, so this is effectively a copy)
    std::vector<cv::Mat> norm_img_batch;
    norm_img_batch.push_back(detect_buffer);
    std::vector<float> out_data(n, 0.0f);
    PermuteBatch(norm_img_batch, out_data.data());
    for (int i = 0; i < n; i++) {
        pred[i] = out_data[i];
        cbuf[i] = (unsigned char)(out_data[i] * 255);
    }
    cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char*)cbuf.data());
    cv::Mat pred_map(n2, n3, CV_32F, (float*)pred.data());
    const double threshold = 0.3 * 255;
    const double maxvalue = 255;
    cv::Mat bit_map;
    cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
    bool use_dilation = false; // optionally dilate the binary map, as in PaddleOCR
    if (use_dilation) {
        cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
        cv::dilate(bit_map, bit_map, dila_ele);
    }
std::vector<cv::RotatedRect> boxes;
std::vector<float> scores;
paddleOCRPostProcess(detect_buffer, ratio, boxes, scores);
    for (size_t i = 0; i < boxes.size(); i++) {
        // Get the four vertices of the rotated rectangle
        cv::Point2f vertices[4];
        boxes[i].points(vertices);
        // Draw the minimum-area bounding rectangle edge by edge
        for (int j = 0; j < 4; j++)
        {
            cv::line(img, vertices[j], vertices[(j + 1) % 4], cv::Scalar(0, 255, 0), 2, 8);
        }
        cv::putText(img, "score:" + std::to_string(scores[i]), cv::Point(boxes[i].center.x, boxes[i].center.y), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(12, 23, 200), 1, 8);
        std::cout << "score: " << scores[i] << " \n";
    }
auto end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed = end - start;
std::cout << "Elapsed time: " << elapsed.count() * 1000 << " ms\n";
cv::imshow("demo", img);
cv::waitKey(0);
return 0;
}