#include "yolov5_clear_detect_inference.h"

YoloV5ClearDetectInference::YoloV5ClearDetectInference() {}

YoloV5ClearDetectInference::~YoloV5ClearDetectInference() {}

// Initialize the TensorRT runtime for the YOLOv5 "clear" detector.
// Allocates CUDA streams and host/device buffers, then deserializes the
// engine file (strEngineName) into an ICudaEngine + IExecutionContext.
//
// @param pYoloV5ClearModelInfo  model/config description (borrowed, must outlive this object)
// @param strModelName           ONNX model path (only used by the disabled build-engine path)
// @param strEngineName          serialized TensorRT engine (plan) file to load
// @return 0 on success, -1 when the engine file cannot be read
int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceInit(ClearModelInfo* pYoloV5ClearModelInfo,
                                                               const std::string& strModelName,
                                                               const std::string& strEngineName) {
    pYoloV5ClearModelInfo_ = pYoloV5ClearModelInfo;

    // Resource allocation (streams plus host- and device-side memory)
    cudaSetDevice(DEVICE); // select GPU

    // CUDA stream used by the image pre-processing kernel
    pImagePreprocessStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pImagePreprocessStream_));
    // CUDA stream used by model inference
    pInferenceModelStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pInferenceModelStream_));

    pGLogger_ = new Logger;

    // Device-side input/output bindings for the network
    pfBuffers_[0] = nullptr;
    pfBuffers_[1] = nullptr;
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[0],
                          pYoloV5ClearModelInfo_->modelInfo.uiInputSize * sizeof(float)));  // input binding
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[1],
                          pYoloV5ClearModelInfo_->modelInfo.uiOutputSize * sizeof(float))); // output binding

    // BUGFIX: the original did `pu8ImgHost_ = new uint8_t;` (and likewise for
    // pu8ImgDevice_) immediately before cudaMallocHost/cudaMalloc overwrote the
    // pointers — a guaranteed one-byte heap leak per init, and the wrong
    // allocator for CUDA memory anyway. The CUDA allocations below are the
    // only ones needed.
    pu8ImgHost_ = nullptr;
    pu8ImgDevice_ = nullptr;
    // Pinned host staging buffer for the raw input image
    CUDA_CHECK(cudaMallocHost((void**)&pu8ImgHost_,
                              MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ClearModelInfo_->modelInfo.uiChannel));
    // Device staging buffer for the raw input image
    CUDA_CHECK(cudaMalloc((void**)&pu8ImgDevice_,
                          MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ClearModelInfo_->modelInfo.uiChannel));

    // Host-side staging for the network input/output tensors
    pfInputData_ = new float[pYoloV5ClearModelInfo_->modelInfo.uiBatchSize *
                             pYoloV5ClearModelInfo_->modelInfo.uiInputSize];
    pfOutputData_ = new float[pYoloV5ClearModelInfo_->modelInfo.uiBatchSize *
                              pYoloV5ClearModelInfo_->modelInfo.uiOutputSize];

#if 0
    // One-shot engine build: compile the ONNX model into a TensorRT engine and
    // serialize it to strEngineName. Once the plan file exists this path is no
    // longer needed, hence it is compiled out.
    if (!strModelName.empty()) {
        IHostMemory* modelStream{ nullptr };
        ONNXToModel(*pGLogger_, pYoloV5ClearModelInfo_->modelInfo.uiBatchSize,
                    MAX_WORKSPAXE_SIZE, &modelStream, strModelName);
        assert(modelStream != nullptr);
        std::ofstream p(strEngineName, std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        // NOTE(review): the cast's template argument was lost to extraction;
        // <const char*> matches the standard TensorRT serialization sample — TODO confirm.
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
    }
#endif

    // Deserialize the engine and prepare to run inference
    std::ifstream file(strEngineName, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    // trtModelStream holds the serialized plan file in memory (analogous to an ifstream buffer)
    char* trtModelStream = nullptr;
    size_t size = 0;
    file.seekg(0, file.end); // seek to end of file
    size = file.tellg();     // current position == file size
    file.seekg(0, file.beg); // seek back to the beginning
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size); // read the serialized engine (data + size) into trtModelStream
    file.close();

    // 1. Runtime environment: IRuntime, with gLogger for diagnostics
    pRuntime_ = createInferRuntime(*pGLogger_);
    assert(pRuntime_ != nullptr);
    // 2. Deserialize the engine from trtModelStream (plugins must be registered first)
    bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
    pEngine_ = pRuntime_->deserializeCudaEngine(trtModelStream, size);
    assert(pEngine_ != nullptr);
    // 3. Execution context, used to launch the CUDA kernels during inference
    pContext_ = pEngine_->createExecutionContext();
    assert(pContext_ != nullptr);
    delete[] trtModelStream; // the in-memory plan copy is no longer needed

    // NOTE(review): the next two statements were garbled by extraction;
    // reconstructed from the canonical tensorrtx yolov5 sample — TODO confirm.
    // std::cout << "Engine get NB Bindings is: " << pEngine_->getNbBindings() << std::endl;
    assert(pEngine_->getNbBindings() == 2);

    // Look up the input/output binding indices by blob name
    uiInputIndex_ = pEngine_->getBindingIndex((pYoloV5ClearModelInfo_->modelInfo.strInputBlobName).c_str());
    uiOutputIndex_ = pEngine_->getBindingIndex((pYoloV5ClearModelInfo_->modelInfo.strOutputBlobName).c_str());
    // std::cout << "inputIndex: " << uiInputIndex_ << " outputIndex: " << uiOutputIndex_ << std::endl;
    return 0;
}

// Release every resource acquired in Init. Always returns 0.
// NOTE(review): the original signature and the leading statements of this
// function were destroyed by extraction garbling; the name and the CUDA-side
// releases below are reconstructed from the members allocated in Init —
// TODO confirm both against the pristine source.
int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceDeInit() {
    // Destroy the CUDA streams and free device/pinned-host buffers
    if (pImagePreprocessStream_) {
        cudaStreamDestroy(*pImagePreprocessStream_);
        delete pImagePreprocessStream_;
        pImagePreprocessStream_ = nullptr;
    }
    if (pInferenceModelStream_) {
        cudaStreamDestroy(*pInferenceModelStream_);
        delete pInferenceModelStream_;
        pInferenceModelStream_ = nullptr;
    }
    CUDA_CHECK(cudaFree(pu8ImgDevice_));
    CUDA_CHECK(cudaFreeHost(pu8ImgHost_));
    CUDA_CHECK(cudaFree(pfBuffers_[0]));
    CUDA_CHECK(cudaFree(pfBuffers_[1]));

    pContext_->destroy(); // destroy the execution context
    pEngine_->destroy();  // destroy the TensorRT engine
    pRuntime_->destroy(); // destroy the runtime environment
    if (pGLogger_) {      // release the logger
        delete pGLogger_;
        pGLogger_ = nullptr;
    }
    if (pfInputData_) {
        delete[] pfInputData_;
        pfInputData_ = nullptr;
    }
    if (pfOutputData_) {
        delete[] pfOutputData_;
        pfOutputData_ = nullptr;
    }
    return 0;
}

int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceModel(cv::Mat& frame,
std::vector& vecRes) { // NOTE(review): the element type of vecRes was lost to
                       // extraction garbling (entries expose a `.detection`
                       // member used below) — TODO restore from pristine source.
    // Run one frame through the detector: preprocess -> inference -> decode/NMS
    // -> map boxes back to the original image. Returns 0.
    size_t size_image_src = frame.cols * frame.rows * pYoloV5ClearModelInfo_->modelInfo.uiChannel;
    unsigned int img_width = frame.cols, img_height = frame.rows;
    size_t size_image_dst = pYoloV5ClearModelInfo_->modelInfo.uiModelWidth *
                            pYoloV5ClearModelInfo_->modelInfo.uiModelHeight *
                            pYoloV5ClearModelInfo_->modelInfo.uiChannel;

    // 1. Pre-processing
    auto preprocess_start = std::chrono::system_clock::now(); // start timing
#ifdef ENABLE_CUDA_PREPROCESS
    memcpy(pu8ImgHost_, frame.data, size_image_src); // stage raw image in pinned host memory
    CUDA_CHECK(cudaMemcpyAsync(pu8ImgDevice_, pu8ImgHost_, size_image_src,
                               cudaMemcpyHostToDevice, *pImagePreprocessStream_)); // copy to device
    // GPU kernel writes the normalized input tensor straight into pfBuffers_[0]
    yolov5_detect_preprocess_kernel_img(pu8ImgDevice_, frame.cols, frame.rows,
                                        (float*)pfBuffers_[0],
                                        pYoloV5ClearModelInfo_->modelInfo.uiModelWidth,
                                        pYoloV5ClearModelInfo_->modelInfo.uiModelHeight,
                                        *pImagePreprocessStream_);
    cudaStreamSynchronize(*pImagePreprocessStream_);
#else
    // CPU path: letterbox resize, then BGR HWC -> RGB planar CHW normalized to [0,1]
    cv::Mat pr_img = preprocess_img(frame,
                                    pYoloV5ClearModelInfo_->modelInfo.uiModelWidth,
                                    pYoloV5ClearModelInfo_->modelInfo.uiModelHeight); // letterbox
    int n = 0;
    for (int row = 0; row < pYoloV5ClearModelInfo_->modelInfo.uiModelHeight; ++row) {
        uchar* uc_pixel = pr_img.data + row * pr_img.step;
        for (int col = 0; col < pYoloV5ClearModelInfo_->modelInfo.uiModelWidth; ++col) {
            pfInputData_[n] = (float)uc_pixel[2] / 255.0; // R plane
            pfInputData_[n + pYoloV5ClearModelInfo_->modelInfo.uiModelHeight *
                             pYoloV5ClearModelInfo_->modelInfo.uiModelWidth] =
                (float)uc_pixel[1] / 255.0; // G plane
            pfInputData_[n + 2 * pYoloV5ClearModelInfo_->modelInfo.uiModelHeight *
                             pYoloV5ClearModelInfo_->modelInfo.uiModelWidth] =
                (float)uc_pixel[0] / 255.0; // B plane
            uc_pixel += pYoloV5ClearModelInfo_->modelInfo.uiChannel;
            ++n;
        }
    }
#endif
    auto preprocess_end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear preprocess time: "
    //           << std::chrono::duration_cast<std::chrono::milliseconds>(preprocess_end - preprocess_start).count()
    //           << "ms" << std::endl;

    // 2. Inference
    float fResizeRatio = GetResizeRatio(img_width, img_height,
                                        pYoloV5ClearModelInfo_->modelInfo.uiModelWidth,
                                        pYoloV5ClearModelInfo_->modelInfo.uiModelHeight);
    auto start = std::chrono::system_clock::now(); // start timing
#ifdef ENABLE_CUDA_PREPROCESS
    // Input tensor is already resident in pfBuffers_[0]; only copy the output back
    doInferenceV4(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_, uiOutputIndex_,
                  pfOutputData_, pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ClearModelInfo_->modelInfo.uiBatchSize);
#else
    doInferenceV3(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiInputIndex_, pfInputData_, pYoloV5ClearModelInfo_->modelInfo.uiInputSize,
                  uiOutputIndex_, pfOutputData_, pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ClearModelInfo_->modelInfo.uiBatchSize);
#endif
    auto end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear inference time: "
    //           << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
    //           << "ms" << std::endl;

    // 3. Post-processing: decode the raw tensor and run OpenCV NMS
    auto decode_nms_start = std::chrono::system_clock::now();
    yolov5ClearDecodeOpenCVNms(vecRes, pfOutputData_,
                               pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.uiDetSize,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.uiClassNum,
                               pYoloV5ClearModelInfo_->clearModelParam.uiClearNum,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.fScoreThreshold,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.fNmsThreshold);
    auto decode_nms_end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear post time: "
    //           << std::chrono::duration_cast<std::chrono::milliseconds>(decode_nms_end - decode_nms_start).count()
    //           << "ms" << std::endl;
    // std::cout << "this picture find " << vecRes.size() << " objects" << std::endl;

    // 4. Map detections back to the original image (undo the center-padded letterbox).
    // NOTE(review): the loop header and the helper's name/leading arguments in this
    // span were destroyed by extraction garbling; only the trailing arguments
    // (model width, model height, vecRes[j].detection) survived. Reconstructed with
    // fResizeRatio, which the original computes for exactly this step — TODO confirm.
    for (size_t j = 0; j < vecRes.size(); j++) {
        GetRect(frame, fResizeRatio,
                pYoloV5ClearModelInfo_->modelInfo.uiModelWidth,
                pYoloV5ClearModelInfo_->modelInfo.uiModelHeight,
                vecRes[j].detection); // center-pad coordinate restore
    }

    // BUGFIX: the function is declared to return int but the visible original
    // fell off the end without a return statement (undefined behavior).
    return 0;
}