// Train_RFID_Linux/code/inference/yolov5_detect_inference.cu
// YOLOv5 detection inference via TensorRT (engine deserialization, CUDA
// preprocessing, inference, decode + NMS postprocessing).
#include "yolov5_detect_inference.h"

#include <vector>
// Default-construct only; all real initialization (streams, buffers, engine)
// happens in YoloV5DetectInferenceInit().
YoloV5DetectInference::YoloV5DetectInference()
{
}
// Trivial destructor; resources are released explicitly via
// YoloV5DetectInferenceDeinit(), not here.
YoloV5DetectInference::~YoloV5DetectInference()
{
}
// Initialize the detector: create CUDA streams, allocate host/device buffers,
// and deserialize the TensorRT engine from strEngineName.
//   pYoloV5ModelInfo - model geometry/parameters; caller-owned, must outlive this object.
//   strModelName     - ONNX model path (only used by the disabled build-engine branch).
//   strEngineName    - serialized TensorRT engine (plan) file to load.
// Returns 0 on success, -1 on failure.
int YoloV5DetectInference::YoloV5DetectInferenceInit(CommonModelInfo* pYoloV5ModelInfo, const std::string& strModelName, const std::string& strEngineName)
{
    pYoloV5ModelInfo_ = pYoloV5ModelInfo;
    // Resource allocation (streams, host- and device-side memory)
    cudaSetDevice(DEVICE); // select GPU
    // CUDA stream for image preprocessing
    pImagePreprocessStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pImagePreprocessStream_));
    // CUDA stream for model inference
    pInferenceModelStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pInferenceModelStream_));
    pGLogger_ = new Logger;
    // Device buffers bound to the engine's input/output bindings
    pfBuffers_[0] = nullptr; pfBuffers_[1] = nullptr;
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[0], pYoloV5ModelInfo_->modelInfo.uiInputSize * sizeof(float)));  // input binding
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[1], pYoloV5ModelInfo_->modelInfo.uiOutputSize * sizeof(float))); // output binding
    // BUGFIX: the original assigned `new uint8_t` to these two pointers right
    // before cudaMallocHost/cudaMalloc overwrote them, leaking one byte each.
    pu8ImgHost_ = nullptr;
    pu8ImgDevice_ = nullptr;
    CUDA_CHECK(cudaMallocHost((void**)&pu8ImgHost_, MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ModelInfo_->modelInfo.uiChannel)); // pinned host staging buffer
    CUDA_CHECK(cudaMalloc((void**)&pu8ImgDevice_, MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ModelInfo_->modelInfo.uiChannel));  // device staging buffer
    pfInputData_ = new float[pYoloV5ModelInfo_->modelInfo.uiBatchSize * pYoloV5ModelInfo_->modelInfo.uiInputSize];
    pfOutputData_ = new float[pYoloV5ModelInfo_->modelInfo.uiBatchSize * pYoloV5ModelInfo_->modelInfo.uiOutputSize];
    // Engine build path (disabled): compile the ONNX model into a TensorRT
    // engine once and serialize it to disk; after that only the engine file
    // is needed and this branch stays off.
#if 0
    if (!strModelName.empty()) {
        IHostMemory* modelStream{ nullptr };
        ONNXToModel(*pGLogger_, pYoloV5ModelInfo_->modelInfo.uiBatchSize, MAX_WORKSPAXE_SIZE, &modelStream, strModelName);
        assert(modelStream != nullptr);
        std::ofstream p(strEngineName, std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
    }
#endif
    // Deserialize the engine and prepare for inference
    std::ifstream file(strEngineName, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    // Read the whole serialized plan into memory.
    // RAII buffer replaces the original raw `new char[size]`.
    file.seekg(0, file.end);   // seek to end to learn the file size
    size_t size = file.tellg();
    file.seekg(0, file.beg);   // rewind to the beginning
    std::vector<char> trtModelStream(size);
    file.read(trtModelStream.data(), size);
    if (!file) { // BUGFIX: the original never verified that the read succeeded
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    file.close();
    // 1. Create the runtime; gLogger receives TensorRT diagnostics
    pRuntime_ = createInferRuntime(*pGLogger_);
    assert(pRuntime_ != nullptr);
    // 2. Register TensorRT's bundled plugins, then deserialize the engine.
    //    BUGFIX: the original ignored the initLibNvInferPlugins() result.
    if (!initLibNvInferPlugins(nullptr, "")) {
        std::cerr << "initLibNvInferPlugins failed" << std::endl;
        return -1;
    }
    pEngine_ = pRuntime_->deserializeCudaEngine(trtModelStream.data(), size);
    assert(pEngine_ != nullptr);
    // 3. Create the execution context used to launch inference
    pContext_ = pEngine_->createExecutionContext();
    assert(pContext_ != nullptr);
    std::cout<<"Engine get NB Bindings is: "<<pEngine_->getNbBindings()<<std::endl;
    assert(pEngine_->getNbBindings() == 2); // exactly one input + one output expected
    // Resolve the binding indices of the input/output blobs by name
    uiInputIndex_ = pEngine_->getBindingIndex((pYoloV5ModelInfo_->modelInfo.strInputBlobName).c_str());
    uiOutputIndex_ = pEngine_->getBindingIndex((pYoloV5ModelInfo_->modelInfo.strOutputBlobName).c_str());
    std::cout<<"inputIndex: "<<uiInputIndex_<<"\toutputIndex: "<<uiOutputIndex_<<std::endl;
    assert(uiInputIndex_ == 0);
    assert(uiOutputIndex_ == 1);
    return 0;
}
// Release every resource acquired in YoloV5DetectInferenceInit().
// BUGFIX: the original dereferenced the stream pointers and called
// destroy() on the TensorRT objects without null checks, crashing if Init
// failed part-way; it also left freed pointers dangling, so a second call
// double-freed. All branches are now guarded and pointers nulled, making
// Deinit safe after a partial Init and idempotent. Returns 0.
int YoloV5DetectInference::YoloV5DetectInferenceDeinit()
{
    // Destroy the image-preprocessing CUDA stream
    if (pImagePreprocessStream_) {
        CUDA_CHECK(cudaStreamDestroy(*pImagePreprocessStream_));
        delete pImagePreprocessStream_;
        pImagePreprocessStream_ = nullptr;
    }
    // Destroy the model-inference CUDA stream
    if (pInferenceModelStream_) {
        CUDA_CHECK(cudaStreamDestroy(*pInferenceModelStream_));
        delete pInferenceModelStream_;
        pInferenceModelStream_ = nullptr;
    }
    // Free staging buffers (device, then pinned host)
    if (pu8ImgDevice_) {
        CUDA_CHECK(cudaFree(pu8ImgDevice_));
        pu8ImgDevice_ = nullptr;
    }
    if (pu8ImgHost_) {
        CUDA_CHECK(cudaFreeHost(pu8ImgHost_));
        pu8ImgHost_ = nullptr;
    }
    // Free the input/output binding buffers
    if (pfBuffers_[0]) {
        CUDA_CHECK(cudaFree(pfBuffers_[0]));
        pfBuffers_[0] = nullptr;
    }
    if (pfBuffers_[1]) {
        CUDA_CHECK(cudaFree(pfBuffers_[1]));
        pfBuffers_[1] = nullptr;
    }
    // TensorRT objects: context depends on engine, engine on runtime —
    // destroy them in that order.
    if (pContext_) {
        pContext_->destroy();
        pContext_ = nullptr;
    }
    if (pEngine_) {
        pEngine_->destroy();
        pEngine_ = nullptr;
    }
    if (pRuntime_) {
        pRuntime_->destroy();
        pRuntime_ = nullptr;
    }
    // Release the logger and host-side tensor staging arrays
    if (pGLogger_) {
        delete pGLogger_;
        pGLogger_ = nullptr;
    }
    if (pfInputData_) {
        delete[] pfInputData_;
        pfInputData_ = nullptr;
    }
    if (pfOutputData_) {
        delete[] pfOutputData_;
        pfOutputData_ = nullptr;
    }
    return 0;
}
// Run one detection pass on `frame`: preprocess (letterbox, BGR->RGB, scale
// to [0,1]), run the TensorRT engine, then decode + NMS into vecRes with
// boxes mapped back to the original image coordinates.
//   frame  - input image; assumed BGR with modelInfo.uiChannel bytes/pixel (TODO confirm at caller).
//   vecRes - output detections (appended by the decode/NMS step).
// Returns 0 on success.
// BUGFIX: the original was declared `int` but fell off the end without a
// return statement — undefined behavior; it now returns 0.
int YoloV5DetectInference::YoloV5DetectInferenceModel(cv::Mat& frame, std::vector<Detection>& vecRes)
{
    const unsigned int img_width = frame.cols, img_height = frame.rows;
    const size_t size_image_src = (size_t)frame.cols * frame.rows * pYoloV5ModelInfo_->modelInfo.uiChannel;
    auto preprocess_start = std::chrono::system_clock::now(); // preprocessing timer
#ifdef ENABLE_CUDA_PREPROCESS
    // Stage the frame into pinned host memory, async-copy it to the device,
    // and run the fused letterbox/normalize kernel on the preprocessing stream.
    memcpy(pu8ImgHost_, frame.data, size_image_src);
    CUDA_CHECK(cudaMemcpyAsync(pu8ImgDevice_, pu8ImgHost_, size_image_src, cudaMemcpyHostToDevice, *pImagePreprocessStream_));
    yolov5_detect_preprocess_kernel_img(pu8ImgDevice_, frame.cols, frame.rows, (float*)pfBuffers_[0], pYoloV5ModelInfo_->modelInfo.uiModelWidth, pYoloV5ModelInfo_->modelInfo.uiModelHeight, *pImagePreprocessStream_);
    // The input binding must be fully written before inference starts
    // (BUGFIX: return code was previously ignored).
    CUDA_CHECK(cudaStreamSynchronize(*pImagePreprocessStream_));
#else
    // CPU fallback: letterbox-resize, then repack interleaved HWC/BGR bytes
    // into planar CHW/RGB floats scaled to [0,1].
    cv::Mat pr_img = preprocess_img(frame, pYoloV5ModelInfo_->modelInfo.uiModelWidth, pYoloV5ModelInfo_->modelInfo.uiModelHeight); // letterbox BGR to RGB
    const int plane = pYoloV5ModelInfo_->modelInfo.uiModelHeight * pYoloV5ModelInfo_->modelInfo.uiModelWidth; // pixels per channel plane (hoisted loop invariant)
    int n = 0;
    for (int row = 0; row < (int)pYoloV5ModelInfo_->modelInfo.uiModelHeight; ++row) {
        uchar* uc_pixel = pr_img.data + row * pr_img.step;
        for (int col = 0; col < (int)pYoloV5ModelInfo_->modelInfo.uiModelWidth; ++col) {
            pfInputData_[n] = (float)uc_pixel[2] / 255.0f;             // R plane
            pfInputData_[n + plane] = (float)uc_pixel[1] / 255.0f;     // G plane
            pfInputData_[n + 2 * plane] = (float)uc_pixel[0] / 255.0f; // B plane
            uc_pixel += pYoloV5ModelInfo_->modelInfo.uiChannel;
            ++n;
        }
    }
#endif
    auto preprocess_end = std::chrono::system_clock::now();
    std::cout << "yolov5 preprocess time: " << std::chrono::duration_cast<std::chrono::milliseconds>(preprocess_end - preprocess_start).count() << "ms" << std::endl;
    // 2. Inference
    float fResizeRatio = GetResizeRatio(img_width, img_height, pYoloV5ModelInfo_->modelInfo.uiModelWidth, pYoloV5ModelInfo_->modelInfo.uiModelHeight);
    auto start = std::chrono::system_clock::now(); // inference timer
#ifdef ENABLE_CUDA_PREPROCESS
    // Input already resides in pfBuffers_[0]; only the output is copied back
    doInferenceV4(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiOutputIndex_, pfOutputData_, pYoloV5ModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ModelInfo_->modelInfo.uiBatchSize);
#else
    // Host-side input must be uploaded as part of the inference call
    doInferenceV3(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiInputIndex_, pfInputData_, pYoloV5ModelInfo_->modelInfo.uiInputSize,
                  uiOutputIndex_, pfOutputData_, pYoloV5ModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ModelInfo_->modelInfo.uiBatchSize);
#endif
    auto end = std::chrono::system_clock::now();
    std::cout << "yolov5 inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    // 3. Postprocess: decode raw network output and apply OpenCV NMS
    auto decode_nms_start = std::chrono::system_clock::now();
    yolov5DecodeOpenCVNms(vecRes, pfOutputData_, pYoloV5ModelInfo_->modelInfo.uiOutputSize,
                          pYoloV5ModelInfo_->modelParam.uiDetSize,
                          pYoloV5ModelInfo_->modelParam.uiClassNum,
                          pYoloV5ModelInfo_->modelParam.fScoreThreshold,
                          pYoloV5ModelInfo_->modelParam.fNmsThreshold);
    auto decode_nms_end = std::chrono::system_clock::now();
    std::cout << "yolov5 post time: " << std::chrono::duration_cast<std::chrono::milliseconds>(decode_nms_end - decode_nms_start).count() << "ms" << std::endl;
    // Map each box from model (letterboxed) coordinates back to the original image
    for (size_t j = 0; j < vecRes.size(); j++) {
        UpperVertexResetLocation(fResizeRatio, img_width, img_height, vecRes[j]); // top-left padding scheme
        // CenterResetLocation(...) would be the center-padding alternative
    }
    return 0;
}