209 lines
11 KiB
Plaintext
209 lines
11 KiB
Plaintext
|
|
#include "yolov5_clear_detect_inference.h"
|
|||
|
|
|
|||
|
|
// Default constructor. Intentionally empty: all resources (streams, buffers,
// engine) are acquired in YoloV5ClearDetectInferenceInit(), not here.
YoloV5ClearDetectInference::YoloV5ClearDetectInference() {}
|
|||
|
|
|
|||
|
|
// Destructor. Intentionally empty: resources are NOT released here — callers
// must invoke YoloV5ClearDetectInferenceDeinit() explicitly before destruction.
YoloV5ClearDetectInference::~YoloV5ClearDetectInference() {}
|
|||
|
|
|
|||
|
|
|
|||
|
|
// Initialize all inference resources for the YOLOv5 "clear" detector:
// - selects the GPU and creates the preprocess / inference CUDA streams,
// - allocates the device binding buffers and pinned host staging buffers,
// - deserializes the TensorRT engine from strEngineName and creates the
//   execution context.
// @param pYoloV5ClearModelInfo  model geometry/params (cached in the member
//                               pointer; must outlive this object)
// @param strModelName           ONNX model path (only used by the disabled
//                               engine-build branch below)
// @param strEngineName          serialized TensorRT engine (.engine/.plan) path
// @return 0 on success, -1 if the engine file cannot be read.
int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceInit(ClearModelInfo* pYoloV5ClearModelInfo, const std::string& strModelName, const std::string& strEngineName)
{
    pYoloV5ClearModelInfo_ = pYoloV5ClearModelInfo;

    // Resource allocation (streams, host- and device-side memory).
    cudaSetDevice(DEVICE); // select the GPU for all subsequent CUDA calls

    // Create the image-preprocess CUDA stream.
    pImagePreprocessStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pImagePreprocessStream_));

    // Create the model-inference CUDA stream.
    pInferenceModelStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pInferenceModelStream_));

    pGLogger_ = new Logger;

    // Device buffers for the two engine bindings (input / output).
    pfBuffers_[0] = nullptr; pfBuffers_[1] = nullptr;
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[0], pYoloV5ClearModelInfo_->modelInfo.uiInputSize * sizeof(float)));  // input binding
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[1], pYoloV5ClearModelInfo_->modelInfo.uiOutputSize * sizeof(float))); // output binding

    // BUGFIX: the original code did `pu8ImgHost_ = new uint8_t;` (and the same
    // for pu8ImgDevice_) right before the allocations below, leaking one byte
    // per pointer — cudaMallocHost/cudaMalloc overwrite the pointers anyway.
    pu8ImgHost_ = nullptr;
    pu8ImgDevice_ = nullptr;
    CUDA_CHECK(cudaMallocHost((void**)&pu8ImgHost_, MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ClearModelInfo_->modelInfo.uiChannel)); // pinned host staging buffer for preprocessing
    CUDA_CHECK(cudaMalloc((void**)&pu8ImgDevice_, MAX_IMAGE_INPUT_SIZE_THRESH * pYoloV5ClearModelInfo_->modelInfo.uiChannel));  // device staging buffer for preprocessing

    // Host-side input/output tensors (used by the CPU preprocess / postprocess paths).
    pfInputData_ = new float[pYoloV5ClearModelInfo_->modelInfo.uiBatchSize * pYoloV5ClearModelInfo_->modelInfo.uiInputSize];
    pfOutputData_ = new float[pYoloV5ClearModelInfo_->modelInfo.uiBatchSize * pYoloV5ClearModelInfo_->modelInfo.uiOutputSize];

    // Engine serialization (one-off): build a TensorRT engine from the ONNX
    // model and write it to disk. Disabled once the .engine file exists.
#if 0
    if (!strModelName.empty()) {
        IHostMemory* modelStream{ nullptr };
        ONNXToModel(*pGLogger_, pYoloV5ClearModelInfo_->modelInfo.uiBatchSize, MAX_WORKSPAXE_SIZE, &modelStream, strModelName);
        assert(modelStream != nullptr);
        std::ofstream p(strEngineName, std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
    }
#endif

    // Deserialize the engine and prepare for inference.
    std::ifstream file(strEngineName, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }

    // Read the whole serialized plan file into a temporary buffer.
    char *trtModelStream = nullptr;
    size_t size = 0;
    file.seekg(0, file.end);  // seek to end to measure the file
    size = file.tellg();      // total size in bytes
    file.seekg(0, file.beg);  // rewind to the beginning
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size); // read the serialized engine bytes
    file.close();

    // 1. Create the runtime (gLogger receives TensorRT messages).
    pRuntime_ = createInferRuntime(*pGLogger_);
    assert(pRuntime_ != nullptr);

    // 2. Deserialize the engine. Plugins must be registered first, otherwise
    //    engines using plugin layers fail to deserialize — BUGFIX: the original
    //    stored the initLibNvInferPlugins() result but never checked it.
    bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
    assert(didInitPlugins);
    pEngine_ = pRuntime_->deserializeCudaEngine(trtModelStream, size);
    assert(pEngine_ != nullptr);

    // 3. Create the execution context used to launch inference.
    pContext_ = pEngine_->createExecutionContext();
    assert(pContext_ != nullptr);

    delete[] trtModelStream; // plan buffer no longer needed after deserialization

    // This engine is expected to expose exactly one input and one output binding.
    assert(pEngine_->getNbBindings() == 2);

    // Resolve the input/output binding indices by blob name.
    uiInputIndex_ = pEngine_->getBindingIndex((pYoloV5ClearModelInfo_->modelInfo.strInputBlobName).c_str());
    uiOutputIndex_ = pEngine_->getBindingIndex((pYoloV5ClearModelInfo_->modelInfo.strOutputBlobName).c_str());
    assert(uiInputIndex_ == 0);
    assert(uiOutputIndex_ == 1);

    return 0;
}
|
|||
|
|
|
|||
|
|
int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceDeinit()
|
|||
|
|
{
|
|||
|
|
//资源释放
|
|||
|
|
CUDA_CHECK(cudaStreamDestroy(*pImagePreprocessStream_)); //释放图像预处理CUDA流
|
|||
|
|
if(pImagePreprocessStream_){
|
|||
|
|
delete pImagePreprocessStream_;
|
|||
|
|
pImagePreprocessStream_ = nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
CUDA_CHECK(cudaStreamDestroy(*pInferenceModelStream_)); //释放模型推理CUDA流
|
|||
|
|
if(pInferenceModelStream_){ //释放模型推理CUDA流
|
|||
|
|
delete pInferenceModelStream_;
|
|||
|
|
pInferenceModelStream_ = nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
CUDA_CHECK(cudaFree(pu8ImgDevice_)); //释放设备端内存
|
|||
|
|
CUDA_CHECK(cudaFreeHost(pu8ImgHost_)); //释放HOST端内存
|
|||
|
|
|
|||
|
|
CUDA_CHECK(cudaFree(pfBuffers_[0])); //释放输入数据设备端内存
|
|||
|
|
CUDA_CHECK(cudaFree(pfBuffers_[1])); //释放输出数据设备端内存
|
|||
|
|
|
|||
|
|
//析构engine引擎资源
|
|||
|
|
pContext_->destroy(); //析构绘话
|
|||
|
|
pEngine_->destroy(); //析构TensorRT引擎
|
|||
|
|
pRuntime_->destroy(); //析构运行时环境
|
|||
|
|
|
|||
|
|
if(pGLogger_){ //释放Logger
|
|||
|
|
delete pGLogger_;
|
|||
|
|
pGLogger_ = nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if(pfInputData_){
|
|||
|
|
delete[] pfInputData_;
|
|||
|
|
pfInputData_ = nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if(pfOutputData_){
|
|||
|
|
delete[] pfOutputData_;
|
|||
|
|
pfOutputData_ = nullptr;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Run one detection pass on a BGR frame: preprocess (CUDA kernel or CPU
// letterbox depending on ENABLE_CUDA_PREPROCESS), execute the TensorRT
// engine, decode + NMS the raw output, and map the detections back to the
// original image coordinates.
// @param frame   input image (read; not modified in the CUDA path — the CPU
//                path only reads it through preprocess_img)
// @param vecRes  out: decoded detections in original-image coordinates
// @return 0 on success.
// BUGFIX: the original function declared an int return type but fell off the
// end without returning — undefined behavior; it now returns 0.
int YoloV5ClearDetectInference::YoloV5ClearDetectInferenceModel(cv::Mat& frame, std::vector<ClearDetection>& vecRes)
{
    size_t size_image_src = frame.cols * frame.rows * pYoloV5ClearModelInfo_->modelInfo.uiChannel;
    unsigned int img_width = frame.cols, img_height = frame.rows;
    size_t size_image_dst = pYoloV5ClearModelInfo_->modelInfo.uiModelWidth * pYoloV5ClearModelInfo_->modelInfo.uiModelHeight * pYoloV5ClearModelInfo_->modelInfo.uiChannel;
    auto preprocess_start = std::chrono::system_clock::now(); // preprocess timing start

#ifdef ENABLE_CUDA_PREPROCESS
    // GPU preprocess: stage the raw frame through pinned host memory, copy it
    // to the device asynchronously, then run the resize/normalize kernel.
    memcpy(pu8ImgHost_, frame.data, size_image_src); // raw frame -> pinned host buffer
    CUDA_CHECK(cudaMemcpyAsync(pu8ImgDevice_, pu8ImgHost_, size_image_src, cudaMemcpyHostToDevice, *pImagePreprocessStream_)); // host -> device
    yolov5_detect_preprocess_kernel_img(pu8ImgDevice_, frame.cols, frame.rows, (float*)pfBuffers_[0], pYoloV5ClearModelInfo_->modelInfo.uiModelWidth, pYoloV5ClearModelInfo_->modelInfo.uiModelHeight, *pImagePreprocessStream_);
    cudaStreamSynchronize(*pImagePreprocessStream_); // preprocessed input must be ready before inference
#else
    // CPU preprocess: letterbox-resize, then repack HWC/BGR uint8 into
    // planar CHW/RGB float normalized to [0, 1].
    cv::Mat pr_img = preprocess_img(frame, pYoloV5ClearModelInfo_->modelInfo.uiModelWidth, pYoloV5ClearModelInfo_->modelInfo.uiModelHeight); // letterbox BGR to RGB
    int n = 0;
    for (int row = 0; row < pYoloV5ClearModelInfo_->modelInfo.uiModelHeight; ++row) {
        uchar* uc_pixel = pr_img.data + row * pr_img.step;
        for (int col = 0; col < pYoloV5ClearModelInfo_->modelInfo.uiModelWidth; ++col) {
            // Channel swap: BGR byte order -> RGB planes (R first).
            pfInputData_[n] = (float)uc_pixel[2] / 255.0;
            pfInputData_[n + pYoloV5ClearModelInfo_->modelInfo.uiModelHeight * pYoloV5ClearModelInfo_->modelInfo.uiModelWidth] = (float)uc_pixel[1] / 255.0;
            pfInputData_[n + 2 * pYoloV5ClearModelInfo_->modelInfo.uiModelHeight * pYoloV5ClearModelInfo_->modelInfo.uiModelWidth] = (float)uc_pixel[0] / 255.0;
            uc_pixel += pYoloV5ClearModelInfo_->modelInfo.uiChannel;
            ++n;
        }
    }
#endif
    auto preprocess_end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear preprocess time: " << std::chrono::duration_cast<std::chrono::milliseconds>(preprocess_end - preprocess_start).count() << "ms" << std::endl;

    // 2. Inference. fResizeRatio is needed later to undo the letterbox scaling.
    float fResizeRatio = GetResizeRatio(img_width, img_height, pYoloV5ClearModelInfo_->modelInfo.uiModelWidth, pYoloV5ClearModelInfo_->modelInfo.uiModelHeight);
    auto start = std::chrono::system_clock::now(); // inference timing start
#ifdef ENABLE_CUDA_PREPROCESS
    // Input already lives in pfBuffers_[0] (written by the preprocess kernel).
    doInferenceV4(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiOutputIndex_, pfOutputData_, pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ClearModelInfo_->modelInfo.uiBatchSize);
#else
    // Input is uploaded from pfInputData_ inside doInferenceV3.
    doInferenceV3(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiInputIndex_, pfInputData_, pYoloV5ClearModelInfo_->modelInfo.uiInputSize,
                  uiOutputIndex_, pfOutputData_, pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                  pYoloV5ClearModelInfo_->modelInfo.uiBatchSize);
#endif
    auto end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

    // 3. Postprocess: decode the raw output tensor and apply OpenCV NMS.
    auto decode_nms_start = std::chrono::system_clock::now();
    yolov5ClearDecodeOpenCVNms(vecRes, pfOutputData_, pYoloV5ClearModelInfo_->modelInfo.uiOutputSize,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.uiDetSize,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.uiClassNum,
                               pYoloV5ClearModelInfo_->clearModelParam.uiClearNum,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.fScoreThreshold,
                               pYoloV5ClearModelInfo_->clearModelParam.modelParam.fNmsThreshold);
    auto decode_nms_end = std::chrono::system_clock::now();
    // std::cout << "yolov5 clear post time: " << std::chrono::duration_cast<std::chrono::milliseconds>(decode_nms_end - decode_nms_start).count() << "ms" << std::endl;
    // std::cout << "this picture find " << vecRes.size() << " objs" << std::endl;

    // Map detections from model space back to the original image
    // (top-left-anchored letterbox coordinate restoration).
    for (size_t j = 0; j < vecRes.size(); j++) {
        UpperVertexResetLocation(fResizeRatio, img_width, img_height, vecRes[j].detection);
        // CenterResetLocation(fResizeRatio, img_width, img_height, pYoloV5ClearModelInfo_->modelInfo.uiModelWidth, pYoloV5ClearModelInfo_->modelInfo.uiModelHeight, vecRes[j].detection); // center-padded restoration (unused)
    }

    return 0;
}
|
|||
|
|
|