Train_RFID_Linux/code/inference/retinanet_classify_inferenc...

200 lines
9.7 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "retinanet_classify_inference.h"
/// Constructs an inference object that owns no resources yet.
///
/// Every resource pointer this file manages is reset to nullptr so that the
/// null checks performed during teardown are meaningful even if
/// RetinanetClassifyInferenceInit() was never called or failed part-way.
/// NOTE(review): the class declaration is not visible from this file; if the
/// header already provides in-class initialisers these assignments are
/// redundant but harmless.
RetinanetClassifyInference::RetinanetClassifyInference()
{
    pRetinanetClassifyModelInfo_ = nullptr;
    pImagePreprocessStream_ = nullptr;
    pInferenceModelStream_ = nullptr;
    pGLogger_ = nullptr;
    pfBuffers_[0] = nullptr;
    pfBuffers_[1] = nullptr;
    pu8ImgHost_ = nullptr;
    pu8ImgDevice_ = nullptr;
    pfInputData_ = nullptr;
    pfOutputData_ = nullptr;
    pRuntime_ = nullptr;
    pEngine_ = nullptr;
    pContext_ = nullptr;
}
/// Destructor. It performs no cleanup of its own: streams, buffers and the
/// TensorRT objects are released only by RetinanetClassifyInferenceDeInit().
RetinanetClassifyInference::~RetinanetClassifyInference() = default;
/// Allocates all inference resources and loads a serialized TensorRT engine.
///
/// Steps: select the GPU, create the preprocessing and inference CUDA streams,
/// allocate device/host buffers, read the engine file `strEngineName` into
/// memory, then create the TensorRT runtime, engine and execution context and
/// validate the input/output binding indices.
///
/// @param pRetinanetClassifyModelInfo  Model description (sizes, blob names);
///        the pointer is stored, not copied, so it must outlive this object.
/// @param strModelName   Caffe model path; only used by the disabled (#if 0)
///        engine-building path.
/// @param strDeployName  Caffe deploy path; only used by the disabled path.
/// @param strEngineName  Path of the serialized engine file to load.
/// @return 0 on success, -1 on any failure. On failure, resources acquired so
///         far stay owned by this object.
int RetinanetClassifyInference::RetinanetClassifyInferenceInit(ModelInfo* pRetinanetClassifyModelInfo, const std::string& strModelName, const std::string& strDeployName, const std::string& strEngineName)
{
    if (pRetinanetClassifyModelInfo == nullptr) {
        std::cerr << "RetinanetClassifyInferenceInit: model info is null" << std::endl;
        return -1;
    }
    pRetinanetClassifyModelInfo_ = pRetinanetClassifyModelInfo;

    // Put the TensorRT handles into a known state so an early failure below
    // never leaves them indeterminate.
    pRuntime_ = nullptr;
    pEngine_ = nullptr;
    pContext_ = nullptr;

    // Resource allocation (streams, host-side and device-side memory).
    CUDA_CHECK(cudaSetDevice(DEVICE)); // select the GPU

    // CUDA stream used for image preprocessing.
    pImagePreprocessStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pImagePreprocessStream_));

    // CUDA stream used for model inference.
    pInferenceModelStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pInferenceModelStream_));

    pGLogger_ = new Logger;

    // Device buffers for the network input and output.
    // NOTE(review): these are sized without uiBatchSize while the host arrays
    // below include it — confirm this is intended for batch sizes > 1.
    pfBuffers_[0] = nullptr;
    pfBuffers_[1] = nullptr;
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&pfBuffers_[0]), pRetinanetClassifyModelInfo_->uiInputSize * sizeof(float)));  // input
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&pfBuffers_[1]), pRetinanetClassifyModelInfo_->uiOutputSize * sizeof(float))); // output

    // Staging buffers for the raw image. They are allocated by CUDA only; the
    // pointers start as nullptr (a preceding `new` would be leaked as soon as
    // cudaMallocHost/cudaMalloc overwrite the pointer).
    pu8ImgHost_ = nullptr;
    pu8ImgDevice_ = nullptr;
    CUDA_CHECK(cudaMallocHost(reinterpret_cast<void**>(&pu8ImgHost_), MAX_IMAGE_INPUT_SIZE_THRESH * pRetinanetClassifyModelInfo_->uiChannel)); // host-side preprocessing cache
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&pu8ImgDevice_), MAX_IMAGE_INPUT_SIZE_THRESH * pRetinanetClassifyModelInfo_->uiChannel));   // device-side preprocessing cache

    // Host-side input/output arrays.
    pfInputData_ = new float[pRetinanetClassifyModelInfo_->uiBatchSize * pRetinanetClassifyModelInfo_->uiInputSize];
    pfOutputData_ = new float[pRetinanetClassifyModelInfo_->uiBatchSize * pRetinanetClassifyModelInfo_->uiOutputSize];

    // Engine serialization.
    // Builds the model through the API and serializes it to a stream. Once the
    // TensorRT engine file has been generated this does not need to run again.
    // Builds the TensorRT engine with the Caffe parser.
#if 0
    std::vector<std::string> vecOutputs = {pRetinanetClassifyModelInfo_->strOutputBlobName};
    if (!strModelName.empty() && !strDeployName.empty()) {
        IHostMemory* modelStream{ nullptr };
        CaffeToModel(*pGLogger_, pRetinanetClassifyModelInfo_->uiBatchSize, MAX_WORKSPAXE_SIZE, &modelStream, strModelName, strDeployName, vecOutputs);
        assert(modelStream != nullptr);
        std::ofstream p(strEngineName, std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
    }
#endif

    // Deserialize the model: read the whole serialized plan file into memory.
    std::ifstream file(strEngineName, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    file.seekg(0, file.end);                       // jump to the end of the file
    const std::streamoff engineLength = file.tellg(); // -1 when the position cannot be determined
    file.seekg(0, file.beg);                       // rewind to the beginning
    if (engineLength <= 0) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    // std::string owns the bytes, so the buffer is released on every exit path.
    std::string engineBlob(static_cast<size_t>(engineLength), '\0');
    file.read(&engineBlob[0], engineLength);
    if (!file) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    file.close();

    // The TensorRT objects are checked at run time (not only via assert) so
    // builds with NDEBUG fail with -1 instead of dereferencing nullptr.
    // 1. Create the runtime; the logger receives TensorRT messages.
    pRuntime_ = createInferRuntime(*pGLogger_);
    if (pRuntime_ == nullptr) {
        std::cerr << "RetinanetClassifyInferenceInit: createInferRuntime failed" << std::endl;
        return -1;
    }
    // 2. Deserialize the engine from the in-memory plan.
    pEngine_ = pRuntime_->deserializeCudaEngine(engineBlob.data(), engineBlob.size());
    if (pEngine_ == nullptr) {
        std::cerr << "RetinanetClassifyInferenceInit: deserializeCudaEngine failed" << std::endl;
        return -1;
    }
    // 3. Create the execution context used when running inference.
    pContext_ = pEngine_->createExecutionContext();
    if (pContext_ == nullptr) {
        std::cerr << "RetinanetClassifyInferenceInit: createExecutionContext failed" << std::endl;
        return -1;
    }

    std::cout<<"Engine get NB Bindings is: "<<pEngine_->getNbBindings()<<std::endl;
    if (pEngine_->getNbBindings() != 2) {
        std::cerr << "RetinanetClassifyInferenceInit: expected exactly 2 bindings" << std::endl;
        return -1;
    }

    // Look up the input and output bindings by blob name.
    const auto inputIndex = pEngine_->getBindingIndex((pRetinanetClassifyModelInfo_->strInputBlobName).c_str());
    const auto outputIndex = pEngine_->getBindingIndex((pRetinanetClassifyModelInfo_->strOutputBlobName).c_str());
    uiInputIndex_ = inputIndex;
    uiOutputIndex_ = outputIndex;
    std::cout<<"inputIndex: "<<uiInputIndex_<<"\toutputIndex: "<<uiOutputIndex_<<std::endl;
    // pfBuffers_[0] is the input and pfBuffers_[1] the output, so the engine
    // must expose them at exactly these indices.
    if (inputIndex != 0 || outputIndex != 1) {
        std::cerr << "RetinanetClassifyInferenceInit: unexpected binding indices" << std::endl;
        return -1;
    }
    return 0;
}
/// Releases everything acquired by RetinanetClassifyInferenceInit().
///
/// Each resource is checked for nullptr before it is touched and reset to
/// nullptr afterwards, so a resource that was never acquired is skipped and
/// calling this function a second time does not free anything twice.
///
/// @return Always 0.
int RetinanetClassifyInference::RetinanetClassifyInferenceDeInit()
{
    // CUDA stream used for image preprocessing: test the holder before
    // dereferencing it to destroy the stream.
    if (pImagePreprocessStream_) {
        CUDA_CHECK(cudaStreamDestroy(*pImagePreprocessStream_));
        delete pImagePreprocessStream_;
        pImagePreprocessStream_ = nullptr;
    }

    // CUDA stream used for model inference.
    if (pInferenceModelStream_) {
        CUDA_CHECK(cudaStreamDestroy(*pInferenceModelStream_));
        delete pInferenceModelStream_;
        pInferenceModelStream_ = nullptr;
    }

    // Device-side image staging buffer.
    if (pu8ImgDevice_) {
        CUDA_CHECK(cudaFree(pu8ImgDevice_));
        pu8ImgDevice_ = nullptr;
    }
    // Host-side image staging buffer.
    if (pu8ImgHost_) {
        CUDA_CHECK(cudaFreeHost(pu8ImgHost_));
        pu8ImgHost_ = nullptr;
    }
    // Device memory for the network input.
    if (pfBuffers_[0]) {
        CUDA_CHECK(cudaFree(pfBuffers_[0]));
        pfBuffers_[0] = nullptr;
    }
    // Device memory for the network output.
    if (pfBuffers_[1]) {
        CUDA_CHECK(cudaFree(pfBuffers_[1]));
        pfBuffers_[1] = nullptr;
    }

    // TensorRT objects, destroyed in the order context -> engine -> runtime.
    if (pContext_) {
        pContext_->destroy();
        pContext_ = nullptr;
    }
    if (pEngine_) {
        pEngine_->destroy();
        pEngine_ = nullptr;
    }
    if (pRuntime_) {
        pRuntime_->destroy();
        pRuntime_ = nullptr;
    }

    // The logger was handed to the runtime, so it is released after it.
    if (pGLogger_) {
        delete pGLogger_;
        pGLogger_ = nullptr;
    }

    // Host-side input/output arrays.
    if (pfInputData_) {
        delete[] pfInputData_;
        pfInputData_ = nullptr;
    }
    if (pfOutputData_) {
        delete[] pfOutputData_;
        pfOutputData_ = nullptr;
    }
    return 0;
}
/// Runs the classifier on one image and compares the two output scores.
///
/// With ENABLE_CUDA_PREPROCESS the raw image is staged through the pinned
/// host buffer to the device and preprocessed by a CUDA kernel directly into
/// the input binding; otherwise the image is preprocessed on the CPU into
/// pfInputData_ and copied by doInference().
///
/// @param frame  Input image. It must be non-empty. In the CUDA path its byte
///               size must equal cols * rows * uiChannel and fit in the
///               staging buffers allocated during initialisation.
/// @return true when output[0] < output[1], false otherwise. false is also
///         returned (with a message on std::cerr) when the frame is rejected
///         by the checks above.
bool RetinanetClassifyInference::RetinanetClassifyInferenceModel(cv::Mat& frame)
{
    const auto* info = pRetinanetClassifyModelInfo_;

    if (frame.empty()) {
        std::cerr << "RetinanetClassifyInferenceModel: empty input frame" << std::endl;
        return false;
    }

    // 1. Preprocessing
#ifdef ENABLE_CUDA_PREPROCESS
    const size_t srcBytes = static_cast<size_t>(frame.cols) * static_cast<size_t>(frame.rows) * info->uiChannel;
    const size_t stagingCapacity = static_cast<size_t>(MAX_IMAGE_INPUT_SIZE_THRESH) * info->uiChannel;

    // The byte count is derived from uiChannel, so the frame must really have
    // that many bytes per pixel or the copy would read outside the image.
    if (frame.total() * frame.elemSize() != srcBytes) {
        std::cerr << "RetinanetClassifyInferenceModel: frame layout does not match model channel count" << std::endl;
        return false;
    }
    // The staging buffers have a fixed capacity; refuse larger images instead
    // of writing past the end of them.
    if (srcBytes > stagingCapacity) {
        std::cerr << "RetinanetClassifyInferenceModel: frame exceeds preprocessing buffer capacity" << std::endl;
        return false;
    }

    // Copy the image into the host staging buffer.
    if (frame.isContinuous()) {
        memcpy(pu8ImgHost_, frame.data, srcBytes);
    } else {
        // Rows of a non-continuous Mat (e.g. a ROI view) are padded in memory,
        // so pack them one by one.
        const size_t rowBytes = static_cast<size_t>(frame.cols) * frame.elemSize();
        for (int row = 0; row < frame.rows; ++row) {
            memcpy(pu8ImgHost_ + static_cast<size_t>(row) * rowBytes, frame.ptr(row), rowBytes);
        }
    }
    // Host -> device copy, then the preprocessing kernel, both on the preprocessing stream.
    CUDA_CHECK(cudaMemcpyAsync(pu8ImgDevice_, pu8ImgHost_, srcBytes, cudaMemcpyHostToDevice, *pImagePreprocessStream_));
    retinanet_classify_preprocess_kernel_img(pu8ImgDevice_, frame.cols, frame.rows, static_cast<float*>(pfBuffers_[0]), info->uiModelWidth, info->uiModelHeight, *pImagePreprocessStream_);
    // Inference runs on a different stream, so wait for preprocessing to finish.
    CUDA_CHECK(cudaStreamSynchronize(*pImagePreprocessStream_));
#else
    const int modelWidth = static_cast<int>(info->uiModelWidth);
    const int modelHeight = static_cast<int>(info->uiModelHeight);
    const size_t planeSize = static_cast<size_t>(modelHeight) * static_cast<size_t>(modelWidth);

    // Resize on the CPU (the original comment describes preprocess_img as
    // "letterbox BGR to RGB").
    const cv::Mat pr_img = preprocess_img(frame, modelWidth, modelHeight);

    // Convert the interleaved 8-bit image into three consecutive float planes,
    // subtracting a per-channel constant from every pixel.
    size_t n = 0;
    for (int row = 0; row < modelHeight; ++row) {
        const uchar* px = pr_img.ptr<uchar>(row);
        for (int col = 0; col < modelWidth; ++col) {
            pfInputData_[n] = static_cast<float>(px[2]) - 104;
            pfInputData_[n + planeSize] = static_cast<float>(px[1]) - 117;
            pfInputData_[n + 2 * planeSize] = static_cast<float>(px[0]) - 123;
            px += info->uiChannel;
            ++n;
        }
    }
#endif

    // 2. Inference
#ifdef ENABLE_CUDA_PREPROCESS
    // The input binding was already filled on the device by the kernel above.
    doInferenceV2(*pContext_, *pInferenceModelStream_, reinterpret_cast<void**>(pfBuffers_),
                  uiOutputIndex_, pfOutputData_, info->uiOutputSize,
                  info->uiBatchSize);
#else
    doInference(*pContext_, *pInferenceModelStream_, reinterpret_cast<void**>(pfBuffers_),
                uiInputIndex_, pfInputData_, info->uiInputSize,
                uiOutputIndex_, pfOutputData_, info->uiOutputSize,
                info->uiBatchSize);
#endif

    // 3. Post-processing
    // Kept from the original: switches std::cout to fixed floating-point
    // notation, which affects later output elsewhere in the program.
    std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);

    return pfOutputData_[0] < pfOutputData_[1];
}