Train_RFID_Linux/code/inference/retinanet_classify_inferenc...


#include "retinanet_classify_inference.h"
RetinanetClassifyInference::RetinanetClassifyInference() {}
RetinanetClassifyInference::~RetinanetClassifyInference() {}
int RetinanetClassifyInference::RetinanetClassifyInferenceInit(ModelInfo* pRetinanetClassifyModelInfo, const std::string& strModelName, const std::string& strDeployName, const std::string& strEngineName)
{
    pRetinanetClassifyModelInfo_ = pRetinanetClassifyModelInfo;
    // Resource allocation (create streams, host- and device-side memory)
    cudaSetDevice(DEVICE); // select the GPU
    // Create the CUDA stream for image preprocessing
    pImagePreprocessStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pImagePreprocessStream_));
    // Create the CUDA stream for model inference
    pInferenceModelStream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(pInferenceModelStream_));
    pGLogger_ = new Logger;
    // Allocate the device-side input/output buffers
    pfBuffers_[0] = nullptr; pfBuffers_[1] = nullptr;
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[0], pRetinanetClassifyModelInfo_->uiInputSize * sizeof(float)));  // input buffer
    CUDA_CHECK(cudaMalloc((void**)&pfBuffers_[1], pRetinanetClassifyModelInfo_->uiOutputSize * sizeof(float))); // output buffer
    // Allocate pinned host-side and device-side staging buffers for preprocessing
    CUDA_CHECK(cudaMallocHost((void**)&pu8ImgHost_, MAX_IMAGE_INPUT_SIZE_THRESH * pRetinanetClassifyModelInfo_->uiChannel));
    CUDA_CHECK(cudaMalloc((void**)&pu8ImgDevice_, MAX_IMAGE_INPUT_SIZE_THRESH * pRetinanetClassifyModelInfo_->uiChannel));
    pfInputData_ = new float[pRetinanetClassifyModelInfo_->uiBatchSize * pRetinanetClassifyModelInfo_->uiInputSize];
    pfOutputData_ = new float[pRetinanetClassifyModelInfo_->uiBatchSize * pRetinanetClassifyModelInfo_->uiOutputSize];
    // Serialize the engine:
    // build the model directly via the TensorRT API and serialize it to a stream.
    // Once the engine file has been generated this step need not run again;
    // generate the engine once and simply load it afterwards.
    // The disabled block below builds the TensorRT engine with the Caffe parser.
#if 0
    std::vector<std::string> vecOutputs = {pRetinanetClassifyModelInfo_->strOutputBlobName};
    if (!strModelName.empty() && !strDeployName.empty()) {
        IHostMemory* modelStream{ nullptr };
        CaffeToModel(*pGLogger_, pRetinanetClassifyModelInfo_->uiBatchSize, MAX_WORKSPAXE_SIZE, &modelStream, strModelName, strDeployName, vecOutputs);
        assert(modelStream != nullptr);
        std::ofstream p(strEngineName, std::ios::binary);
        if (!p) {
            std::cerr << "could not open plan output file" << std::endl;
            return -1;
        }
        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
        modelStream->destroy();
    }
#endif
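    // A plausible sketch of the builder flow that CaffeToModel() presumably wraps,
    // based on the standard (legacy) TensorRT 7 Caffe-parser API. This is an
    // illustrative assumption, not this project's actual implementation.
#if 0
    IBuilder* builder = createInferBuilder(*pGLogger_);
    INetworkDefinition* network = builder->createNetworkV2(0U); // Caffe parser requires implicit batch
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();
    const nvcaffeparser1::IBlobNameToTensor* blobs =
        parser->parse(strDeployName.c_str(), strModelName.c_str(), *network, DataType::kFLOAT);
    network->markOutput(*blobs->find(pRetinanetClassifyModelInfo_->strOutputBlobName.c_str()));
    builder->setMaxBatchSize(pRetinanetClassifyModelInfo_->uiBatchSize);
    IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(MAX_WORKSPAXE_SIZE);
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    IHostMemory* serialized = engine->serialize(); // this would be returned via *modelStream
    engine->destroy(); config->destroy(); parser->destroy(); network->destroy(); builder->destroy();
#endif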
    // Deserialize the engine and prepare for inference
    std::ifstream file(strEngineName, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << strEngineName << " error!" << std::endl;
        return -1;
    }
    // trtModelStream is a plain memory buffer holding the serialized plan file,
    // read in the same way any file would be through an ifstream
    char* trtModelStream = nullptr;
    size_t size = 0;
    file.seekg(0, file.end); // seek to the end of the file
    size = file.tellg();     // the current position equals the file size
    file.seekg(0, file.beg); // seek back to the beginning
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size); // read the serialized engine (data and size) into trtModelStream
    file.close();
    // 1. Create the runtime: an IRuntime object; gLogger is used for log output
    pRuntime_ = createInferRuntime(*pGLogger_);
    assert(pRuntime_ != nullptr);
    // 2. Deserialize the engine from trtModelStream
    pEngine_ = pRuntime_->deserializeCudaEngine(trtModelStream, size);
    assert(pEngine_ != nullptr);
    // 3. Create the execution context, used later to launch the CUDA kernels during inference
    pContext_ = pEngine_->createExecutionContext();
    assert(pContext_ != nullptr);
    delete[] trtModelStream; // the serialized stream is no longer needed
    std::cout << "Engine get NB Bindings is: " << pEngine_->getNbBindings() << std::endl;
    assert(pEngine_->getNbBindings() == 2);
    // Look up the binding indices of the input and output blobs
    uiInputIndex_ = pEngine_->getBindingIndex((pRetinanetClassifyModelInfo_->strInputBlobName).c_str());
    uiOutputIndex_ = pEngine_->getBindingIndex((pRetinanetClassifyModelInfo_->strOutputBlobName).c_str());
    std::cout << "inputIndex: " << uiInputIndex_ << "\toutputIndex: " << uiOutputIndex_ << std::endl;
    assert(uiInputIndex_ == 0);
    assert(uiOutputIndex_ == 1);
    return 0;
}
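// --- Usage sketch (illustrative, not part of this project) ---
// A minimal example of driving the initializer. The ModelInfo values below are
// hypothetical placeholders; the real ones come from the project's model
// configuration, and "retinanet_classify.engine" is an assumed engine path.
#if 0
static int ExampleInit(RetinanetClassifyInference& infer)
{
    ModelInfo modelInfo;
    modelInfo.uiBatchSize  = 1;             // hypothetical values
    modelInfo.uiChannel    = 3;
    modelInfo.uiModelWidth = 224;
    modelInfo.uiModelHeight = 224;
    modelInfo.uiInputSize  = 3 * 224 * 224; // C * H * W floats per batch item
    modelInfo.uiOutputSize = 2;             // two-class classifier
    modelInfo.strInputBlobName  = "data";
    modelInfo.strOutputBlobName = "prob";
    // Once the .engine file exists, the caffemodel/deploy paths are unused
    // (the Caffe build path above is compiled out), so they may be empty.
    return infer.RetinanetClassifyInferenceInit(&modelInfo, "", "", "retinanet_classify.engine");
}
#endif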
int RetinanetClassifyInference::RetinanetClassifyInferenceDeInit()
{
    // Release resources
    if (pImagePreprocessStream_) { // destroy and free the image-preprocessing CUDA stream
        CUDA_CHECK(cudaStreamDestroy(*pImagePreprocessStream_));
        delete pImagePreprocessStream_;
        pImagePreprocessStream_ = nullptr;
    }
    if (pInferenceModelStream_) { // destroy and free the model-inference CUDA stream
        CUDA_CHECK(cudaStreamDestroy(*pInferenceModelStream_));
        delete pInferenceModelStream_;
        pInferenceModelStream_ = nullptr;
    }
    CUDA_CHECK(cudaFree(pu8ImgDevice_));   // free the device-side staging buffer
    CUDA_CHECK(cudaFreeHost(pu8ImgHost_)); // free the pinned host-side staging buffer
    CUDA_CHECK(cudaFree(pfBuffers_[0]));   // free the device-side input buffer
    CUDA_CHECK(cudaFree(pfBuffers_[1]));   // free the device-side output buffer
    // Tear down the TensorRT objects (context before engine before runtime)
    pContext_->destroy(); // destroy the execution context
    pEngine_->destroy();  // destroy the TensorRT engine
    pRuntime_->destroy(); // destroy the runtime
    if (pGLogger_) { // free the logger
        delete pGLogger_;
        pGLogger_ = nullptr;
    }
    if (pfInputData_) {
        delete[] pfInputData_;
        pfInputData_ = nullptr;
    }
    if (pfOutputData_) {
        delete[] pfOutputData_;
        pfOutputData_ = nullptr;
    }
    return 0;
}
bool RetinanetClassifyInference::RetinanetClassifyInferenceModel(cv::Mat& frame)
{
    size_t size_image_src = frame.cols * frame.rows * pRetinanetClassifyModelInfo_->uiChannel;
    unsigned int img_width = frame.cols, img_height = frame.rows;
    size_t size_image_dst = pRetinanetClassifyModelInfo_->uiModelWidth * pRetinanetClassifyModelInfo_->uiModelHeight * pRetinanetClassifyModelInfo_->uiChannel;
    auto preprocess_start = std::chrono::system_clock::now(); // start the preprocessing timer
    // printf("frame cols: %d\t frame rows: %d\n", frame.cols, frame.rows);
    // printf("model width: %d\t model height: %d\t model channel: %d\n", pRetinanetClassifyModelInfo_->uiModelWidth,
    //        pRetinanetClassifyModelInfo_->uiModelHeight, pRetinanetClassifyModelInfo_->uiChannel);
#ifdef ENABLE_CUDA_PREPROCESS
    memcpy(pu8ImgHost_, frame.data, size_image_src); // copy the frame into the pinned host buffer
    CUDA_CHECK(cudaMemcpyAsync(pu8ImgDevice_, pu8ImgHost_, size_image_src, cudaMemcpyHostToDevice, *pImagePreprocessStream_)); // copy the frame to the device
    retinanet_classify_preprocess_kernel_img(pu8ImgDevice_, frame.cols, frame.rows, (float*)pfBuffers_[0], pRetinanetClassifyModelInfo_->uiModelWidth, pRetinanetClassifyModelInfo_->uiModelHeight, *pImagePreprocessStream_);
    cudaStreamSynchronize(*pImagePreprocessStream_);
#else
    cv::Mat pr_img = preprocess_img(frame, pRetinanetClassifyModelInfo_->uiModelWidth, pRetinanetClassifyModelInfo_->uiModelHeight); // letterbox resize
    int n = 0;
    for (int row = 0; row < pRetinanetClassifyModelInfo_->uiModelHeight; ++row) {
        uchar* uc_pixel = pr_img.data + row * pr_img.step;
        for (int col = 0; col < pRetinanetClassifyModelInfo_->uiModelWidth; ++col) {
            // Repack interleaved BGR (HWC) into planar RGB (CHW) with per-channel mean subtraction
            pfInputData_[n] = (float)uc_pixel[2] - 104;
            pfInputData_[n + pRetinanetClassifyModelInfo_->uiModelHeight * pRetinanetClassifyModelInfo_->uiModelWidth] = (float)uc_pixel[1] - 117;
            pfInputData_[n + 2 * pRetinanetClassifyModelInfo_->uiModelHeight * pRetinanetClassifyModelInfo_->uiModelWidth] = (float)uc_pixel[0] - 123;
            uc_pixel += pRetinanetClassifyModelInfo_->uiChannel;
            ++n;
        }
    }
#endif
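    // preprocess_img() is defined elsewhere in this project; it presumably does
    // the usual letterbox resize (a hedged sketch of the common pattern):
    //   float r = std::min(W / (float)img.cols, H / (float)img.rows);
    //   cv::resize(img, re, cv::Size(r * img.cols, r * img.rows));
    //   re.copyTo(out(cv::Rect((W - re.cols) / 2, (H - re.rows) / 2, re.cols, re.rows)));
    // i.e. scale to fit, then pad to the model's W x H without distorting the image.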
    auto preprocess_end = std::chrono::system_clock::now();
    // std::cout << "retinanet classify preprocess time: " << std::chrono::duration_cast<std::chrono::milliseconds>(preprocess_end - preprocess_start).count() << "ms" << std::endl;
    // 2. Inference
    auto start = std::chrono::system_clock::now(); // start the inference timer
#ifdef ENABLE_CUDA_PREPROCESS
    doInferenceV2(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                  uiOutputIndex_, pfOutputData_, pRetinanetClassifyModelInfo_->uiOutputSize,
                  pRetinanetClassifyModelInfo_->uiBatchSize);
#else
    doInference(*pContext_, *pInferenceModelStream_, (void**)pfBuffers_,
                uiInputIndex_, pfInputData_, pRetinanetClassifyModelInfo_->uiInputSize,
                uiOutputIndex_, pfOutputData_, pRetinanetClassifyModelInfo_->uiOutputSize,
                pRetinanetClassifyModelInfo_->uiBatchSize);
#endif
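    // doInference()/doInferenceV2() are shared helpers defined elsewhere; a
    // hedged sketch of the usual enqueue pattern they presumably follow:
    //   CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * inputSize * sizeof(float), cudaMemcpyHostToDevice, stream)); // doInference only; the V2 input is already on-device
    //   context.enqueue(batchSize, buffers, stream, nullptr);
    //   CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));
    //   cudaStreamSynchronize(stream);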
    auto end = std::chrono::system_clock::now();
    // std::cout << "retinanet classify inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    // 3. Postprocessing: compare the two class scores
    std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
    // std::cout << "after inference retinanet classify output[0] is: " << pfOutputData_[0] << " output[1] is: " << pfOutputData_[1] << std::endl;
    if (pfOutputData_[0] < pfOutputData_[1]) {
        return true;
    } else {
        return false;
    }
}
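// --- End-to-end sketch (illustrative, not part of this project) ---
// The expected call sequence around the class; "test.jpg" is a placeholder
// path and ModelInfo must be filled in as in the init sketch above.
#if 0
static void ExampleRun(ModelInfo& modelInfo)
{
    RetinanetClassifyInference infer;
    if (infer.RetinanetClassifyInferenceInit(&modelInfo, "", "", "retinanet_classify.engine") != 0) {
        return;
    }
    cv::Mat frame = cv::imread("test.jpg");
    if (!frame.empty()) {
        bool hit = infer.RetinanetClassifyInferenceModel(frame); // true when output[1] > output[0]
        std::cout << "classify result: " << (hit ? "class 1" : "class 0") << std::endl;
    }
    infer.RetinanetClassifyInferenceDeInit();
}
#endif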