generated from zhangwei/Train_Identify
			
		
			
				
	
	
		
			449 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			449 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
	
| #include "InferenceModelEngine.h"
 | ||
| 
 | ||
| using namespace std;
 | ||
| using namespace ai_matrix;
 | ||
| 
 | ||
| 
 | ||
| InferenceModelEngine::InferenceModelEngine() {}
 | ||
| InferenceModelEngine::~InferenceModelEngine() {}
 | ||
| 
 | ||
| 
 | ||
| APP_ERROR InferenceModelEngine::Init()
 | ||
| {
 | ||
|     strPort0_ = engineName_ + "_" + std::to_string(engineId_) + "_0";
 | ||
| 
 | ||
|     //创建模型推理CUDA流   
 | ||
|     inference_model_stream_ = new cudaStream_t;
 | ||
|     CUDA_CHECK(cudaStreamCreate(inference_model_stream_));
 | ||
| 
 | ||
|     gLogger_ = new Logger;
 | ||
| 
 | ||
|     //相关资源分配
 | ||
|     buffers_[0] = nullptr; buffers_[1] = nullptr;
 | ||
|     CUDA_CHECK(cudaMalloc((void**)&buffers_[0], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));   //输入资源分配
 | ||
|     CUDA_CHECK(cudaMalloc((void**)&buffers_[1], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));    //输出资源分配
 | ||
| 
 | ||
|     LogInfo << "engineId_:" << engineId_ << " InferenceModelEngine Init ok";
 | ||
|     return APP_ERR_OK;
 | ||
| }
 | ||
| 
 | ||
| APP_ERROR InferenceModelEngine::DeInit()
 | ||
| {
 | ||
|     CUDA_CHECK(cudaStreamDestroy(*inference_model_stream_));  delete inference_model_stream_; inference_model_stream_ = nullptr; //释放模型推理CUDA流
 | ||
|     CUDA_CHECK(cudaFree(buffers_[0]));  //释放输入数据设备端内存
 | ||
|     CUDA_CHECK(cudaFree(buffers_[1])); //释放输出数据设备端内存
 | ||
| 
 | ||
|     //析构engine引擎资源
 | ||
|     context_->destroy();     //析构绘话
 | ||
|     engine_->destroy();      //析构TensorRT引擎
 | ||
|     runtime_->destroy();     //析构运行时环境
 | ||
| 
 | ||
|     delete gLogger_; gLogger_ = nullptr;
 | ||
| 
 | ||
|     LogInfo << "engineId_:" << engineId_ << " InferenceModelEngine DeInit ok";
 | ||
|     return APP_ERR_OK;
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| APP_ERROR InferenceModelEngine::Process()
 | ||
| {
 | ||
|     cudaSetDevice(DEVICE);  //设置GPU 
 | ||
| 
 | ||
|     //wts及engine模型名称
 | ||
|     std::string wts_name = MyYaml::GetIns()->GetStringValue("yolov5_wts_name");
 | ||
|     std::string engine_name = MyYaml::GetIns()->GetStringValue("yolov5_model_name");
 | ||
| 
 | ||
|     bool is_p6 = false; //默认不是P6模型
 | ||
| 
 | ||
|     /**********************************************************************************
 | ||
|         gw width_multiple系数: width_multiple控制网络的宽度。
 | ||
|         gd depth_multiple系数: depth_multiple控制网络的深度
 | ||
|         N模型:
 | ||
|             gd = 0.33;gw = 0.25;
 | ||
|         S模型:
 | ||
|             gd = 0.33;gw = 0.50;
 | ||
|         M模型:
 | ||
|             gd = 0.67;gw = 0.75;
 | ||
|         L模型:
 | ||
|             gd = 1.0;gw = 1.0;
 | ||
|         X模型:
 | ||
|             gd = 1.33;gw = 1.25;
 | ||
|      **********************************************************************************/
 | ||
|     float gd = 0.67, gw = 0.75;     //默认使用M模型
 | ||
| 
 | ||
| 
 | ||
|     //序列化引擎
 | ||
|     //直接使用API创建一个模型,并将其序列化为流  编译成TensorRT引擎engine文件后无需再次调用,调用依次生成engine即可
 | ||
|     #if 0
 | ||
|     if (!wts_name.empty()) {
 | ||
|         IHostMemory* modelStream{ nullptr };
 | ||
|         APIToModel(*gLogger_, BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
 | ||
|         assert(modelStream != nullptr);
 | ||
|         std::ofstream p(engine_name, std::ios::binary);
 | ||
|         if (!p) {
 | ||
|             std::cerr << "could not open plan output file" << std::endl;
 | ||
|             return -1;
 | ||
|         }
 | ||
|         p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
 | ||
|         modelStream->destroy();
 | ||
|     }
 | ||
|     #endif
 | ||
| 
 | ||
|     //反序列化模型并运行推理
 | ||
|     std::ifstream file(engine_name, std::ios::binary);
 | ||
|     if (!file.good()) {
 | ||
|         LogInfo << "read " << engine_name << " error!" << std::endl;
 | ||
|         exit(0);
 | ||
|     }
 | ||
| 
 | ||
|     //创建tensorRT流对象trtModelStream,这个就跟文件流中的ifstream类似的
 | ||
|     //trtModelStream是一块内存区域,用于保存序列化的plan文件
 | ||
|     char *trtModelStream = nullptr;
 | ||
|     size_t size = 0;
 | ||
|     file.seekg(0, file.end);    //将指针移动至距离文件末尾0处的位置
 | ||
|     size = file.tellg();    //获得当前字符的位置
 | ||
|     file.seekg(0, file.beg);    //将指针移动至距离文件开头0处的位置
 | ||
|     trtModelStream = new char[size];
 | ||
|     assert(trtModelStream);
 | ||
|     file.read(trtModelStream, size);    //将序列化engine模型(数据及数据大小)读入trtModelStream
 | ||
|     file.close();
 | ||
| 
 | ||
|    
 | ||
|     //1.设置运行时环境
 | ||
|     //runtime_ = new IRuntime;
 | ||
|     runtime_ = createInferRuntime(*gLogger_);     //创建运行时环境IRuntime对象,传入gLogger用于打印信息
 | ||
|     assert(runtime_ != nullptr);
 | ||
|     //2.生成反序列化引擎
 | ||
|     //engine = new ICudaEngine;
 | ||
|     engine_ = runtime_->deserializeCudaEngine(trtModelStream, size); //反序列化引擎engine(根据trtModelStream反序列化)
 | ||
|     assert(engine_ != nullptr);
 | ||
|     //3.创建上下文环境
 | ||
|     //context = new IExecutionContext;
 | ||
|     context_ = engine_->createExecutionContext();  //创建上下文环境,主要用于inference函数中启动cuda核
 | ||
|     assert(context_ != nullptr);
 | ||
|     delete[] trtModelStream;    //析构trtModelStream
 | ||
|     assert(engine_->getNbBindings() == 2);
 | ||
|     
 | ||
|     //获取绑定的输入输入
 | ||
|     const int inputIndex = engine_->getBindingIndex(INPUT_BLOB_NAME);
 | ||
|     const int outputIndex = engine_->getBindingIndex(OUTPUT_BLOB_NAME);
 | ||
|     std::cout<<"inputIndex: "<<inputIndex<<"\toutputIndex: "<<outputIndex<<std::endl; 
 | ||
|     assert(inputIndex == 0);
 | ||
|     assert(outputIndex == 1);
 | ||
| 
 | ||
|     uint64_t u64count_num = 0;
 | ||
|     int iRet = APP_ERR_OK;
 | ||
|     
 | ||
|     while (!isStop_)
 | ||
|     {
 | ||
|         std::shared_ptr<void> pVoidData0 = nullptr;
 | ||
|         inputQueMap_[strPort0_]->pop(pVoidData0);
 | ||
|         if (nullptr == pVoidData0)
 | ||
|         {
 | ||
|             usleep(1*1000); //n ms
 | ||
|             continue;
 | ||
|         }
 | ||
|         // LogInfo << "receive from ImagePreprocessEngine's data success!";
 | ||
|         // std::cout<<"receive from ImagePreprocessEngine's data success!"<<std::endl;
 | ||
| 
 | ||
|         // std::cout<<"Enter InferenceModelEngine Thread "<<++u64count_num<<" Times!"<<std::endl;
 | ||
|         std::shared_ptr<InferenceData> pImagePreprocessData = std::static_pointer_cast<InferenceData>(pVoidData0);
 | ||
|         
 | ||
|         //将图像预处理数据拷贝到buffers_[0]
 | ||
|         #ifdef CUDA_MEMCPY_TIME_CONSUMING_TEST 
 | ||
|         auto cuda_memcpy_start = std::chrono::system_clock::now();  //计时开始
 | ||
|         CUDA_CHECK(cudaMemcpyAsync(buffers_[0], static_cast<void *>(pImagePreprocessData->pData.get()), pImagePreprocessData->iSize, cudaMemcpyDeviceToDevice,*inference_model_stream_));  
 | ||
|         auto cuda_memcpy_end = std::chrono::system_clock::now();  //计时结束
 | ||
|         std::cout<< "InferenceModelEngine cuda memcpy data size is: "<<pImagePreprocessData->iSize<<std::endl;
 | ||
|         std::cout << "InferenceModelEngine cuda memcpy device to device time: " << std::chrono::duration_cast<std::chrono::milliseconds>(cuda_memcpy_end - cuda_memcpy_start).count() << "ms" << std::endl;
 | ||
|         #else
 | ||
|         CUDA_CHECK(cudaMemcpyAsync(buffers_[0], static_cast<void *>(pImagePreprocessData->pData.get()), pImagePreprocessData->iSize, cudaMemcpyDeviceToDevice,*inference_model_stream_));  
 | ||
|         #endif
 | ||
| 
 | ||
|         //构造推理结果数据
 | ||
|         void* pInferenceModelBuffer = nullptr;
 | ||
|         unsigned int pInferenceModelBuffer_Size = BATCH_SIZE * OUTPUT_SIZE;
 | ||
|         pInferenceModelBuffer = new float[pInferenceModelBuffer_Size];
 | ||
| 
 | ||
|         void* pSrcRGBBuffer = nullptr;
 | ||
| 		unsigned int pSrcRGBBuffer_Size = pImagePreprocessData->iSrcSize;
 | ||
| 		pSrcRGBBuffer = new uint8_t[pSrcRGBBuffer_Size];
 | ||
|         memcpy(pSrcRGBBuffer, pImagePreprocessData->pSrcData.get(), pSrcRGBBuffer_Size);
 | ||
| 
 | ||
|         std::shared_ptr<InferenceData> pInferenceModelData = std::make_shared<InferenceData>();
 | ||
| 
 | ||
|         #ifdef INFERENCE_MODEL_TIME_CONSUMING_TEST 
 | ||
|         auto start = std::chrono::system_clock::now();  //计时开始
 | ||
|         doInference(*context_, *inference_model_stream_, (void**)buffers_, (float*)pInferenceModelBuffer, BATCH_SIZE);  //context为推理的上下文环境,stream为注册流(用于异步推理时进行同步),buffers为传入的图像数据,pInferenceModelBuffer为推理的结果
 | ||
|         auto end = std::chrono::system_clock::now();    
 | ||
|         std::cout << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
 | ||
|         #else
 | ||
|         doInference(*context_, *inference_model_stream_, (void**)buffers_, (float*)pInferenceModelBuffer, BATCH_SIZE);  //context为推理的上下文环境,stream为注册流(用于异步推理时进行同步),buffers为传入的图像数据,pInferenceModelBuffer为推理的结果
 | ||
|         #endif
 | ||
| 
 | ||
|         //组织数据
 | ||
|         pInferenceModelData->iDataSource = engineId_;
 | ||
|         pInferenceModelData->iSize = pInferenceModelBuffer_Size;
 | ||
|         pInferenceModelData->pData.reset(pInferenceModelBuffer, [](void* data){if(data){delete[] data; data = nullptr;}}); //智能指针管理内存
 | ||
|         pInferenceModelData->iSrcSize = pSrcRGBBuffer_Size;
 | ||
|         pInferenceModelData->pSrcData.reset(pSrcRGBBuffer, [](void* data){if(data){delete[] data; data = nullptr;}}); //智能指针管理内存
 | ||
|         pInferenceModelData->i64TimeStamp = pImagePreprocessData->i64TimeStamp;
 | ||
| 
 | ||
|         #if 1
 | ||
|         //推理结果送入下一引擎
 | ||
|         iRet = outputQueMap_[strPort0_]->push(std::static_pointer_cast<void>(pInferenceModelData));
 | ||
|         if (iRet != APP_ERR_OK){
 | ||
| 			LogError << "push info error";
 | ||
|             // std::cerr<<"push the inference model data failed..."<<std::endl;
 | ||
| 		}else{
 | ||
|             // std::cout<<"push the inference model data success!"<<std::endl;
 | ||
|         }
 | ||
|         #endif
 | ||
|     }
 | ||
| }
 | ||
| 
 | ||
| 
 | ||
| 
 | ||
| //获取宽度
 | ||
| int InferenceModelEngine::get_width(int x, float gw, int divisor = 8) {
 | ||
|     return int(ceil((x * gw) / divisor)) * divisor;
 | ||
| }
 | ||
| 
 | ||
| //获取深度
 | ||
| int InferenceModelEngine::get_depth(int x, float gd) {
 | ||
|     if (x == 1) return 1;
 | ||
|     int r = round(x * gd);
 | ||
|     if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) {
 | ||
|         --r;
 | ||
|     }
 | ||
|     return std::max<int>(r, 1);
 | ||
| }
 | ||
| 
 | ||
| //构建普通引擎(例:yolov5s,yolov5m...)
 | ||
| ICudaEngine* InferenceModelEngine::build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name) {
 | ||
|     INetworkDefinition* network = builder->createNetworkV2(0U);
 | ||
| 
 | ||
|     // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
 | ||
|     ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
 | ||
|     assert(data);
 | ||
|     std::map<std::string, Weights> weightMap = loadWeights(wts_name);
 | ||
|     /* ------ yolov5 backbone------ */
 | ||
|     auto conv0 = convBlock(network, weightMap, *data,  get_width(64, gw), 6, 2, 1,  "model.0");
 | ||
|     assert(conv0);
 | ||
|     auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
 | ||
|     auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
 | ||
|     auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
 | ||
|     auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
 | ||
|     auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
 | ||
|     auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
 | ||
|     auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
 | ||
|     auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
 | ||
|     auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
 | ||
|     /* ------ yolov5 head ------ */
 | ||
|     auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
 | ||
| 
 | ||
|     auto upsample11 = network->addResize(*conv10->getOutput(0));
 | ||
|     assert(upsample11);
 | ||
|     upsample11->setResizeMode(ResizeMode::kNEAREST);
 | ||
|     upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
 | ||
| 
 | ||
|     ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
 | ||
|     auto cat12 = network->addConcatenation(inputTensors12, 2);
 | ||
|     auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
 | ||
|     auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
 | ||
| 
 | ||
|     auto upsample15 = network->addResize(*conv14->getOutput(0));
 | ||
|     assert(upsample15);
 | ||
|     upsample15->setResizeMode(ResizeMode::kNEAREST);
 | ||
|     upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
 | ||
| 
 | ||
|     ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
 | ||
|     auto cat16 = network->addConcatenation(inputTensors16, 2);
 | ||
| 
 | ||
|     auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
 | ||
| 
 | ||
|     /* ------ detect ------ */
 | ||
|     IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
 | ||
|     auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
 | ||
|     ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
 | ||
|     auto cat19 = network->addConcatenation(inputTensors19, 2);
 | ||
|     auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
 | ||
|     IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
 | ||
|     auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
 | ||
|     ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
 | ||
|     auto cat22 = network->addConcatenation(inputTensors22, 2);
 | ||
|     auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
 | ||
|     IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
 | ||
| 
 | ||
|     auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
 | ||
|     yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
 | ||
|     network->markOutput(*yolo->getOutput(0));
 | ||
|     // Build engine
 | ||
|     builder->setMaxBatchSize(maxBatchSize);
 | ||
|     config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
 | ||
| #if defined(USE_FP16)
 | ||
|     config->setFlag(BuilderFlag::kFP16);
 | ||
| #elif defined(USE_INT8)
 | ||
|     std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
 | ||
|     assert(builder->platformHasFastInt8());
 | ||
|     config->setFlag(BuilderFlag::kINT8);
 | ||
|     Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
 | ||
|     config->setInt8Calibrator(calibrator);
 | ||
| #endif
 | ||
| 
 | ||
|     std::cout << "Building engine, please wait for a while..." << std::endl;
 | ||
|     ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
 | ||
|     std::cout << "Build engine successfully!" << std::endl;
 | ||
| 
 | ||
|     // Don't need the network any more
 | ||
|     network->destroy();
 | ||
| 
 | ||
|     // Release host memory
 | ||
|     for (auto& mem : weightMap)
 | ||
|     {
 | ||
|         free((void*)(mem.second.values));
 | ||
|     }
 | ||
| 
 | ||
|     return engine;
 | ||
| }
 | ||
| 
 | ||
| //构建p6引擎(例:yolov5s6,yolov5m6...)
 | ||
| ICudaEngine* InferenceModelEngine::build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, nvinfer1::DataType dt, float& gd, float& gw, std::string& wts_name) {
 | ||
|     INetworkDefinition* network = builder->createNetworkV2(0U);
 | ||
|     // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
 | ||
|     ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
 | ||
|     assert(data);
 | ||
|     
 | ||
|     std::map<std::string, Weights> weightMap = loadWeights(wts_name);
 | ||
| 
 | ||
|     /* ------ yolov5 backbone------ */
 | ||
|     auto conv0 = convBlock(network, weightMap, *data,  get_width(64, gw), 6, 2, 1,  "model.0");
 | ||
|     auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
 | ||
|     auto c3_2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
 | ||
|     auto conv3 = convBlock(network, weightMap, *c3_2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
 | ||
|     auto c3_4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
 | ||
|     auto conv5 = convBlock(network, weightMap, *c3_4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
 | ||
|     auto c3_6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
 | ||
|     auto conv7 = convBlock(network, weightMap, *c3_6->getOutput(0), get_width(768, gw), 3, 2, 1, "model.7");
 | ||
|     auto c3_8 = C3(network, weightMap, *conv7->getOutput(0), get_width(768, gw), get_width(768, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
 | ||
|     auto conv9 = convBlock(network, weightMap, *c3_8->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.9");
 | ||
|     auto c3_10 = C3(network, weightMap, *conv9->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.10");
 | ||
|     auto sppf11 = SPPF(network, weightMap, *c3_10->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.11");
 | ||
| 
 | ||
|     /* ------ yolov5 head ------ */
 | ||
|     auto conv12 = convBlock(network, weightMap, *sppf11->getOutput(0), get_width(768, gw), 1, 1, 1, "model.12");
 | ||
|     auto upsample13 = network->addResize(*conv12->getOutput(0));
 | ||
|     assert(upsample13);
 | ||
|     upsample13->setResizeMode(ResizeMode::kNEAREST);
 | ||
|     upsample13->setOutputDimensions(c3_8->getOutput(0)->getDimensions());
 | ||
|     ITensor* inputTensors14[] = { upsample13->getOutput(0), c3_8->getOutput(0) };
 | ||
|     auto cat14 = network->addConcatenation(inputTensors14, 2);
 | ||
|     auto c3_15 = C3(network, weightMap, *cat14->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.15");
 | ||
| 
 | ||
|     auto conv16 = convBlock(network, weightMap, *c3_15->getOutput(0), get_width(512, gw), 1, 1, 1, "model.16");
 | ||
|     auto upsample17 = network->addResize(*conv16->getOutput(0));
 | ||
|     assert(upsample17);
 | ||
|     upsample17->setResizeMode(ResizeMode::kNEAREST);
 | ||
|     upsample17->setOutputDimensions(c3_6->getOutput(0)->getDimensions());
 | ||
|     ITensor* inputTensors18[] = { upsample17->getOutput(0), c3_6->getOutput(0) };
 | ||
|     auto cat18 = network->addConcatenation(inputTensors18, 2);
 | ||
|     auto c3_19 = C3(network, weightMap, *cat18->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.19");
 | ||
| 
 | ||
|     auto conv20 = convBlock(network, weightMap, *c3_19->getOutput(0), get_width(256, gw), 1, 1, 1, "model.20");
 | ||
|     auto upsample21 = network->addResize(*conv20->getOutput(0));
 | ||
|     assert(upsample21);
 | ||
|     upsample21->setResizeMode(ResizeMode::kNEAREST);
 | ||
|     upsample21->setOutputDimensions(c3_4->getOutput(0)->getDimensions());
 | ||
|     ITensor* inputTensors21[] = { upsample21->getOutput(0), c3_4->getOutput(0) };
 | ||
|     auto cat22 = network->addConcatenation(inputTensors21, 2);
 | ||
|     auto c3_23 = C3(network, weightMap, *cat22->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
 | ||
| 
 | ||
|     auto conv24 = convBlock(network, weightMap, *c3_23->getOutput(0), get_width(256, gw), 3, 2, 1, "model.24");
 | ||
|     ITensor* inputTensors25[] = { conv24->getOutput(0), conv20->getOutput(0) };
 | ||
|     auto cat25 = network->addConcatenation(inputTensors25, 2);
 | ||
|     auto c3_26 = C3(network, weightMap, *cat25->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.26");
 | ||
| 
 | ||
|     auto conv27 = convBlock(network, weightMap, *c3_26->getOutput(0), get_width(512, gw), 3, 2, 1, "model.27");
 | ||
|     ITensor* inputTensors28[] = { conv27->getOutput(0), conv16->getOutput(0) };
 | ||
|     auto cat28 = network->addConcatenation(inputTensors28, 2);
 | ||
|     auto c3_29 = C3(network, weightMap, *cat28->getOutput(0), get_width(1536, gw), get_width(768, gw), get_depth(3, gd), false, 1, 0.5, "model.29");
 | ||
| 
 | ||
|     auto conv30 = convBlock(network, weightMap, *c3_29->getOutput(0), get_width(768, gw), 3, 2, 1, "model.30");
 | ||
|     ITensor* inputTensors31[] = { conv30->getOutput(0), conv12->getOutput(0) };
 | ||
|     auto cat31 = network->addConcatenation(inputTensors31, 2);
 | ||
|     auto c3_32 = C3(network, weightMap, *cat31->getOutput(0), get_width(2048, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.32");
 | ||
| 
 | ||
|     /* ------ detect ------ */
 | ||
|     IConvolutionLayer* det0 = network->addConvolutionNd(*c3_23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.0.weight"], weightMap["model.33.m.0.bias"]);
 | ||
|     IConvolutionLayer* det1 = network->addConvolutionNd(*c3_26->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.1.weight"], weightMap["model.33.m.1.bias"]);
 | ||
|     IConvolutionLayer* det2 = network->addConvolutionNd(*c3_29->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.2.weight"], weightMap["model.33.m.2.bias"]);
 | ||
|     IConvolutionLayer* det3 = network->addConvolutionNd(*c3_32->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.33.m.3.weight"], weightMap["model.33.m.3.bias"]);
 | ||
| 
 | ||
|     auto yolo = addYoLoLayer(network, weightMap, "model.33", std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
 | ||
|     yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
 | ||
|     network->markOutput(*yolo->getOutput(0));
 | ||
| 
 | ||
|     // Build engine
 | ||
|     builder->setMaxBatchSize(maxBatchSize);
 | ||
|     config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB
 | ||
| #if defined(USE_FP16)
 | ||
|     config->setFlag(BuilderFlag::kFP16);
 | ||
| #elif defined(USE_INT8)
 | ||
|     std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
 | ||
|     assert(builder->platformHasFastInt8());
 | ||
|     config->setFlag(BuilderFlag::kINT8);
 | ||
|     Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
 | ||
|     config->setInt8Calibrator(calibrator);
 | ||
| #endif
 | ||
| 
 | ||
|     std::cout << "Building engine, please wait for a while..." << std::endl;
 | ||
|     ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
 | ||
|     std::cout << "Build engine successfully!" << std::endl;
 | ||
| 
 | ||
|     // Don't need the network any more
 | ||
|     network->destroy();
 | ||
| 
 | ||
|     // Release host memory
 | ||
|     for (auto& mem : weightMap)
 | ||
|     {
 | ||
|         free((void*)(mem.second.values));
 | ||
|     }
 | ||
| 
 | ||
|     return engine;
 | ||
| }
 | ||
| 
 | ||
| //转换模型
 | ||
| void InferenceModelEngine::APIToModel(Logger gLogger, unsigned int maxBatchSize, IHostMemory** modelStream, bool& is_p6, float& gd, float& gw, std::string& wts_name) {
 | ||
|     // Create builder
 | ||
|     IBuilder* builder = createInferBuilder(gLogger);    //创建builder(要传入gLogger)
 | ||
|     IBuilderConfig* config = builder->createBuilderConfig();    //创建builderconfig
 | ||
| 
 | ||
|     // 创建模型来填充网络,然后设置输出并创建一个引擎  
 | ||
|     // Create model to populate the network, then set the outputs and create an engine
 | ||
|     ICudaEngine *engine = nullptr;
 | ||
|     if (is_p6) {
 | ||
|         engine = build_engine_p6(maxBatchSize, builder, config, nvinfer1::DataType::kFLOAT, gd, gw, wts_name);
 | ||
|     } else {
 | ||
|         engine = build_engine(maxBatchSize, builder, config, nvinfer1::DataType::kFLOAT, gd, gw, wts_name);
 | ||
|     }
 | ||
|     assert(engine != nullptr);
 | ||
| 
 | ||
|     // Serialize the engine
 | ||
|     //序列化引擎生成模型流
 | ||
|     (*modelStream) = engine->serialize();
 | ||
| 
 | ||
|     // Close everything down
 | ||
|     //释放相关资源
 | ||
|     engine->destroy();
 | ||
|     builder->destroy();
 | ||
|     config->destroy();
 | ||
| }
 | ||
| 
 | ||
| //执行推理
 | ||
| void InferenceModelEngine::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize) {
 | ||
|     // infer on the batch asynchronously, and DMA output back to host
 | ||
|     context.enqueue(batchSize, buffers, stream, nullptr);   //执行异步推理(调用context->enqueueV2即可执行异步推理,如果用同步推理的话,可以调用context->executeV2)
 | ||
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));   //把推理后的结果从GPU上拷贝到CPU上
 | ||
|     cudaStreamSynchronize(stream); //同步之前创建的cuda流,原因很简单,直接使用的context->enqueueV2函数是异步推理,因此需要把cuda流同步一下
 | ||
| }
 |