271 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
			
		
		
	
	
			271 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
#include "inference.h"

#include <chrono>
#include <cstddef>
#include <iostream>
#include <sstream>
 | |
| 
 | |
// Formats a list of dimensions as "d0 x d1 x ... x dn".
//
// dims: values to join; an empty vector yields an empty string.
// Returns the joined text. Values are streamed with their own type, so
// 64-bit dimensions are printed in full instead of being pushed through a
// "%d" format specifier.
template<typename T>
static std::string join_dims(const std::vector<T>& dims)
{
    std::ostringstream output;
    for (std::size_t i = 0; i < dims.size(); ++i) {
        if (i != 0) {
            output << " x ";
        }
        // Unary plus promotes narrow integer types (e.g. int8_t) to int so
        // they are printed as numbers rather than as characters.
        output << +dims[i];
    }
    return output.str();
}
 | |
| 
 | |
| Inference::Inference() {}
 | |
| 
 | |
| Inference::~Inference() {}
 | |
| 
 | |
| inline unsigned int Inference::getElementSize(nvinfer1::DataType t)
 | |
| {
 | |
|     switch (t)
 | |
|     {
 | |
|         case nvinfer1::DataType::kINT32: return 4;
 | |
|         case nvinfer1::DataType::kFLOAT: return 4;
 | |
|         case nvinfer1::DataType::kHALF: return 2;
 | |
|         case nvinfer1::DataType::kBOOL:
 | |
|         case nvinfer1::DataType::kINT8: return 1;
 | |
|     }
 | |
|     throw std::runtime_error("Invalid DataType.");
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| inline int64_t Inference::volume(const nvinfer1::Dims& d)
 | |
| {
 | |
|     return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
 | |
| }
 | |
| 
 | |
| //onnx解析器
 | |
| ICudaEngine* Inference::build_engine_onnx(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IBuilder* builder, IBuilderConfig* config, std::string& source_onnx)
 | |
| {
 | |
|     const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
 | |
|     INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
 | |
| 
 | |
|     //创建onnx解析器
 | |
|     nvonnxparser::IParser* onnxParser = nvonnxparser::createParser(*network, gLogger);
 | |
|     //解析onnx文件
 | |
|     onnxParser->parseFromFile(source_onnx.c_str(), 1);
 | |
| 
 | |
|     // Build engine
 | |
|     builder->setMaxBatchSize(maxBatchSize);
 | |
|     config->setMaxWorkspaceSize(maxWorkSpaceSize);  // 16MB
 | |
|     float max_workspace_size = (float)maxWorkSpaceSize/1024.0f/1024.0f;
 | |
| 
 | |
|     #if defined(USE_FP16)
 | |
|     config->setFlag(BuilderFlag::kFP16);
 | |
|     #endif
 | |
| 
 | |
|     std::cout<<"Set max batch size = "<<maxBatchSize<<std::endl;        //最大batch size
 | |
|     std::cout<<"Set max workspace size = "<<max_workspace_size<<" MB"<<std::endl;        //最大workspace size
 | |
|     
 | |
|     int net_num_input = network->getNbInputs();     //获取网络输入个数
 | |
|     printf("Network has %d inputs:\n", net_num_input);
 | |
|     std::vector<std::string> input_names(net_num_input);  
 | |
|     for(int i = 0; i < net_num_input; ++i){     //获取每个输入的张量及张量维度
 | |
|         auto tensor = network->getInput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(std::vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
| 
 | |
|         input_names[i] = tensor->getName();
 | |
|     }
 | |
| 
 | |
|     int net_num_output = network->getNbOutputs();   //获取网络输出个数
 | |
|     printf("Network has %d outputs:\n", net_num_output);
 | |
|     for(int i = 0; i < net_num_output; ++i){    //获取每个输出的张量及张量维度
 | |
|         auto tensor = network->getOutput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(std::vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
|     }
 | |
| 
 | |
|     int net_num_layers = network->getNbLayers();    //获取网络层数
 | |
|     printf("Network has %d layers\n", net_num_layers);		
 | |
| 
 | |
|     //配置OptimizationProfile文件(最佳优化)
 | |
|     auto profile = builder->createOptimizationProfile();
 | |
|     for(int i = 0; i < net_num_input; ++i){
 | |
|         auto input = network->getInput(i);
 | |
|         auto input_dims = input->getDimensions();
 | |
|         input_dims.d[0] = 1;
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
 | |
|         input_dims.d[0] = maxBatchSize;
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
 | |
|     }
 | |
|     config->addOptimizationProfile(profile);    //builderconfig里面添加OptimizationProfile文件
 | |
| 
 | |
| 
 | |
|     std::cout << "Building engine with onnx parser, please wait for a while..." << std::endl;
 | |
|     //计时 计算编译时间
 | |
|     auto time_start = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
 | |
|     auto time_end = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     std::cout << "Build engine with onnx parser successfully!" << std::endl;
 | |
|     printf("Build done %lld ms !\n", time_end - time_start);
 | |
| 
 | |
|     // Don't need the network any more
 | |
|     onnxParser->destroy();
 | |
|     network->destroy();
 | |
| 
 | |
|     return engine;
 | |
| }
 | |
| 
 | |
| ICudaEngine* Inference::build_engine_caffe(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IBuilder* builder, IBuilderConfig* config,            
 | |
|                         const std::string& strCaffeModelFile,  const std::string& strCaffeDeployFile, const std::vector<std::string>& vecOutputs)   
 | |
| {
 | |
|     // 创建network
 | |
|     INetworkDefinition* network = builder->createNetworkV2(0);
 | |
| 
 | |
|     // 创建caffe解析器
 | |
|     ICaffeParser* caffeParser = createCaffeParser();
 | |
| 
 | |
|     const IBlobNameToTensor *blobNameToTensor =	caffeParser->parse(strCaffeDeployFile.c_str(),
 | |
|                                                           strCaffeModelFile.c_str(),
 | |
|                                                           *network,
 | |
|                                                           nvinfer1::DataType::kFLOAT);
 | |
|     //标记输出
 | |
|     for (auto& s : vecOutputs){
 | |
|         network->markOutput(*blobNameToTensor->find(s.c_str()));
 | |
|     }
 | |
| 
 | |
|     //设置batch_size和workspace size
 | |
|     builder->setMaxBatchSize(maxBatchSize);
 | |
|     config->setMaxWorkspaceSize(maxWorkSpaceSize);
 | |
|     config->setFlag(BuilderFlag::kGPU_FALLBACK);
 | |
|     config->setFlag(BuilderFlag::kSTRICT_TYPES);
 | |
| 
 | |
|     // FP16精度
 | |
|     #if defined(USE_FP16)
 | |
|     config->setFlag(BuilderFlag::kFP16);    
 | |
|     #endif
 | |
| 
 | |
|     float max_workspace_size = (float)maxWorkSpaceSize/1024.0f/1024.0f;
 | |
|     std::cout<<"Set max batch size = "<<maxBatchSize<<std::endl;        //最大batch_size
 | |
|     std::cout<<"Set max workspace size = "<<max_workspace_size<<" MB"<<std::endl;    //最大batch_size
 | |
|     
 | |
|     int net_num_input = network->getNbInputs();     //获取网络输入个数
 | |
|     printf("Network has %d inputs:\n", net_num_input);
 | |
|     std::vector<std::string> input_names(net_num_input);  
 | |
|     for(int i = 0; i < net_num_input; ++i){ //获取每个输入的张量及张量维度
 | |
|         auto tensor = network->getInput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
| 
 | |
|         input_names[i] = tensor->getName();
 | |
|     }
 | |
| 
 | |
|     int net_num_output = network->getNbOutputs();   //获取网络输出个数
 | |
|     printf("Network has %d outputs:\n", net_num_output);
 | |
|     for(int i = 0; i < net_num_output; ++i){    //获取每个输出的张量及张量维度
 | |
|         auto tensor = network->getOutput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
|     }
 | |
| 
 | |
|     int net_num_layers = network->getNbLayers();    //获取网络层数
 | |
|     printf("Network has %d layers\n", net_num_layers);	
 | |
| 
 | |
| 
 | |
|     //编译引擎
 | |
|     //计时 计算编译时间
 | |
|     std::cout << "Building engine with caffe parser, please wait for a while..." << std::endl;
 | |
|     auto time_start = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
 | |
| 	assert(engine);
 | |
|     auto time_end = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     std::cout << "Build engine with caffe parser successfully!" << std::endl;
 | |
|     printf("Build done %lld ms !\n", time_end - time_start);
 | |
| 
 | |
|     //释放所有资源
 | |
| 	caffeParser->destroy();
 | |
|     network->destroy();
 | |
|     
 | |
|     return engine;
 | |
| }
 | |
| 
 | |
| //转换模型
 | |
| void Inference::ONNXToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IHostMemory** modelStream, std::string& onnx_model_name) 
 | |
| {
 | |
|     IBuilder* builder = createInferBuilder(gLogger);    //创建builder(要传入gLogger)
 | |
|     IBuilderConfig* config = builder->createBuilderConfig();    //创建builderconfig
 | |
| 
 | |
|     // 创建模型来填充网络,然后设置输出并创建一个引擎  
 | |
|     ICudaEngine *engine = nullptr;
 | |
| 
 | |
|     engine = build_engine_onnx(gLogger, maxBatchSize, maxWorkSpaceSize, builder, config, onnx_model_name);
 | |
|     assert(engine != nullptr);
 | |
| 
 | |
|     //序列化引擎生成模型流
 | |
|     (*modelStream) = engine->serialize();
 | |
| 
 | |
|     //释放相关资源
 | |
|     engine->destroy();
 | |
|     builder->destroy();
 | |
|     config->destroy();
 | |
| }
 | |
| 
 | |
| void Inference::CaffeToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IHostMemory** modelStream, std::string& caffe_model_name, std::string& caffe_deploy_name, std::vector<std::string>& outputs) 
 | |
| {
 | |
|     IBuilder* builder = createInferBuilder(gLogger);    //创建builder(要传入gLogger)
 | |
|     IBuilderConfig* config = builder->createBuilderConfig();    //创建builderconfig
 | |
| 
 | |
|     // 创建模型来填充网络,然后设置输出并创建一个引擎  
 | |
|     ICudaEngine *engine = nullptr;
 | |
| 
 | |
|     engine = build_engine_caffe(gLogger, maxBatchSize, maxWorkSpaceSize, builder, config, caffe_model_name, caffe_deploy_name, outputs);
 | |
|     assert(engine != nullptr);
 | |
| 
 | |
|     //序列化引擎生成模型流
 | |
|     (*modelStream) = engine->serialize();
 | |
| 
 | |
|     //释放相关资源
 | |
|     engine->destroy();
 | |
|     builder->destroy();
 | |
|     config->destroy();
 | |
| }
 | |
| 
 | |
| //执行推理1
 | |
| void Inference::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int inputIndex, float* input, int inputSize, 
 | |
|                     unsigned int ouputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * inputSize * sizeof(float), cudaMemcpyHostToDevice, stream));  
 | |
|     context.enqueue(batchSize, buffers, stream, nullptr);   
 | |
|     // context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[ouputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| //执行推理2
 | |
| void Inference::doInferenceV2(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     context.enqueue(batchSize, buffers, stream, nullptr);   
 | |
|     // context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| 
 | |
| //执行推理3
 | |
| void Inference::doInferenceV3(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int inputIndex, float* input, int inputSize, 
 | |
|                     unsigned int ouputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * inputSize * sizeof(float), cudaMemcpyHostToDevice, stream));  
 | |
|     context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[ouputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| //执行推理4
 | |
| void Inference::doInferenceV4(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| 
 |