generated from zhangwei/Matrixai
			
		
			
				
	
	
		
			141 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
			
		
		
	
	
			141 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
| #include "inference.h"
 | |
| 
 | |
| Inference::Inference() {}
 | |
| 
 | |
| Inference::~Inference() {}
 | |
| 
 | |
| //onnx解析器
 | |
| ICudaEngine* Inference::build_engine_onnx(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IBuilder* builder, IBuilderConfig* config, std::string& source_onnx)
 | |
| {
 | |
|     const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
 | |
|     INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
 | |
| 
 | |
|     //创建onnx解析器
 | |
|     nvonnxparser::IParser* onnxParser = nvonnxparser::createParser(*network, gLogger);
 | |
|     //解析onnx文件
 | |
|     onnxParser->parseFromFile(source_onnx.c_str(), 1);
 | |
| 
 | |
|     // Build engine
 | |
|     builder->setMaxBatchSize(maxBatchSize);
 | |
|     config->setMaxWorkspaceSize(maxWorkSpaceSize);  // 16MB
 | |
|     float max_workspace_size = (float)maxWorkSpaceSize/1024.0f/1024.0f;
 | |
| 
 | |
|     #if defined(USE_FP16)
 | |
|     config->setFlag(BuilderFlag::kFP16);
 | |
|     #endif
 | |
| 
 | |
|     std::cout<<"Set max batch size = "<<maxBatchSize<<std::endl;        //最大batch_size
 | |
|     std::cout<<"Set max workspace size = "<<max_workspace_size<<" MB"<<std::endl;        //最大batch_size
 | |
|     
 | |
|     int net_num_input = network->getNbInputs();     //获取网络输入个数
 | |
|     printf("Network has %d inputs:\n", net_num_input);
 | |
|     std::vector<std::string> input_names(net_num_input);  
 | |
|     for(int i = 0; i < net_num_input; ++i){ //获取每个输入的张量及张量维度
 | |
|         auto tensor = network->getInput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
| 
 | |
|         input_names[i] = tensor->getName();
 | |
|     }
 | |
| 
 | |
|     int net_num_output = network->getNbOutputs();   //获取网络输出个数
 | |
|     printf("Network has %d outputs:\n", net_num_output);
 | |
|     for(int i = 0; i < net_num_output; ++i){    //获取每个输出的张量及张量维度
 | |
|         auto tensor = network->getOutput(i);
 | |
|         auto dims = tensor->getDimensions();
 | |
|         auto dims_str = join_dims(vector<int>(dims.d, dims.d+dims.nbDims));
 | |
|         printf("      %d.[%s] shape is %s\n", i, tensor->getName(), dims_str.c_str());
 | |
|     }
 | |
| 
 | |
|     int net_num_layers = network->getNbLayers();    //获取网络层数
 | |
|     printf("Network has %d layers\n", net_num_layers);		
 | |
| 
 | |
|     //配置OptimizationProfile文件(最佳优化)
 | |
|     auto profile = builder->createOptimizationProfile();
 | |
|     for(int i = 0; i < net_num_input; ++i){
 | |
|         auto input = network->getInput(i);
 | |
|         auto input_dims = input->getDimensions();
 | |
|         input_dims.d[0] = 1;
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
 | |
|         input_dims.d[0] = maxBatchSize;
 | |
|         profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
 | |
|     }
 | |
|     config->addOptimizationProfile(profile);    //builderconfig里面添加OptimizationProfile文件
 | |
| 
 | |
| 
 | |
|     std::cout << "Building engine with onnx parser, please wait for a while..." << std::endl;
 | |
|     //计时 计算编译时间
 | |
|     auto time_start = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
 | |
|     auto time_end = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now().time_since_epoch()).count();
 | |
|     std::cout << "Build engine with onnx parser successfully!" << std::endl;
 | |
|     printf("Build done %lld ms !\n", time_end - time_start);
 | |
| 
 | |
|     // Don't need the network any more
 | |
|     network->destroy();
 | |
| 
 | |
|     return engine;
 | |
| }
 | |
| 
 | |
| //转换模型
 | |
| void Inference::APIToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IHostMemory** modelStream, std::string& onnx_model_name) 
 | |
| {
 | |
|     IBuilder* builder = createInferBuilder(gLogger);    //创建builder(要传入gLogger)
 | |
|     IBuilderConfig* config = builder->createBuilderConfig();    //创建builderconfig
 | |
| 
 | |
|     // 创建模型来填充网络,然后设置输出并创建一个引擎  
 | |
|     ICudaEngine *engine = nullptr;
 | |
| 
 | |
|     engine = build_engine_onnx(gLogger, maxBatchSize, maxWorkSpaceSize, builder, config, onnx_model_name);
 | |
|     assert(engine != nullptr);
 | |
| 
 | |
|     //序列化引擎生成模型流
 | |
|     (*modelStream) = engine->serialize();
 | |
| 
 | |
|     //释放相关资源
 | |
|     engine->destroy();
 | |
|     builder->destroy();
 | |
|     config->destroy();
 | |
| }
 | |
| 
 | |
| //执行推理
 | |
| void Inference::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int inputIndex, float* input, int inputSize, 
 | |
|                     unsigned int ouputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * inputSize * sizeof(float), cudaMemcpyHostToDevice, stream));  
 | |
|     context.enqueue(batchSize, buffers, stream, nullptr);   
 | |
|     //context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[ouputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| void Inference::doInferenceV2(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     context.enqueue(batchSize, buffers, stream, nullptr);
 | |
|     //context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| //执行推理3
 | |
| void Inference::doInferenceV3(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int inputIndex, float* input, int inputSize, 
 | |
|                     unsigned int ouputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * inputSize * sizeof(float), cudaMemcpyHostToDevice, stream));  
 | |
|     context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[ouputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| //执行推理4
 | |
| void Inference::doInferenceV4(IExecutionContext& context, cudaStream_t& stream, void **buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize) 
 | |
| {
 | |
|     context.enqueueV2(buffers, stream, nullptr);  
 | |
|     CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * outputSize * sizeof(float), cudaMemcpyDeviceToHost, stream));  
 | |
|     cudaStreamSynchronize(stream); 
 | |
| }
 | |
| 
 | |
| 
 |