#ifndef _INFERENCE_H_
#define _INFERENCE_H_

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <NvInfer.h>
#include <NvInferPlugin.h>
#include <NvOnnxParser.h>
#include <NvCaffeParser.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>

#include "cuda_utils.h"
#include "logging.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;
using namespace std;

#define ENABLE_CUDA_PREPROCESS
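
// Typical usage (illustrative sketch only; the actual flow lives in the
// corresponding .cpp, which is not part of this header, and names such as
// "model.onnx" below are hypothetical):
//
//   Logger gLogger;
//   Inference infer;
//   IHostMemory* modelStream = nullptr;
//   std::string onnxPath = "model.onnx";
//   infer.ONNXToModel(gLogger, 1, 1 << 28, &modelStream, onnxPath);
//   // deserialize modelStream with pRuntime_ into pEngine_, create pContext_,
//   // allocate pfBuffers_, then call one of the doInference*() overloads with
//   // the input/output binding indices.
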
class Inference
{
public:
    Inference();
    ~Inference();

    // Size in bytes of one element of the given TensorRT data type.
    inline unsigned int getElementSize(nvinfer1::DataType t);
    // Number of elements described by a Dims object (product of its dimensions).
    inline int64_t volume(const nvinfer1::Dims& d);
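
    // Engine construction: build an ICudaEngine either from an ONNX model or from
    // a Caffe deploy/model file pair. The *ToModel variants take an IHostMemory**
    // and presumably serialize the built engine into that model stream.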
    ICudaEngine* build_engine_onnx(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IBuilder* builder, IBuilderConfig* config, std::string& source_onnx);

    ICudaEngine* build_engine_caffe(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IBuilder* builder, IBuilderConfig* config,
                                    const std::string& strCaffeModelFile, const std::string& strCaffeDeployFile, const std::vector<std::string>& vecOutputs);

    void ONNXToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IHostMemory** modelStream, std::string& onnx_model_name);

    void CaffeToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize, IHostMemory** modelStream, std::string& caffe_model_name, std::string& caffe_deploy_name, std::vector<std::string>& outputs);
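
    // Inference entry points. doInference()/doInferenceV3() take a host input
    // pointer and size in addition to the output, while the V2/V4 overloads take
    // only the output, presumably because the input is already resident in the
    // device buffers (e.g. filled by the CUDA preprocessing path enabled by
    // ENABLE_CUDA_PREPROCESS).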
    void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, unsigned int inputIndex, float* input, int inputSize,
                     unsigned int outputIndex, float* output, int outputSize, int batchSize);

    void doInferenceV2(IExecutionContext& context, cudaStream_t& stream, void** buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize);

    void doInferenceV3(IExecutionContext& context, cudaStream_t& stream, void** buffers, unsigned int inputIndex, float* input, int inputSize,
                       unsigned int outputIndex, float* output, int outputSize, int batchSize);

    void doInferenceV4(IExecutionContext& context, cudaStream_t& stream, void** buffers, unsigned int outputIndex, float* output, int outputSize, int batchSize);
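
    // Buffers, CUDA streams and TensorRT objects shared with the implementation.
    // They are exposed as public members; allocation and release are presumably
    // handled by the implementation file.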
    float* pfBuffers_[2];
    float* pfInputData_ = nullptr;
    float* pfOutputData_ = nullptr;

    uint8_t* pu8ImgHost_ = nullptr;   // related image memory allocations
    uint8_t* pu8ImgDevice_ = nullptr; // device-side image buffer

    unsigned int uiInputIndex_ = 0, uiOutputIndex_ = 0;

    cudaStream_t* pImagePreprocessStream_ = nullptr; // CUDA stream for image preprocessing
    cudaStream_t* pInferenceModelStream_ = nullptr;  // CUDA stream for model inference

    Logger* pGLogger_ = nullptr;
    IRuntime* pRuntime_ = nullptr;
    ICudaEngine* pEngine_ = nullptr;
    IExecutionContext* pContext_ = nullptr;

private:
};

#endif //END OF _INFERENCE_H_