#ifndef _INFERENCE_H_
#define _INFERENCE_H_

// TensorRT
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <NvCaffeParser.h>

// CUDA runtime
#include <cuda_runtime_api.h>

// C++ standard library
#include <cstdint>
#include <string>
#include <vector>

#include "cuda_utils.h"
#include "logging.h"

using namespace nvinfer1;
using namespace nvcaffeparser1;
using namespace std;

#define ENABLE_CUDA_PREPROCESS

class Inference {
public:
    Inference();
    ~Inference();

    // Size in bytes of one element of the given TensorRT data type.
    inline unsigned int getElementSize(nvinfer1::DataType t);
    // Product of all dimensions, i.e. the element count of a tensor.
    inline int64_t volume(const nvinfer1::Dims& d);

    // Build an engine by parsing an ONNX model.
    ICudaEngine* build_engine_onnx(Logger gLogger, unsigned int maxBatchSize,
                                   unsigned int maxWorkSpaceSize, IBuilder* builder,
                                   IBuilderConfig* config, std::string& source_onnx);
    // Build an engine by parsing a Caffe deploy/model pair;
    // vecOutputs names the output blobs.
    ICudaEngine* build_engine_caffe(Logger gLogger, unsigned int maxBatchSize,
                                    unsigned int maxWorkSpaceSize, IBuilder* builder,
                                    IBuilderConfig* config,
                                    const std::string& strCaffeModelFile,
                                    const std::string& strCaffeDeployFile,
                                    const std::vector<std::string>& vecOutputs);

    // Build from ONNX and serialize the engine into modelStream.
    void ONNXToModel(Logger gLogger, unsigned int maxBatchSize,
                     unsigned int maxWorkSpaceSize, IHostMemory** modelStream,
                     std::string& onnx_model_name);
    // Build from Caffe and serialize the engine into modelStream.
    void CaffeToModel(Logger gLogger, unsigned int maxBatchSize,
                      unsigned int maxWorkSpaceSize, IHostMemory** modelStream,
                      std::string& caffe_model_name, std::string& caffe_deploy_name,
                      std::vector<std::string>& outputs);

    // Copy the input to the device, enqueue inference on the stream,
    // and copy the output back to the host.
    void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                     unsigned int inputIndex, float* input, int inputSize,
                     unsigned int outputIndex, float* output, int outputSize,
                     int batchSize);
    // Variant without the host-to-device input copy (input already in buffers).
    void doInferenceV2(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int outputIndex, float* output, int outputSize,
                       int batchSize);
    void doInferenceV3(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int inputIndex, float* input, int inputSize,
                       unsigned int outputIndex, float* output, int outputSize,
                       int batchSize);
    void doInferenceV4(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int outputIndex, float* output, int outputSize,
                       int batchSize);

    float* pfBuffers_[2];
    float* pfInputData_ = nullptr;
    float* pfOutputData_ = nullptr;
    uint8_t* pu8ImgHost_ = nullptr;                   // related memory allocations
    uint8_t* pu8ImgDevice_ = nullptr;
    unsigned int uiInputIndex_ = 0, uiOutputIndex_ = 0;
    cudaStream_t* pImagePreprocessStream_ = nullptr;  // CUDA stream for image preprocessing
    cudaStream_t* pInferenceModelStream_ = nullptr;   // CUDA stream for model inference
    Logger* pGLogger_ = nullptr;
    IRuntime* pRuntime_ = nullptr;
    ICudaEngine* pEngine_ = nullptr;
    IExecutionContext* pContext_ = nullptr;

private:
};

#endif // END OF _INFERENCE_H_
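
// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the header). It assumes ONNXToModel
// builds the engine internally and serializes it into modelStream, that the
// Logger from logging.h derives from nvinfer1::ILogger as in the TensorRT
// samples, and that the pre-TensorRT-8 destroy() API is available (the Caffe
// parser suggests such a version). File names and sizes are placeholders.
// ---------------------------------------------------------------------------
#include <fstream>
#include "inference.h"

int main() {
    Logger gLogger;
    Inference infer;

    std::string onnxPath = "model.onnx";  // placeholder model path
    IHostMemory* modelStream = nullptr;

    // Build from ONNX and serialize the resulting engine into modelStream.
    infer.ONNXToModel(gLogger, /*maxBatchSize=*/1, /*maxWorkSpaceSize=*/1U << 30,
                      &modelStream, onnxPath);

    // Persist the plan so later runs can deserialize instead of rebuilding.
    std::ofstream plan("model.engine", std::ios::binary);
    plan.write(static_cast<const char*>(modelStream->data()), modelStream->size());

    modelStream->destroy();
    return 0;
}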