#ifndef INFERENCE_H
#define INFERENCE_H

#include "AppCommon.h"  // expected to provide the TensorRT (NvInfer) and CUDA runtime declarations used below

#define ENABLE_CUDA_PREPROCESS

class Inference
{
public:
    Inference();
    ~Inference();

    // Builds a TensorRT engine from an ONNX model file.
    ICudaEngine* build_engine_onnx(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize,
                                   IBuilder* builder, IBuilderConfig* config, std::string& source_onnx);

    // Builds the engine from the ONNX model and serializes it into modelStream.
    void APIToModel(Logger gLogger, unsigned int maxBatchSize, unsigned int maxWorkSpaceSize,
                    IHostMemory** modelStream, std::string& onnx_model_name);

    // Runs inference using the given input/output pointers and binding indices.
    void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                     unsigned int inputIndex, float* input, int inputSize,
                     unsigned int outputIndex, float* output, int outputSize, int batchSize);

    // Variant without the input-side arguments.
    void doInferenceV2(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int outputIndex, float* output, int outputSize, int batchSize);

    // Additional inference variants (same parameter lists as doInference / doInferenceV2).
    void doInferenceV3(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int inputIndex, float* input, int inputSize,
                       unsigned int outputIndex, float* output, int outputSize, int batchSize);

    void doInferenceV4(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                       unsigned int outputIndex, float* output, int outputSize, int batchSize);

    float* pfBuffers_[2];                                  // binding buffers (input / output)
    float* pfInputData_ = nullptr;                         // host-side input buffer
    float* pfOutputData_ = nullptr;                        // host-side output buffer
    uint8_t* pu8ImgHost_ = nullptr;                        // related memory allocations (host image buffer)
    uint8_t* pu8ImgDevice_ = nullptr;                      // device image buffer
    unsigned int uiInputIndex_ = 0, uiOutputIndex_ = 0;    // binding indices of the input / output tensors
    cudaStream_t* pImagePreprocessStream_ = nullptr;       // CUDA stream for image preprocessing
    cudaStream_t* pInferenceModelStream_ = nullptr;        // CUDA stream for model inference
    Logger* pGLogger_ = nullptr;
    IRuntime* pRuntime_ = nullptr;
    ICudaEngine* pEngine_ = nullptr;
    IExecutionContext* pContext_ = nullptr;

private:
};

#endif  // END OF INFERENCE_H
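
// Hedged usage sketch (assumption: the sequence below mirrors the common TensorRT
// serialize-then-deserialize workflow; the actual call sequence lives in the
// corresponding .cpp / application code, which is not shown here):
//
//   Inference infer;
//   Logger gLogger;
//   std::string onnxPath = "model.onnx";   // hypothetical model path
//   IHostMemory* modelStream = nullptr;
//
//   // Build an engine from the ONNX model and serialize it
//   // (batch size and workspace size are illustrative values only).
//   infer.APIToModel(gLogger, 1, 1 << 28, &modelStream, onnxPath);
//
//   // After deserializing with pRuntime_ and creating pContext_, call one of
//   // the doInference* overloads per frame, passing the recorded
//   // uiInputIndex_ / uiOutputIndex_ binding indices.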