// Generated from zhangwei/Train_Identify (130 lines, 6.2 KiB, C++).
#include "ImagePreprocessEngine.h"
|
|
|
|
using namespace std;
|
|
using namespace cv;
|
|
using namespace ai_matrix;
|
|
|
|
/// Constructs the engine; no work is done here, resources are acquired in Init().
ImagePreprocessEngine::ImagePreprocessEngine() = default;
|
|
|
|
/// Destroys the engine; no work is done here, resources are released in DeInit().
ImagePreprocessEngine::~ImagePreprocessEngine() = default;
|
|
|
|
/**
 * @brief Prepares the engine: builds the port name, records the source image
 *        size and allocates the CUDA stream plus the host/device staging buffers.
 *
 * The buffers allocated here are released in DeInit().
 *
 * @return APP_ERR_OK once every allocation has gone through CUDA_CHECK.
 */
APP_ERROR ImagePreprocessEngine::Init()
{
    // Port "0" of this engine instance; used as key into the input/output queue maps.
    strPort0_ = engineName_ + "_" + std::to_string(engineId_) + "_0";

    width_ = IMAGE_WIDTH;
    height_ = IMAGE_HEIGHT;

    // Resource allocation (stream, host-side and device-side memory).
    // Select the GPU; checked like every other CUDA call in this file.
    CUDA_CHECK(cudaSetDevice(DEVICE));

    image_preprocess_stream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(image_preprocess_stream_));

    // cudaMallocHost/cudaMalloc overwrite these pointers, so they must not be
    // pre-allocated with `new` (the previous code leaked those allocations).
    img_host_ = nullptr;
    img_device_ = nullptr;

    // Pinned host staging buffer for the raw input image.
    // NOTE(review): presumably MAX_IMAGE_INPUT_SIZE_THRESH is a pixel count and
    // "* 3" is the channel count - confirm against its definition.
    CUDA_CHECK(cudaMallocHost((void**)&img_host_, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // Device staging buffer of the same size.
    CUDA_CHECK(cudaMalloc((void**)&img_device_, MAX_IMAGE_INPUT_SIZE_THRESH * 3));

    LogInfo << "engineId_:" << engineId_ << " ImagePreprocessEngine Init ok";
    return APP_ERR_OK;
}
|
|
|
|
/**
 * @brief Releases the CUDA stream and the host/device staging buffers.
 *
 * Every pointer is checked before it is released and reset to nullptr
 * afterwards, so calling DeInit() a second time does not free anything twice.
 *
 * @return APP_ERR_OK.
 */
APP_ERROR ImagePreprocessEngine::DeInit()
{
    // Release the image-preprocessing CUDA stream.
    if (image_preprocess_stream_ != nullptr)
    {
        CUDA_CHECK(cudaStreamDestroy(*image_preprocess_stream_));
        delete image_preprocess_stream_;
        image_preprocess_stream_ = nullptr;
    }

    // Release device-side memory.
    if (img_device_ != nullptr)
    {
        CUDA_CHECK(cudaFree(img_device_));
        img_device_ = nullptr;
    }

    // Release host-side (pinned) memory.
    if (img_host_ != nullptr)
    {
        CUDA_CHECK(cudaFreeHost(img_host_));
        img_host_ = nullptr;
    }

    LogInfo << "engineId_:" << engineId_ << " ImagePreprocessEngine DeInit ok";
    return APP_ERR_OK;
}
|
|
|
|
|
|
/**
 * @brief Worker loop: pops frames from the input queue, runs the CUDA
 *        preprocessing kernel on them and pushes the result to the output queue.
 *
 * For every frame the loop
 *   1. copies the frame into the pinned host buffer and uploads it to the device,
 *   2. allocates a device buffer for the network input and runs
 *      preprocess_kernel_img() into it,
 *   3. packs the device buffer together with a host copy of the source frame
 *      into an InferenceData and pushes it to the output port.
 *
 * Frames with no data, or larger than the pinned host buffer, are logged and dropped.
 *
 * @return APP_ERR_OK once isStop_ becomes true and the loop exits.
 */
APP_ERROR ImagePreprocessEngine::Process()
{
    // Size in bytes of the device buffer handed to the next engine.
    const unsigned int input_data_buffers_size = BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float);
    // Number of bytes uploaded to the device per frame (3 bytes per source pixel).
    const size_t size_image = static_cast<size_t>(width_) * height_ * 3;
    // Capacity of img_host_ / img_device_ as allocated in Init().
    const size_t host_capacity = static_cast<size_t>(MAX_IMAGE_INPUT_SIZE_THRESH) * 3;

    while (!isStop_)
    {
        // Fetch image data from the previous engine.
        std::shared_ptr<void> pVoidData0 = nullptr;
        inputQueMap_[strPort0_]->pop(pVoidData0);
        if (nullptr == pVoidData0)
        {
            usleep(1*1000); // queue empty: back off for 1 ms
            continue;
        }

        std::shared_ptr<FrameData> pRGBFrameData = std::static_pointer_cast<FrameData>(pVoidData0);

        // Reject frames that would overflow the pinned host buffer or carry no data.
        const unsigned int frame_size = pRGBFrameData->iSize;
        if (pRGBFrameData->pData.get() == nullptr || frame_size > host_capacity)
        {
            LogError << "invalid frame dropped, size:" << frame_size
                     << " host buffer capacity:" << host_capacity;
            continue;
        }

        // Copy the frame into pinned host memory.
        memcpy(img_host_, pRGBFrameData->pData.get(), frame_size);

        // Upload the frame to device memory (optionally timed).
#ifdef CUDA_MEMCPY_TIME_CONSUMING_TEST
        auto cuda_memcpy_start = std::chrono::system_clock::now();
#endif
        CUDA_CHECK(cudaMemcpyAsync(img_device_, img_host_, size_image, cudaMemcpyHostToDevice, *image_preprocess_stream_));
#ifdef CUDA_MEMCPY_TIME_CONSUMING_TEST
        auto cuda_memcpy_end = std::chrono::system_clock::now();
        std::cout<< "ImagePreprocessEngine cuda memcpy data size is: "<<size_image<<std::endl;
        std::cout << "ImagePreprocessEngine cuda memcpy host to device time: " << std::chrono::duration_cast<std::chrono::milliseconds>(cuda_memcpy_end - cuda_memcpy_start).count() << "ms" << std::endl;
#endif

        // Allocate the device buffer that receives the preprocessed network input.
        // cudaMalloc overwrites the pointer, so it starts as nullptr (the previous
        // `new float` was leaked on every frame).
        float* input_data_buffers = nullptr;
#ifdef CUDA_MALLOC_TIME_CONSUMING_TEST
        auto cuda_malloc_start = std::chrono::system_clock::now();
#endif
        CUDA_CHECK(cudaMalloc((void**)&input_data_buffers, input_data_buffers_size));
#ifdef CUDA_MALLOC_TIME_CONSUMING_TEST
        auto cuda_malloc_end = std::chrono::system_clock::now();
        std::cout<< "ImagePreprocessEngine cuda device malloc data size is: "<<input_data_buffers_size<<std::endl;
        std::cout << "ImagePreprocessEngine cuda device malloc time: " << std::chrono::duration_cast<std::chrono::milliseconds>(cuda_malloc_end - cuda_malloc_start).count() << "ms" << std::endl;
#endif

        // Hand the device buffer to its owner right away so it is released even
        // if a later step in this iteration throws.
        std::shared_ptr<InferenceData> pImagePreprocessData = std::make_shared<InferenceData>();
        pImagePreprocessData->pData.reset(input_data_buffers, cudaFree);

        // Run the preprocessing CUDA kernel.
        std::cout << "src width = " << width_ << " src height= " << height_ << " dest width = " << INPUT_W << " desc height= " << INPUT_H << std::endl;
        preprocess_kernel_img(img_device_, width_, height_, input_data_buffers, INPUT_W, INPUT_H, *image_preprocess_stream_);
        // NOTE(review): the upload and (presumably) the kernel are asynchronous on
        // image_preprocess_stream_, yet img_host_ is rewritten on the next iteration
        // and the output is consumed by another engine. Confirm that
        // preprocess_kernel_img or the consumer synchronizes this stream.

        // Keep a host-side copy of the source frame alongside the preprocessed data.
        void* pSrcRGBBuffer = new uint8_t[frame_size];
        memcpy(pSrcRGBBuffer, pRGBFrameData->pData.get(), frame_size);

        pImagePreprocessData->iDataSource = engineId_;
        pImagePreprocessData->iSize = input_data_buffers_size;
        pImagePreprocessData->iSrcSize = frame_size;
        // delete[] on a void* is undefined behaviour; cast back to the allocated type.
        pImagePreprocessData->pSrcData.reset(pSrcRGBBuffer, [](void* data){ delete[] static_cast<uint8_t*>(data); });
        pImagePreprocessData->i64TimeStamp = pRGBFrameData->i64TimeStamp;

        // Push the preprocessed data to the next engine.
        const int iRet = outputQueMap_[strPort0_]->push(std::static_pointer_cast<void>(pImagePreprocessData));
        if (iRet != APP_ERR_OK)
        {
            LogError << "push the image preprocess data failed...";
        }
    }

    // The loop only exits when the engine is asked to stop.
    return APP_ERR_OK;
}