#include "ImagePreprocessEngine.h"

using namespace std;
using namespace cv;
using namespace ai_matrix;

/// Default constructor. Acquires nothing; resources are set up in Init().
ImagePreprocessEngine::ImagePreprocessEngine() {}

/// Destructor. Releases nothing itself; resources are released in DeInit().
ImagePreprocessEngine::~ImagePreprocessEngine() {}

/**
 * @brief Prepare the engine: port name, source image size, CUDA stream and
 *        the host/device staging buffers used by Process().
 *
 * Every CUDA call goes through CUDA_CHECK; what happens on failure is
 * determined by that macro, which is defined outside this file.
 *
 * @return APP_ERR_OK once all steps have been issued.
 */
APP_ERROR ImagePreprocessEngine::Init()
{
    // Name of the queue port used for both input and output in Process().
    strPort0_ = engineName_ + "_" + std::to_string(engineId_) + "_0";

    width_ = IMAGE_WIDTH;
    height_ = IMAGE_HEIGHT;

    // Resource allocation: stream plus host-side and device-side memory.
    // Select the GPU first; the result is checked like every other CUDA call
    // here so that a bad device index is not silently ignored.
    CUDA_CHECK(cudaSetDevice(DEVICE));

    image_preprocess_stream_ = new cudaStream_t;
    CUDA_CHECK(cudaStreamCreate(image_preprocess_stream_));

    // The CUDA allocators write the buffer address into these pointers, so
    // they must not already own heap memory (the previous `new uint8_t`
    // placeholders were leaked as soon as the pointers were overwritten).
    img_host_ = nullptr;
    img_device_ = nullptr;

    // Host-side staging buffer for the image to be preprocessed.
    CUDA_CHECK(cudaMallocHost(reinterpret_cast<void**>(&img_host_), MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // Device-side buffer for the image to be preprocessed.
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&img_device_), MAX_IMAGE_INPUT_SIZE_THRESH * 3));

    LogInfo << "engineId_:" << engineId_ << " ImagePreprocessEngine Init ok";
    return APP_ERR_OK;
}

/**
 * @brief Release everything Init() acquired: the CUDA stream, the device
 *        buffer and the host buffer.
 *
 * Each resource is released only if its pointer is non-null and the pointer
 * is cleared afterwards, so calling DeInit() a second time is a no-op.
 * NOTE(review): calling DeInit() without a prior Init() is only safe if the
 * members are null-initialised in the class declaration — confirm in header.
 *
 * @return APP_ERR_OK once all releases have been issued.
 */
APP_ERROR ImagePreprocessEngine::DeInit()
{
    // Release the image-preprocessing CUDA stream.
    if (image_preprocess_stream_ != nullptr) {
        CUDA_CHECK(cudaStreamDestroy(*image_preprocess_stream_));
        delete image_preprocess_stream_;
        image_preprocess_stream_ = nullptr;
    }

    // Release device-side memory.
    if (img_device_ != nullptr) {
        CUDA_CHECK(cudaFree(img_device_));
        img_device_ = nullptr;
    }

    // Release host-side memory.
    if (img_host_ != nullptr) {
        CUDA_CHECK(cudaFreeHost(img_host_));
        img_host_ = nullptr;
    }

    LogInfo << "engineId_:" << engineId_ << " ImagePreprocessEngine DeInit ok";
    return APP_ERR_OK;
}

APP_ERROR ImagePreprocessEngine::Process() { unsigned int input_data_buffers_size = BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float); size_t size_image = width_ * height_ * 3; size_t size_image_dst = INPUT_H * INPUT_W * 3; uint64_t u64count_num = 0; int iRet = APP_ERR_OK; while (!isStop_) { // 从上一引擎获取图像数据 std::shared_ptr pVoidData0 = nullptr; inputQueMap_[strPort0_]->pop(pVoidData0); if (nullptr == pVoidData0) { usleep(1*1000); //n ms continue; } // std::cout<<"Enter Image Preproess Thread "<<++u64count_num<<" Times!"< pRGBFrameData = std::static_pointer_cast(pVoidData0); //copy data to pinned memory // memcpy(img_host_,pRGBFrameData->pData.get(),size_image); //拷贝预处理数据到HOST侧 // auto memcpy_start = std::chrono::system_clock::now(); //计时开始 
memcpy(img_host_, pRGBFrameData->pData.get(), pRGBFrameData->iSize); //拷贝预处理数据到HOST侧 // auto memcpy_end = std::chrono::system_clock::now(); //计时结束 // std::cout << "ImagePreprocessEngine memcpy time: " << std::chrono::duration_cast(memcpy_end - memcpy_start).count() << "ms" << std::endl; //copy data to device memory #ifdef CUDA_MEMCPY_TIME_CONSUMING_TEST auto cuda_memcpy_start = std::chrono::system_clock::now(); //计时开始 CUDA_CHECK(cudaMemcpyAsync(img_device_,img_host_,size_image,cudaMemcpyHostToDevice,*image_preprocess_stream_)); //拷贝预处理数据到Device侧 auto cuda_memcpy_end = std::chrono::system_clock::now(); //计时结束 std::cout<< "ImagePreprocessEngine cuda memcpy data size is: "<(cuda_memcpy_end - cuda_memcpy_start).count() << "ms" << std::endl; #else CUDA_CHECK(cudaMemcpyAsync(img_device_,img_host_,size_image,cudaMemcpyHostToDevice,*image_preprocess_stream_)); //拷贝预处理数据到Device侧 #endif //组织数据压入下一引擎 float* input_data_buffers = new float; #ifdef CUDA_MALLOC_TIME_CONSUMING_TEST auto cuda_malloc_start = std::chrono::system_clock::now(); //计时开始 CUDA_CHECK(cudaMalloc((void**)&input_data_buffers, input_data_buffers_size)); //在DEVICE侧申请预处理输入数据缓存 auto cuda_malloc_end = std::chrono::system_clock::now(); std::cout<< "ImagePreprocessEngine cuda device malloc data size is: "<(cuda_malloc_end - cuda_malloc_start).count() << "ms" << std::endl; #else CUDA_CHECK(cudaMalloc((void**)&input_data_buffers, input_data_buffers_size)); //在DEVICE侧申请预处理输入数据缓存 #endif //预处理CUDA核函数 std::cout << "src width = " << width_ << " src height= " << height_ << " dest width = " << INPUT_W << " desc height= " << INPUT_H << std::endl; preprocess_kernel_img(img_device_, width_, height_, input_data_buffers, INPUT_W, INPUT_H, *image_preprocess_stream_); //构造预处理数据及源数据 void* pSrcRGBBuffer = nullptr; unsigned int pSrcRGBBuffer_Size = pRGBFrameData->iSize; pSrcRGBBuffer = new uint8_t[pSrcRGBBuffer_Size]; memcpy(pSrcRGBBuffer, pRGBFrameData->pData.get(), pSrcRGBBuffer_Size); std::shared_ptr pImagePreprocessData = 
std::make_shared(); pImagePreprocessData->iDataSource = engineId_; pImagePreprocessData->iSize = input_data_buffers_size; pImagePreprocessData->pData.reset(input_data_buffers, cudaFree); pImagePreprocessData->iSrcSize = pSrcRGBBuffer_Size; pImagePreprocessData->pSrcData.reset(pSrcRGBBuffer, [](void* data){if(data) {delete[] data; data = nullptr;}}); pImagePreprocessData->i64TimeStamp = pRGBFrameData->i64TimeStamp; //压入input_data_buffers #if 1 iRet = outputQueMap_[strPort0_]->push(std::static_pointer_cast(pImagePreprocessData)); if (iRet != APP_ERR_OK){ LogError << "push the image preprocess data failed..."; // std::cerr<<"push the image preprocess data failed..."<