492 lines
15 KiB
Plaintext
492 lines
15 KiB
Plaintext
#include "preprocess.h"
|
|
|
|
|
|
// Affine-transform (warp-affine) kernel used for YOLOv5 detection input.
//
// Each thread produces one destination pixel. The thread's flat index is
// split into (dx, dy), mapped into source coordinates through the 2x3 matrix
// `d2s`, and the source is sampled with bilinear weights. The three sampled
// channels are then swapped (first <-> third), divided by 255 and written to
// three consecutive planes of `dst`, each `dst_width * dst_height` floats.
//
// Parameters:
//   src            interleaved 3-byte-per-pixel source image
//   src_line_size  byte stride between two source rows
//   src_width/src_height  source size in pixels
//   dst            output buffer; receives 3 * dst_width * dst_height floats
//   dst_width/dst_height  destination size in pixels
//   const_value_st value used for every channel of a sample that falls
//                  outside the source image
//   d2s            destination-to-source affine matrix (6 coefficients)
//   edge           number of destination pixels; threads with a flat index
//                  >= edge do nothing
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are used).
__global__ void yolov5_detect_warpaffine_kernel(
        uint8_t* src, int src_line_size, int src_width,
        int src_height, float* dst, int dst_width,
        int dst_height, uint8_t const_value_st,
        AffineMatrix d2s, int edge) {
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= edge) {
        return;
    }

    // Destination pixel handled by this thread.
    const int dx = idx % dst_width;
    const int dy = idx / dst_width;

    // Map the destination pixel into the source image; a 0.5f offset is
    // added to both coordinates.
    const float src_x = d2s.value[0] * dx + d2s.value[1] * dy + d2s.value[2] + 0.5f;
    const float src_y = d2s.value[3] * dx + d2s.value[4] * dy + d2s.value[5] + 0.5f;

    // Start from the border colour; overwritten below when the sample
    // position is inside the source.
    float c0 = const_value_st;
    float c1 = const_value_st;
    float c2 = const_value_st;

    const bool out_of_range = src_x <= -1 || src_x >= src_width ||
                              src_y <= -1 || src_y >= src_height;
    if (!out_of_range) {
        // Integer corners of the 2x2 neighbourhood around the sample.
        const int x0 = floorf(src_x);
        const int y0 = floorf(src_y);
        const int x1 = x0 + 1;
        const int y1 = y0 + 1;

        // Stand-in pixel for neighbours that lie outside the image.
        uint8_t border[] = {const_value_st, const_value_st, const_value_st};

        // Fractional distances to the low corner and their complements.
        const float fx = src_x - x0;
        const float fy = src_y - y0;
        const float gx = 1 - fx;
        const float gy = 1 - fy;

        // Bilinear weights: top-left, top-right, bottom-left, bottom-right.
        const float w_tl = gy * gx;
        const float w_tr = gy * fx;
        const float w_bl = fy * gx;
        const float w_br = fy * fx;

        uint8_t* p_tl = border;
        uint8_t* p_tr = border;
        uint8_t* p_bl = border;
        uint8_t* p_br = border;

        const bool x0_valid = x0 >= 0;
        const bool x1_valid = x1 < src_width;

        if (y0 >= 0) {
            uint8_t* row = src + y0 * src_line_size;
            if (x0_valid) p_tl = row + x0 * 3;
            if (x1_valid) p_tr = row + x1 * 3;
        }
        if (y1 < src_height) {
            uint8_t* row = src + y1 * src_line_size;
            if (x0_valid) p_bl = row + x0 * 3;
            if (x1_valid) p_br = row + x1 * 3;
        }

        c0 = w_tl * p_tl[0] + w_tr * p_tr[0] + w_bl * p_bl[0] + w_br * p_br[0];
        c1 = w_tl * p_tl[1] + w_tr * p_tr[1] + w_bl * p_bl[1] + w_br * p_br[1];
        c2 = w_tl * p_tl[2] + w_tr * p_tr[2] + w_bl * p_bl[2] + w_br * p_br[2];
    }

    // Interleaved (rgbrgbrgb) -> planar (rrrgggbbb) output addresses.
    const int plane = dst_width * dst_height;
    float* out0 = dst + dy * dst_width + dx;
    float* out1 = out0 + plane;
    float* out2 = out1 + plane;

    // BGR -> RGB (the first and third channel trade places) combined with
    // the normalization step (divide by 255).
    *out0 = c2 / 255.0f;
    *out1 = c1 / 255.0f;
    *out2 = c0 / 255.0f;
}
|
|
|
|
// Warp-affine kernel used for YOLOv5 classification input.
//
// Each thread produces one destination pixel: the flat thread index is split
// into (dx, dy), mapped into source coordinates through the 2x3 matrix `d2s`
// and sampled bilinearly from the 3-byte-per-pixel interleaved source. The
// channels are swapped (first <-> third), scaled by 1/255, standardized with
// a per-channel mean / standard deviation and written to three consecutive
// planes of `dst`, each `dst_width * dst_height` floats.
//
// Parameters:
//   src            interleaved 3-byte-per-pixel source image
//   src_line_size  byte stride between two source rows
//   src_width/src_height  source size in pixels
//   dst            output buffer; receives 3 * dst_width * dst_height floats
//   dst_width/dst_height  destination size in pixels
//   const_value_st value used for every channel of a sample that falls
//                  outside the source image
//   d2s            destination-to-source affine matrix (6 coefficients)
//   edge           number of destination pixels; threads with a flat index
//                  >= edge do nothing
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are used).
__global__ void yolov5_classify_warpaffine_kernel(
        uint8_t* src, int src_line_size, int src_width,
        int src_height, float* dst, int dst_width,
        int dst_height, uint8_t const_value_st,
        AffineMatrix d2s, int edge) {
    int position = blockDim.x * blockIdx.x + threadIdx.x;
    if (position >= edge) return;

    float m_x1 = d2s.value[0];
    float m_y1 = d2s.value[1];
    float m_z1 = d2s.value[2];
    float m_x2 = d2s.value[3];
    float m_y2 = d2s.value[4];
    float m_z2 = d2s.value[5];

    // Destination pixel handled by this thread.
    int dx = position % dst_width;
    int dy = position / dst_width;

    // Map the destination pixel into the source image; a 0.5f offset is
    // added to both coordinates.
    float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f;
    float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f;
    float c0, c1, c2;

    if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
        // Out of range: use the border value for all channels.
        c0 = const_value_st;
        c1 = const_value_st;
        c2 = const_value_st;
    } else {
        // Integer corners of the 2x2 neighbourhood around the sample.
        int y_low = floorf(src_y);
        int x_low = floorf(src_x);
        int y_high = y_low + 1;
        int x_high = x_low + 1;

        // Stand-in pixel for neighbours that lie outside the image.
        uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
        float ly = src_y - y_low;
        float lx = src_x - x_low;
        float hy = 1 - ly;
        float hx = 1 - lx;
        // Bilinear weights for the four neighbours.
        float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
        uint8_t* v1 = const_value;
        uint8_t* v2 = const_value;
        uint8_t* v3 = const_value;
        uint8_t* v4 = const_value;

        if (y_low >= 0) {
            if (x_low >= 0)
                v1 = src + y_low * src_line_size + x_low * 3;

            if (x_high < src_width)
                v2 = src + y_low * src_line_size + x_high * 3;
        }

        if (y_high < src_height) {
            if (x_low >= 0)
                v3 = src + y_high * src_line_size + x_low * 3;

            if (x_high < src_width)
                v4 = src + y_high * src_line_size + x_high * 3;
        }

        c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0];
        c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1];
        c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2];
    }

    // BGR -> RGB: swap the first and third channel.
    float t = c2;
    c2 = c0;
    c0 = t;

    // Scale to 1/255 first, then subtract the mean and divide by the
    // standard deviation. The constants carry an `f` suffix so the whole
    // expression stays in single precision instead of being promoted to
    // double on the device.
    //
    // NOTE(review): after the swap above c0 is the channel the comment calls
    // R, yet it is standardized with 0.406 / 0.225, which in the commonly
    // used ImageNet statistics belong to B (R is 0.485 / 0.229). The pairing
    // is kept exactly as it was; confirm against the model's training
    // pipeline before changing it.
    c0 = ((c0 / 255.0f) - 0.406f) / 0.225f;
    c1 = ((c1 / 255.0f) - 0.456f) / 0.224f;
    c2 = ((c2 / 255.0f) - 0.485f) / 0.229f;

    // Interleaved (rgbrgbrgb) -> planar (rrrgggbbb).
    int area = dst_width * dst_height;
    float* pdst_c0 = dst + dy * dst_width + dx;
    float* pdst_c1 = pdst_c0 + area;
    float* pdst_c2 = pdst_c1 + area;
    *pdst_c0 = c0;
    *pdst_c1 = c1;
    *pdst_c2 = c2;
}
|
|
|
|
// Warp-affine kernel used for RetinaNet detection input.
//
// Each thread produces one destination pixel. The thread's flat index is
// split into (dx, dy), mapped into source coordinates through the 2x3 matrix
// `d2s`, and the source is sampled with bilinear weights. The channels are
// swapped (first <-> third), a fixed per-channel mean (104 / 117 / 123) is
// subtracted, and the results are written to three consecutive planes of
// `dst`, each `dst_width * dst_height` floats. No division by 255 is done.
//
// Parameters:
//   src            interleaved 3-byte-per-pixel source image
//   src_line_size  byte stride between two source rows
//   src_width/src_height  source size in pixels
//   dst            output buffer; receives 3 * dst_width * dst_height floats
//   dst_width/dst_height  destination size in pixels
//   const_value_st value used for every channel of a sample that falls
//                  outside the source image
//   d2s            destination-to-source affine matrix (6 coefficients)
//   edge           number of destination pixels; threads with a flat index
//                  >= edge do nothing
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are used).
__global__ void retinanet_detect_warpaffine_kernel(
        uint8_t* src, int src_line_size, int src_width,
        int src_height, float* dst, int dst_width,
        int dst_height, uint8_t const_value_st,
        AffineMatrix d2s, int edge) {
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= edge) {
        return;
    }

    // Destination pixel handled by this thread.
    const int dx = idx % dst_width;
    const int dy = idx / dst_width;

    // Map the destination pixel into the source image; a 0.5f offset is
    // added to both coordinates.
    const float src_x = d2s.value[0] * dx + d2s.value[1] * dy + d2s.value[2] + 0.5f;
    const float src_y = d2s.value[3] * dx + d2s.value[4] * dy + d2s.value[5] + 0.5f;

    // Start from the border colour; overwritten below when the sample
    // position is inside the source.
    float c0 = const_value_st;
    float c1 = const_value_st;
    float c2 = const_value_st;

    const bool out_of_range = src_x <= -1 || src_x >= src_width ||
                              src_y <= -1 || src_y >= src_height;
    if (!out_of_range) {
        // Integer corners of the 2x2 neighbourhood around the sample.
        const int x0 = floorf(src_x);
        const int y0 = floorf(src_y);
        const int x1 = x0 + 1;
        const int y1 = y0 + 1;

        // Stand-in pixel for neighbours that lie outside the image.
        uint8_t border[] = {const_value_st, const_value_st, const_value_st};

        // Fractional distances to the low corner and their complements.
        const float fx = src_x - x0;
        const float fy = src_y - y0;
        const float gx = 1 - fx;
        const float gy = 1 - fy;

        // Bilinear weights: top-left, top-right, bottom-left, bottom-right.
        const float w_tl = gy * gx;
        const float w_tr = gy * fx;
        const float w_bl = fy * gx;
        const float w_br = fy * fx;

        uint8_t* p_tl = border;
        uint8_t* p_tr = border;
        uint8_t* p_bl = border;
        uint8_t* p_br = border;

        const bool x0_valid = x0 >= 0;
        const bool x1_valid = x1 < src_width;

        if (y0 >= 0) {
            uint8_t* row = src + y0 * src_line_size;
            if (x0_valid) p_tl = row + x0 * 3;
            if (x1_valid) p_tr = row + x1 * 3;
        }
        if (y1 < src_height) {
            uint8_t* row = src + y1 * src_line_size;
            if (x0_valid) p_bl = row + x0 * 3;
            if (x1_valid) p_br = row + x1 * 3;
        }

        c0 = w_tl * p_tl[0] + w_tr * p_tr[0] + w_bl * p_bl[0] + w_br * p_br[0];
        c1 = w_tl * p_tl[1] + w_tr * p_tr[1] + w_bl * p_bl[1] + w_br * p_br[1];
        c2 = w_tl * p_tl[2] + w_tr * p_tr[2] + w_bl * p_bl[2] + w_br * p_br[2];
    }

    // Interleaved (rgbrgbrgb) -> planar (rrrgggbbb) output addresses.
    const int plane = dst_width * dst_height;
    float* out0 = dst + dy * dst_width + dx;
    float* out1 = out0 + plane;
    float* out2 = out1 + plane;

    // BGR -> RGB (the first and third channel trade places) combined with
    // the mean subtraction: plane 0 gets -104, plane 1 -117, plane 2 -123.
    *out0 = c2 - 104.0f;
    *out1 = c1 - 117.0f;
    *out2 = c0 - 123.0f;
}
|
|
|
|
// Warp-affine kernel used for RetinaNet classification input.
//
// Each thread produces one destination pixel. The thread's flat index is
// split into (dx, dy), mapped into source coordinates through the 2x3 matrix
// `d2s`, and the source is sampled with bilinear weights. The channels are
// swapped (first <-> third), a fixed per-channel mean (104 / 117 / 123) is
// subtracted, and the results are written to three consecutive planes of
// `dst`, each `dst_width * dst_height` floats. No division by 255 is done.
//
// Parameters:
//   src            interleaved 3-byte-per-pixel source image
//   src_line_size  byte stride between two source rows
//   src_width/src_height  source size in pixels
//   dst            output buffer; receives 3 * dst_width * dst_height floats
//   dst_width/dst_height  destination size in pixels
//   const_value_st value used for every channel of a sample that falls
//                  outside the source image
//   d2s            destination-to-source affine matrix (6 coefficients)
//   edge           number of destination pixels; threads with a flat index
//                  >= edge do nothing
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are used).
__global__ void retinanet_classify_warpaffine_kernel(
        uint8_t* src, int src_line_size, int src_width,
        int src_height, float* dst, int dst_width,
        int dst_height, uint8_t const_value_st,
        AffineMatrix d2s, int edge) {
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= edge) {
        return;
    }

    // Destination pixel handled by this thread.
    const int dx = idx % dst_width;
    const int dy = idx / dst_width;

    // Map the destination pixel into the source image; a 0.5f offset is
    // added to both coordinates.
    const float src_x = d2s.value[0] * dx + d2s.value[1] * dy + d2s.value[2] + 0.5f;
    const float src_y = d2s.value[3] * dx + d2s.value[4] * dy + d2s.value[5] + 0.5f;

    // Start from the border colour; overwritten below when the sample
    // position is inside the source.
    float c0 = const_value_st;
    float c1 = const_value_st;
    float c2 = const_value_st;

    const bool out_of_range = src_x <= -1 || src_x >= src_width ||
                              src_y <= -1 || src_y >= src_height;
    if (!out_of_range) {
        // Integer corners of the 2x2 neighbourhood around the sample.
        const int x0 = floorf(src_x);
        const int y0 = floorf(src_y);
        const int x1 = x0 + 1;
        const int y1 = y0 + 1;

        // Stand-in pixel for neighbours that lie outside the image.
        uint8_t border[] = {const_value_st, const_value_st, const_value_st};

        // Fractional distances to the low corner and their complements.
        const float fx = src_x - x0;
        const float fy = src_y - y0;
        const float gx = 1 - fx;
        const float gy = 1 - fy;

        // Bilinear weights: top-left, top-right, bottom-left, bottom-right.
        const float w_tl = gy * gx;
        const float w_tr = gy * fx;
        const float w_bl = fy * gx;
        const float w_br = fy * fx;

        uint8_t* p_tl = border;
        uint8_t* p_tr = border;
        uint8_t* p_bl = border;
        uint8_t* p_br = border;

        const bool x0_valid = x0 >= 0;
        const bool x1_valid = x1 < src_width;

        if (y0 >= 0) {
            uint8_t* row = src + y0 * src_line_size;
            if (x0_valid) p_tl = row + x0 * 3;
            if (x1_valid) p_tr = row + x1 * 3;
        }
        if (y1 < src_height) {
            uint8_t* row = src + y1 * src_line_size;
            if (x0_valid) p_bl = row + x0 * 3;
            if (x1_valid) p_br = row + x1 * 3;
        }

        c0 = w_tl * p_tl[0] + w_tr * p_tr[0] + w_bl * p_bl[0] + w_br * p_br[0];
        c1 = w_tl * p_tl[1] + w_tr * p_tr[1] + w_bl * p_bl[1] + w_br * p_br[1];
        c2 = w_tl * p_tl[2] + w_tr * p_tr[2] + w_bl * p_bl[2] + w_br * p_br[2];
    }

    // Interleaved (rgbrgbrgb) -> planar (rrrgggbbb) output addresses.
    const int plane = dst_width * dst_height;
    float* out0 = dst + dy * dst_width + dx;
    float* out1 = out0 + plane;
    float* out2 = out1 + plane;

    // BGR -> RGB (the first and third channel trade places) combined with
    // the mean subtraction: plane 0 gets -104, plane 1 -117, plane 2 -123.
    *out0 = c2 - 104.0f;
    *out1 = c1 - 117.0f;
    *out2 = c0 - 123.0f;
}
|
|
|
|
// Enqueues the YOLOv5 detection preprocessing kernel on `stream`.
//
// Builds a source-to-destination affine matrix that scales the source
// uniformly by min(dst_height / src_height, dst_width / src_width) with zero
// translation (image anchored at the top-left corner), inverts it, and
// launches yolov5_detect_warpaffine_kernel with one thread per destination
// pixel, a source row stride of src_width * 3 bytes and border value 128.
//
// Parameters:
//   src         source pixels; dereferenced by the kernel, so it must be
//               device-accessible
//   src_width, src_height   source size in pixels
//   dst         output buffer written by the kernel
//               (3 * dst_width * dst_height floats)
//   dst_width, dst_height   destination size in pixels
//   stream      CUDA stream the kernel is launched on
//
// The launch is asynchronous and its status is not checked here. The call is
// a no-op when any dimension is not positive.
void yolov5_detect_preprocess_kernel_img(
        uint8_t* src, int src_width, int src_height,
        float* dst, int dst_width, int dst_height,
        cudaStream_t stream) {
    // Guards the divisions in the scale computation and avoids launching a
    // kernel with zero blocks.
    if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
        return;
    }

    AffineMatrix s2d, d2s;
    float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);

    s2d.value[0] = scale;
    s2d.value[1] = 0;
    s2d.value[2] = 0;  // paste at the top-left corner
    // s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;  // paste centered
    s2d.value[3] = 0;
    s2d.value[4] = scale;
    s2d.value[5] = 0;  // paste at the top-left corner
    // s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;  // paste centered

    // Invert into a Mat that owns its own storage, then copy the six
    // coefficients out. Wrapping d2s.value directly would make the copy
    // below a memcpy of a buffer onto itself.
    cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
    cv::Mat m2x3_d2s;
    cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
    memcpy(d2s.value, m2x3_d2s.ptr<float>(0), 6 * sizeof(float));

    int jobs = dst_height * dst_width;
    int threads = 256;
    // Integer ceil-division; a float division loses precision for large
    // job counts and can come up one block short.
    int blocks = (jobs + threads - 1) / threads;
    yolov5_detect_warpaffine_kernel<<<blocks, threads, 0, stream>>>(
        src, src_width * 3, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs);
}
|
|
|
|
// Enqueues the YOLOv5 classification preprocessing kernel on `stream`.
//
// Builds a source-to-destination affine matrix that scales the source
// uniformly by min(dst_height / src_height, dst_width / src_width) with zero
// translation (image anchored at the top-left corner), inverts it, and
// launches yolov5_classify_warpaffine_kernel with one thread per destination
// pixel, a source row stride of src_width * 3 bytes and border value 128.
//
// Parameters:
//   src         source pixels; dereferenced by the kernel, so it must be
//               device-accessible
//   src_width, src_height   source size in pixels
//   dst         output buffer written by the kernel
//               (3 * dst_width * dst_height floats)
//   dst_width, dst_height   destination size in pixels
//   stream      CUDA stream the kernel is launched on
//
// The launch is asynchronous and its status is not checked here. The call is
// a no-op when any dimension is not positive.
void yolov5_classify_preprocess_kernel_img(
        uint8_t* src, int src_width, int src_height,
        float* dst, int dst_width, int dst_height,
        cudaStream_t stream) {
    // Guards the divisions in the scale computation and avoids launching a
    // kernel with zero blocks.
    if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
        return;
    }

    AffineMatrix s2d, d2s;
    float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);

    s2d.value[0] = scale;
    s2d.value[1] = 0;
    s2d.value[2] = 0;  // paste at the top-left corner
    // s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;  // paste centered
    s2d.value[3] = 0;
    s2d.value[4] = scale;
    s2d.value[5] = 0;  // paste at the top-left corner
    // s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;  // paste centered

    // Invert into a Mat that owns its own storage, then copy the six
    // coefficients out. Wrapping d2s.value directly would make the copy
    // below a memcpy of a buffer onto itself.
    cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
    cv::Mat m2x3_d2s;
    cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
    memcpy(d2s.value, m2x3_d2s.ptr<float>(0), 6 * sizeof(float));

    int jobs = dst_height * dst_width;
    int threads = 256;
    // Integer ceil-division; a float division loses precision for large
    // job counts and can come up one block short.
    int blocks = (jobs + threads - 1) / threads;
    yolov5_classify_warpaffine_kernel<<<blocks, threads, 0, stream>>>(
        src, src_width * 3, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs);
}
|
|
|
|
// Enqueues the RetinaNet detection preprocessing kernel on `stream`.
//
// Builds a source-to-destination affine matrix that scales the source
// uniformly by min(dst_height / src_height, dst_width / src_width) with zero
// translation (image anchored at the top-left corner), inverts it, and
// launches retinanet_detect_warpaffine_kernel with one thread per destination
// pixel, a source row stride of src_width * 3 bytes and border value 128.
//
// Parameters:
//   src         source pixels; dereferenced by the kernel, so it must be
//               device-accessible
//   src_width, src_height   source size in pixels
//   dst         output buffer written by the kernel
//               (3 * dst_width * dst_height floats)
//   dst_width, dst_height   destination size in pixels
//   stream      CUDA stream the kernel is launched on
//
// The launch is asynchronous and its status is not checked here. The call is
// a no-op when any dimension is not positive.
void retinanet_detect_preprocess_kernel_img(
        uint8_t* src, int src_width, int src_height,
        float* dst, int dst_width, int dst_height,
        cudaStream_t stream) {
    // Guards the divisions in the scale computation and avoids launching a
    // kernel with zero blocks.
    if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
        return;
    }

    AffineMatrix s2d, d2s;
    float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);

    s2d.value[0] = scale;
    s2d.value[1] = 0;
    s2d.value[2] = 0;  // paste at the top-left corner
    // s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;  // paste centered
    s2d.value[3] = 0;
    s2d.value[4] = scale;
    s2d.value[5] = 0;  // paste at the top-left corner
    // s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;  // paste centered

    // Invert into a Mat that owns its own storage, then copy the six
    // coefficients out. Wrapping d2s.value directly would make the copy
    // below a memcpy of a buffer onto itself.
    cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
    cv::Mat m2x3_d2s;
    cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
    memcpy(d2s.value, m2x3_d2s.ptr<float>(0), 6 * sizeof(float));

    int jobs = dst_height * dst_width;
    int threads = 256;
    // Integer ceil-division; a float division loses precision for large
    // job counts and can come up one block short.
    int blocks = (jobs + threads - 1) / threads;
    retinanet_detect_warpaffine_kernel<<<blocks, threads, 0, stream>>>(
        src, src_width * 3, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs);
}
|
|
|
|
// Enqueues the RetinaNet classification preprocessing kernel on `stream`.
//
// Builds a source-to-destination affine matrix that scales the source
// uniformly by min(dst_height / src_height, dst_width / src_width) with zero
// translation (image anchored at the top-left corner), inverts it, and
// launches retinanet_classify_warpaffine_kernel with one thread per
// destination pixel, a source row stride of src_width * 3 bytes and border
// value 128.
//
// Parameters:
//   src         source pixels; dereferenced by the kernel, so it must be
//               device-accessible
//   src_width, src_height   source size in pixels
//   dst         output buffer written by the kernel
//               (3 * dst_width * dst_height floats)
//   dst_width, dst_height   destination size in pixels
//   stream      CUDA stream the kernel is launched on
//
// The launch is asynchronous and its status is not checked here. The call is
// a no-op when any dimension is not positive.
void retinanet_classify_preprocess_kernel_img(
        uint8_t* src, int src_width, int src_height,
        float* dst, int dst_width, int dst_height,
        cudaStream_t stream) {
    // Guards the divisions in the scale computation and avoids launching a
    // kernel with zero blocks.
    if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
        return;
    }

    AffineMatrix s2d, d2s;
    float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);

    s2d.value[0] = scale;
    s2d.value[1] = 0;
    s2d.value[2] = 0;  // paste at the top-left corner
    // s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;  // paste centered
    s2d.value[3] = 0;
    s2d.value[4] = scale;
    s2d.value[5] = 0;  // paste at the top-left corner
    // s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;  // paste centered

    // Invert into a Mat that owns its own storage, then copy the six
    // coefficients out. Wrapping d2s.value directly would make the copy
    // below a memcpy of a buffer onto itself.
    cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
    cv::Mat m2x3_d2s;
    cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
    memcpy(d2s.value, m2x3_d2s.ptr<float>(0), 6 * sizeof(float));

    int jobs = dst_height * dst_width;
    int threads = 256;
    // Integer ceil-division; a float division loses precision for large
    // job counts and can come up one block short.
    int blocks = (jobs + threads - 1) / threads;
    retinanet_classify_warpaffine_kernel<<<blocks, threads, 0, stream>>>(
        src, src_width * 3, src_width,
        src_height, dst, dst_width,
        dst_height, 128, d2s, jobs);
}
|
|
|
|
|
|
// Image preprocessing on the CPU with OpenCV.
//
// Resizes `img` with bilinear interpolation so that it fits inside an
// input_w x input_h canvas while keeping its aspect ratio, then pastes the
// result at the top-left corner of a CV_8UC3 canvas pre-filled with
// (128, 128, 128).
//
// Parameters:
//   img      source image; it is only read here
//   input_w  canvas width in pixels
//   input_h  canvas height in pixels
// Returns the input_h x input_w canvas.
//
// NOTE(review): `img` is expected to be non-empty; the ratios below divide
// by img.cols / img.rows.
cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h)
{
    float r_w = input_w / (img.cols * 1.0);
    float r_h = input_h / (img.rows * 1.0);

    // The smaller ratio decides the scale so that both sides fit.
    int w, h;
    if (r_h > r_w) {
        w = input_w;
        h = r_w * img.rows;
    } else {
        w = r_h * img.cols;
        h = input_h;
    }

    // The float -> int truncation above can yield 0 for very elongated
    // images, which would make the resize target empty. Keep the resized
    // extent within [1, canvas size] so the paste region stays valid.
    w = std::min(std::max(w, 1), input_w);
    h = std::min(std::max(h, 1), input_h);

    cv::Mat re(h, w, CV_8UC3);
    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    // Centered paste would use the offsets ((input_w - w) / 2, (input_h - h) / 2);
    // this implementation pastes at the top-left corner instead.
    re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
    return out;
}
|