fix the bug when src*2 < dst
parent 2920a8e0ec
commit 01226cb8ac
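
The bug scenario: pyrUp accepts a destination one pixel larger or smaller than 2x the source, and those odd-size paths mishandled the last row/column. A minimal sketch of a call that exercises the fixed path (illustrative, not part of the commit; sizes taken from the regression tests below):

    #include <opencv2/imgproc.hpp>
    using namespace cv;

    int main()
    {
        Mat src(13, 13, CV_16UC3, Scalar::all(0));   // multi-channel 16-bit source
        Mat dst;
        // dsize = 2*src - 1 is legal for pyrUp; its last row maps both output
        // rows of the final source row onto the same memory (dst0 == dst1).
        pyrUp(src, dst, Size(src.cols * 2 - 1, src.rows * 2 - 1));
        return 0;
    }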
@@ -82,6 +82,8 @@ template<typename T1, typename T2> int PyrDownVecV(T1**, T2*, int) { return 0; }
 template<typename T1, typename T2> int PyrUpVecV(T1**, T2**, int) { return 0; }
+template<typename T1, typename T2> int PyrUpVecVOneRow(T1**, T2*, int) { return 0; }
+
 #if CV_SIMD
 template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
@@ -717,6 +719,120 @@ template <> int PyrUpVecV<float, float>(float** src, float** dst, int width)
     return x;
 }
 
+template <> int PyrUpVecVOneRow<int, uchar>(int** src, uchar* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes)
+    {
+        v_int16 v_r00 = v_pack(vx_load(row0 + x), vx_load(row0 + x + v_int32::nlanes)),
+                v_r01 = v_pack(vx_load(row0 + x + 2 * v_int32::nlanes), vx_load(row0 + x + 3 * v_int32::nlanes)),
+                v_r10 = v_pack(vx_load(row1 + x), vx_load(row1 + x + v_int32::nlanes)),
+                v_r11 = v_pack(vx_load(row1 + x + 2 * v_int32::nlanes), vx_load(row1 + x + 3 * v_int32::nlanes)),
+                v_r20 = v_pack(vx_load(row2 + x), vx_load(row2 + x + v_int32::nlanes)),
+                v_r21 = v_pack(vx_load(row2 + x + 2 * v_int32::nlanes), vx_load(row2 + x + 3 * v_int32::nlanes));
+        v_int16 v_2r10 = v_r10 + v_r10, v_2r11 = (v_r11 + v_r11);
+        v_store(dst + x, v_rshr_pack_u<6>(v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10), v_r01 + v_r21 + (v_2r11 + v_2r11 + v_2r11)));
+    }
+    if(x <= width - v_uint16::nlanes)
+    {
+        v_int16 v_r00 = v_pack(vx_load(row0 + x), vx_load(row0 + x + v_int32::nlanes)),
+                v_r10 = v_pack(vx_load(row1 + x), vx_load(row1 + x + v_int32::nlanes)),
+                v_r20 = v_pack(vx_load(row2 + x), vx_load(row2 + x + v_int32::nlanes));
+        v_int16 v_2r10 = v_r10 + v_r10;
+        v_rshr_pack_u_store<6>(dst + x, v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10));
+        x += v_uint16::nlanes;
+    }
+    typedef int CV_DECL_ALIGNED(1) unaligned_int;
+    for (; x <= width - v_int32x4::nlanes; x += v_int32x4::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_int32 v_2r10 = v_r10 + v_r10;
+        v_int16 d = v_pack(v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10), (v_r10 + v_r20) << 2);
+        *(unaligned_int*)(dst + x) = v_reinterpret_as_s32(v_rshr_pack_u<6>(d, vx_setzero_s16())).get0();
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<int, short>(int** src, short* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_int16::nlanes; x += v_int16::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r01 = vx_load(row0 + x + v_int32::nlanes),
+                v_r10 = vx_load(row1 + x),
+                v_r11 = vx_load(row1 + x + v_int32::nlanes),
+                v_r20 = vx_load(row2 + x),
+                v_r21 = vx_load(row2 + x + v_int32::nlanes);
+        v_store(dst + x, v_rshr_pack<6>(v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)), v_r01 + v_r21 + ((v_r11 << 1) + (v_r11 << 2))));
+    }
+    if(x <= width - v_int32::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_rshr_pack_store<6>(dst + x, v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)));
+        x += v_int32::nlanes;
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<int, ushort>(int** src, ushort* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_uint16::nlanes; x += v_uint16::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r01 = vx_load(row0 + x + v_int32::nlanes),
+                v_r10 = vx_load(row1 + x),
+                v_r11 = vx_load(row1 + x + v_int32::nlanes),
+                v_r20 = vx_load(row2 + x),
+                v_r21 = vx_load(row2 + x + v_int32::nlanes);
+        v_store(dst + x, v_rshr_pack_u<6>(v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)), v_r01 + v_r21 + ((v_r11 << 1) + (v_r11 << 2))));
+    }
+    if(x <= width - v_int32::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_rshr_pack_u_store<6>(dst + x, v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)));
+        x += v_int32::nlanes;
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<float, float>(float** src, float* dst, int width)
+{
+    int x = 0;
+    const float *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    v_float32 v_6 = vx_setall_f32(6.0f), v_scale = vx_setall_f32(1.f/64.f);
+    for( ; x <= width - v_float32::nlanes; x += v_float32::nlanes)
+    {
+        v_float32 v_r0 = vx_load(row0 + x),
+                  v_r1 = vx_load(row1 + x),
+                  v_r2 = vx_load(row2 + x);
+        v_store(dst + x, v_scale * (v_muladd(v_6, v_r1, v_r0) + v_r2));
+    }
+    vx_cleanup();
+
+    return x;
+}
+
 #endif
 
 template<class CastOp>
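
All four specializations perform the same single-row vertical pass: filter three buffered rows with the 1-6-1 kernel, then apply a rounding right-shift by 6 with saturation (that is what v_rshr_pack_u<6> / v_rshr_pack<6> do). For reference, a scalar model of the uchar case (my sketch, not code from the commit; the helper name is made up):

    #include <algorithm>

    static int pyrUpOneRowScalar(int** src, unsigned char* dst, int width)
    {
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
        for (int x = 0; x < width; x++)
        {
            int v = (row0[x] + row1[x]*6 + row2[x] + 32) >> 6;       // rounding shift by 6
            dst[x] = (unsigned char)std::min(255, std::max(0, v));   // unsigned saturation
        }
        return width;
    }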
@@ -963,7 +1079,7 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
 
             if (dsize.width > ssize.width*2)
             {
-                row[(_dst.cols-1) * cn + x] = row[dx + cn];
+                row[(_dst.cols-1) + x] = row[dx + cn];
             }
         }
 
@@ -983,12 +1099,24 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
         row0 = rows[0]; row1 = rows[1]; row2 = rows[2];
         dsts[0] = dst0; dsts[1] = dst1;
 
-        x = PyrUpVecV<WT, T>(rows, dsts, dsize.width);
-        for( ; x < dsize.width; x++ )
+        if (dst0 != dst1)
         {
-            T t1 = castOp((row1[x] + row2[x])*4);
-            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
-            dst1[x] = t1; dst0[x] = t0;
+            x = PyrUpVecV<WT, T>(rows, dsts, dsize.width);
+            for( ; x < dsize.width; x++ )
+            {
+                T t1 = castOp((row1[x] + row2[x])*4);
+                T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
+                dst1[x] = t1; dst0[x] = t0;
+            }
         }
+        else
+        {
+            x = PyrUpVecVOneRow<WT, T>(rows, dst0, dsize.width);
+            for( ; x < dsize.width; x++ )
+            {
+                T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
+                dst0[x] = t0;
+            }
+        }
     }
 
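
Why the new branch keys on dst0 != dst1: pyrUp_ clamps the second output row index, so for the last source row of an odd-height destination both pointers alias the same row. Paraphrased context from pyrUp_ (not shown in this diff):

    T* dst0 = _dst.ptr<T>(y*2);
    T* dst1 = _dst.ptr<T>(std::min(y*2 + 1, dsize.height - 1));

When the rows alias, the scalar loop's store order (t1 then t0) still leaves the correct 1-6-1 result in place, but the vectorized PyrUpVecV may store in the opposite order and leave the t1 row behind; the else-branch therefore computes only the single row via PyrUpVecVOneRow.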
@@ -8,12 +8,41 @@ namespace opencv_test { namespace {
 
 TEST(Imgproc_PyrUp, pyrUp_regression_22184)
 {
-    Mat src(100, 100, CV_16UC3, Scalar::all(255));
-    Mat dst(100 * 2 + 1, 100 * 2 + 1, CV_16UC3, Scalar::all(0));
+    Mat src(100,100,CV_16UC3,Scalar(255,255,255));
+    Mat dst(100 * 2 + 1, 100 * 2 + 1, CV_16UC3, Scalar(0,0,0));
     pyrUp(src, dst, Size(dst.cols, dst.rows));
-    double min_val = 0;
+    double min_val;
     minMaxLoc(dst, &min_val);
     ASSERT_GT(cvRound(min_val), 0);
 }
 
-}} // namespace
+TEST(Imgproc_PyrUp, pyrUp_regression_22194)
+{
+    Mat src(13, 13, CV_16UC3, Scalar(0,0,0));
+    {
+        int swidth = src.cols;
+        int sheight = src.rows;
+        int cn = src.channels();
+        int count = 0;
+        for (int y = 0; y < sheight; y++)
+        {
+            ushort *src_c = src.ptr<ushort>(y);
+            for (int x = 0; x < swidth * cn; x++)
+            {
+                src_c[x] = (count++) % 10;
+            }
+        }
+    }
+    Mat dst(src.cols * 2 - 1, src.rows * 2 - 1, CV_16UC3, Scalar(0,0,0));
+    pyrUp(src, dst, Size(dst.cols, dst.rows));
+
+    {
+        ushort *dst_c = dst.ptr<ushort>(dst.rows - 1);
+        ASSERT_EQ(dst_c[0], 6);
+        ASSERT_EQ(dst_c[1], 6);
+        ASSERT_EQ(dst_c[2], 1);
+    }
+}
+
+}} // namespace
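
To run both regression tests after applying the change (usage note, not part of the commit; the binary path assumes a standard OpenCV build tree):

    ./bin/opencv_test_imgproc --gtest_filter=Imgproc_PyrUp.*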