Merge pull request #16731 from alalek:issue_16708
* imgproc(integral): avoid OOB access
* imgproc(test): fix integral perf check
  - FP32 computation is not accurate
* imgproc(integral): tune loop limits
This commit is contained in:
parent
da6ad1c640
commit
fd09413566
@ -39,10 +39,23 @@ PERF_TEST_P(Size_MatType_OutMatDepth, integral,
|
||||
Mat sum(sz, sdepth);
|
||||
|
||||
declare.in(src, WARMUP_RNG).out(sum);
|
||||
if (sdepth == CV_32F)
|
||||
src *= (1 << 23) / (double)(sz.area() * 256); // FP32 calculations are not accurate (mantissa is 23-bit)
|
||||
|
||||
TEST_CYCLE() integral(src, sum, sdepth);
|
||||
|
||||
SANITY_CHECK(sum, 1e-6);
|
||||
Mat src_roi; src(Rect(src.cols - 4, src.rows - 4, 4, 4)).convertTo(src_roi, sdepth);
|
||||
Mat restored_src_roi =
|
||||
sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4)) + sum(Rect(sum.cols - 5, sum.rows - 5, 4, 4)) -
|
||||
sum(Rect(sum.cols - 4, sum.rows - 5, 4, 4)) - sum(Rect(sum.cols - 5, sum.rows - 4, 4, 4));
|
||||
EXPECT_EQ(0, cvtest::norm(restored_src_roi, src_roi, NORM_INF))
|
||||
<< src_roi << endl << restored_src_roi << endl
|
||||
<< sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4));
|
||||
|
||||
if (sdepth == CV_32F)
|
||||
SANITY_CHECK_NOTHING();
|
||||
else
|
||||
SANITY_CHECK(sum, 1e-6);
|
||||
}
|
||||
|
||||
PERF_TEST_P(Size_MatType_OutMatDepth, integral_sqsum,
|
||||
|
||||
@ -237,7 +237,11 @@ struct Integral_SIMD<uchar, int, double>
|
||||
v_int32 prev_1 = vx_setzero_s32(), prev_2 = vx_setzero_s32(),
|
||||
prev_3 = vx_setzero_s32();
|
||||
int j = 0;
|
||||
for ( ; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
|
||||
const int j_max =
|
||||
((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
|
||||
? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave()
|
||||
: width - v_uint16::nlanes * cn; // v_expand_low
|
||||
for ( ; j <= j_max; j += v_uint16::nlanes * cn)
|
||||
{
|
||||
v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
|
||||
v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);
|
||||
@ -546,7 +550,11 @@ struct Integral_SIMD<uchar, float, double>
|
||||
v_float32 prev_1 = vx_setzero_f32(), prev_2 = vx_setzero_f32(),
|
||||
prev_3 = vx_setzero_f32();
|
||||
int j = 0;
|
||||
for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
|
||||
const int j_max =
|
||||
((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
|
||||
? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave()
|
||||
: width - v_uint16::nlanes * cn; // v_expand_low
|
||||
for ( ; j <= j_max; j += v_uint16::nlanes * cn)
|
||||
{
|
||||
v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
|
||||
v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);
|
||||
@ -896,7 +904,11 @@ struct Integral_SIMD<uchar, double, double>
|
||||
v_float64 prev_1 = vx_setzero_f64(), prev_2 = vx_setzero_f64(),
|
||||
prev_3 = vx_setzero_f64();
|
||||
int j = 0;
|
||||
for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
|
||||
const int j_max =
|
||||
((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
|
||||
? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave()
|
||||
: width - v_uint16::nlanes * cn; // v_expand_low
|
||||
for ( ; j <= j_max; j += v_uint16::nlanes * cn)
|
||||
{
|
||||
v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
|
||||
v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user