From 198b5096aaf8f5d855b98337e9de2fc45485c5a7 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 7 Mar 2020 22:06:47 +0300 Subject: [PATCH] Merge pull request #16754 from alalek:issue_16752 * core(test): FP16 norm test * core: norm()-FP16 disable OpenCL * core(norm): fix 16f32f local buffer size --- modules/core/src/minmax.cpp | 3 +++ modules/core/src/norm.cpp | 7 +++++-- modules/core/src/sum.dispatch.cpp | 3 +++ modules/core/test/ocl/test_arithm.cpp | 2 +- modules/ts/src/ts_func.cpp | 8 ++++++-- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 43785d839b..61bddc3d35 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -978,6 +978,9 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); + if (depth >= CV_16F) + return false; + // disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 
2014) if ((haveMask || type == CV_32FC1) && dev.isAMD()) return false; diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 8611d1eab5..088c163c87 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -433,6 +433,9 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & bool doubleSupport = d.doubleFPConfig() > 0, haveMask = _mask.kind() != _InputArray::NONE; + if (depth >= CV_16F) + return false; // TODO: support FP16 + if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) || (!doubleSupport && depth == CV_64F)) return false; @@ -747,7 +750,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) const size_t esz = src.elemSize(); const int total = (int)it.size; const int blockSize = std::min(total, divUp(1024, cn)); - AutoBuffer<float> fltbuf(blockSize); + AutoBuffer<float> fltbuf(blockSize * cn); float* data0 = fltbuf.data(); for (size_t i = 0; i < it.nplanes; i++, ++it) { @@ -1235,7 +1238,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m const size_t esz = src1.elemSize(); const int total = (int)it.size; const int blockSize = std::min(total, divUp(512, cn)); - AutoBuffer<float> fltbuf(blockSize * 2); + AutoBuffer<float> fltbuf(blockSize * cn * 2); float* data0 = fltbuf.data(); float* data1 = fltbuf.data() + blockSize * cn; for (size_t i = 0; i < it.nplanes; i++, ++it) diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp index e0a576e93f..20caca9f04 100644 --- a/modules/core/src/sum.dispatch.cpp +++ b/modules/core/src/sum.dispatch.cpp @@ -46,6 +46,9 @@ bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask, if ( (!doubleSupport && depth == CV_64F) || cn > 4 ) return false; + if (depth >= CV_16F) + return false; + int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 
2 : 1); size_t wgs = dev.maxWorkGroupSize(); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index c450f2c2e5..e6cb82919a 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1905,7 +1905,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::testing::Values(Channels(1)), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS_16F, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index c35755043a..f67568a08f 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -87,7 +87,9 @@ double getMinVal(int depth) depth = CV_MAT_DEPTH(depth); double val = depth == CV_8U ? 0 : depth == CV_8S ? SCHAR_MIN : depth == CV_16U ? 0 : depth == CV_16S ? SHRT_MIN : depth == CV_32S ? INT_MIN : - depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX : -1; + depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX : + depth == CV_16F ? -65504 + : -1; CV_Assert(val != -1); return val; } @@ -97,7 +99,9 @@ double getMaxVal(int depth) depth = CV_MAT_DEPTH(depth); double val = depth == CV_8U ? UCHAR_MAX : depth == CV_8S ? SCHAR_MAX : depth == CV_16U ? USHRT_MAX : depth == CV_16S ? SHRT_MAX : depth == CV_32S ? 
INT_MAX : - depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX : -1; + depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX : + depth == CV_16F ? 65504 + : -1; CV_Assert(val != -1); return val; }