From 198b5096aaf8f5d855b98337e9de2fc45485c5a7 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Sat, 7 Mar 2020 22:06:47 +0300
Subject: [PATCH] Merge pull request #16754 from alalek:issue_16752

* core(test): FP16 norm test

* core: disable OpenCL for FP16 (depth >= CV_16F) in ocl_norm(), ocl_sum() and ocl_minMaxIdx()

* core(norm): fix 16f32f local buffer size (allocate blockSize * cn floats, not blockSize)
---
 modules/core/src/minmax.cpp           | 3 +++
 modules/core/src/norm.cpp             | 7 +++++--
 modules/core/src/sum.dispatch.cpp     | 3 +++
 modules/core/test/ocl/test_arithm.cpp | 2 +-
 modules/ts/src/ts_func.cpp            | 8 ++++++--
 5 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp
index 43785d839b..61bddc3d35 100644
--- a/modules/core/src/minmax.cpp
+++ b/modules/core/src/minmax.cpp
@@ -978,6 +978,9 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc
     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
             kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
 
+    if (depth >= CV_16F)
+        return false;
+
     // disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014)
     if ((haveMask || type == CV_32FC1) && dev.isAMD())
         return false;
diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp
index 8611d1eab5..088c163c87 100644
--- a/modules/core/src/norm.cpp
+++ b/modules/core/src/norm.cpp
@@ -433,6 +433,9 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
     bool doubleSupport = d.doubleFPConfig() > 0,
             haveMask = _mask.kind() != _InputArray::NONE;
 
+    if (depth >= CV_16F)
+        return false;  // TODO: support FP16
+
     if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
          (!doubleSupport && depth == CV_64F))
         return false;
@@ -747,7 +750,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
         const size_t esz = src.elemSize();
         const int total = (int)it.size;
         const int blockSize = std::min(total, divUp(1024, cn));
-        AutoBuffer<float, 1024> fltbuf(blockSize);
+        AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
         float* data0 = fltbuf.data();
         for (size_t i = 0; i < it.nplanes; i++, ++it)
         {
@@ -1235,7 +1238,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
         const size_t esz = src1.elemSize();
         const int total = (int)it.size;
         const int blockSize = std::min(total, divUp(512, cn));
-        AutoBuffer<float, 1024> fltbuf(blockSize * 2);
+        AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
         float* data0 = fltbuf.data();
         float* data1 = fltbuf.data() + blockSize * cn;
         for (size_t i = 0; i < it.nplanes; i++, ++it)
diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp
index e0a576e93f..20caca9f04 100644
--- a/modules/core/src/sum.dispatch.cpp
+++ b/modules/core/src/sum.dispatch.cpp
@@ -46,6 +46,9 @@ bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask,
     if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
         return false;
 
+    if (depth >= CV_16F)
+        return false;
+
     int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
     size_t wgs = dev.maxWorkGroupSize();
 
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index c450f2c2e5..e6cb82919a 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1905,7 +1905,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F,
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::testing::Values(Channels(1)), Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS_16F, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index c35755043a..f67568a08f 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -87,7 +87,9 @@ double getMinVal(int depth)
     depth = CV_MAT_DEPTH(depth);
     double val = depth == CV_8U ? 0 : depth == CV_8S ? SCHAR_MIN : depth == CV_16U ? 0 :
     depth == CV_16S ? SHRT_MIN : depth == CV_32S ? INT_MIN :
-    depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX : -1;
+    depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX :
+            depth == CV_16F ? -65504
+            : -1;
     CV_Assert(val != -1);
     return val;
 }
@@ -97,7 +99,9 @@ double getMaxVal(int depth)
     depth = CV_MAT_DEPTH(depth);
     double val = depth == CV_8U ? UCHAR_MAX : depth == CV_8S ? SCHAR_MAX : depth == CV_16U ? USHRT_MAX :
     depth == CV_16S ? SHRT_MAX : depth == CV_32S ? INT_MAX :
-    depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX : -1;
+    depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX :
+            depth == CV_16F ? 65504
+            : -1;
     CV_Assert(val != -1);
     return val;
 }