Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin
2020-03-06 20:00:55 +00:00
19 changed files with 518 additions and 391 deletions
+1
View File
@@ -1089,6 +1089,7 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc
getMinMaxRes<double>
};
CV_Assert(ddepth <= CV_64F);
getMinMaxResFunc func = functab[ddepth];
int locTemp[2];
+112 -89
View File
@@ -710,67 +710,78 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total;
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
int isum = 0;
int *ibuf = &result.i;
AutoBuffer<float> fltbuf_;
float* fltbuf = 0;
size_t esz = 0;
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
if( blockSum )
if ((normType == NORM_L1 && depth <= CV_16S) ||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
{
esz = src.elemSize();
// special case to handle "integer" overflow in accumulator
const size_t esz = src.elemSize();
const int total = (int)it.size;
const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
const int blockSize = std::min(total, intSumBlockSize);
int isum = 0;
int count = 0;
if( depth == CV_16F )
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
blockSize = std::min(blockSize, 1024);
fltbuf_.allocate(blockSize);
fltbuf = fltbuf_.data();
}
else
{
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
blockSize = std::min(blockSize, intSumBlockSize);
ibuf = &isum;
for (int j = 0; j < total; j += blockSize)
{
int bsz = std::min(total - j, blockSize);
func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
count += bsz;
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
{
result.d += isum;
isum = 0;
count = 0;
}
ptrs[0] += bsz*esz;
if (ptrs[1])
ptrs[1] += bsz;
}
}
}
for( size_t i = 0; i < it.nplanes; i++, ++it )
else if (depth == CV_16F)
{
for( j = 0; j < total; j += blockSize )
const size_t esz = src.elemSize();
const int total = (int)it.size;
const int blockSize = std::min(total, divUp(1024, cn));
AutoBuffer<float, 1024> fltbuf(blockSize);
float* data0 = fltbuf.data();
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
int bsz = std::min(total - j, blockSize);
const uchar* data = ptrs[0];
if( depth == CV_16F )
for (int j = 0; j < total; j += blockSize)
{
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
data = (const uchar*)fltbuf;
int bsz = std::min(total - j, blockSize);
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
ptrs[0] += bsz*esz;
if (ptrs[1])
ptrs[1] += bsz;
}
func( data, ptrs[1], (uchar*)ibuf, bsz, cn );
if( blockSum && depth != CV_16F )
{
result.d += isum;
isum = 0;
}
ptrs[0] += bsz*esz;
if( ptrs[1] )
ptrs[1] += bsz;
}
}
else
{
// generic implementation
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
}
}
if( normType == NORM_INF )
{
if( depth == CV_64F )
;
else if( depth == CV_32F )
result.d = result.f;
if(depth == CV_64F || depth == CV_16F)
return result.d;
else if (depth == CV_32F)
return result.f;
else
result.d = result.i;
return result.i;
}
else if( normType == NORM_L2 )
result.d = std::sqrt(result.d);
return std::sqrt(result.d);
return result.d;
}
@@ -1186,70 +1197,82 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total;
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
unsigned isum = 0;
unsigned *ibuf = &result.u;
AutoBuffer<float> fltbuf_;
float* fltbuf = 0;
size_t esz = 0;
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
if( blockSum )
if ((normType == NORM_L1 && depth <= CV_16S) ||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
{
esz = src1.elemSize();
// special case to handle "integer" overflow in accumulator
const size_t esz = src1.elemSize();
const int total = (int)it.size;
const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
const int blockSize = std::min(total, intSumBlockSize);
int isum = 0;
int count = 0;
if( depth == CV_16F )
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
blockSize = std::min(blockSize, 1024);
fltbuf_.allocate(blockSize*2);
fltbuf = fltbuf_.data();
}
else
{
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
blockSize = std::min(blockSize, intSumBlockSize);
ibuf = &isum;
for (int j = 0; j < total; j += blockSize)
{
int bsz = std::min(total - j, blockSize);
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
count += bsz;
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
{
result.d += isum;
isum = 0;
count = 0;
}
ptrs[0] += bsz*esz;
ptrs[1] += bsz*esz;
if (ptrs[2])
ptrs[2] += bsz;
}
}
}
for( size_t i = 0; i < it.nplanes; i++, ++it )
else if (depth == CV_16F)
{
for( j = 0; j < total; j += blockSize )
const size_t esz = src1.elemSize();
const int total = (int)it.size;
const int blockSize = std::min(total, divUp(512, cn));
AutoBuffer<float, 1024> fltbuf(blockSize * 2);
float* data0 = fltbuf.data();
float* data1 = fltbuf.data() + blockSize * cn;
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
int bsz = std::min(total - j, blockSize);
const uchar *data0 = ptrs[0], *data1 = ptrs[1];
if( depth == CV_16F )
for (int j = 0; j < total; j += blockSize)
{
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz);
data0 = (const uchar*)fltbuf;
data1 = (const uchar*)(fltbuf + bsz);
int bsz = std::min(total - j, blockSize);
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
ptrs[0] += bsz*esz;
ptrs[1] += bsz*esz;
if (ptrs[2])
ptrs[2] += bsz;
}
func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn );
if( blockSum && depth != CV_16F )
{
result.d += isum;
isum = 0;
}
ptrs[0] += bsz*esz;
ptrs[1] += bsz*esz;
if( ptrs[2] )
ptrs[2] += bsz;
}
}
else
{
// generic implementation
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
}
}
if( normType == NORM_INF )
{
if( depth == CV_64F )
;
else if( depth == CV_32F )
result.d = result.f;
if (depth == CV_64F || depth == CV_16F)
return result.d;
else if (depth == CV_32F)
return result.f;
else
result.d = result.u;
return result.u;
}
else if( normType == NORM_L2 )
result.d = std::sqrt(result.d);
return std::sqrt(result.d);
return result.d;
}
+11 -8
View File
@@ -6451,16 +6451,19 @@ struct Image2D::Impl
CL_MEM_OBJECT_IMAGE2D, numFormats,
NULL, &numFormats);
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
AutoBuffer<cl_image_format> formats(numFormats);
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
CL_MEM_OBJECT_IMAGE2D, numFormats,
formats.data(), NULL);
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
for (cl_uint i = 0; i < numFormats; ++i)
if (numFormats > 0)
{
if (!memcmp(&formats[i], &format, sizeof(format)))
AutoBuffer<cl_image_format> formats(numFormats);
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
CL_MEM_OBJECT_IMAGE2D, numFormats,
formats.data(), NULL);
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
for (cl_uint i = 0; i < numFormats; ++i)
{
return true;
if (!memcmp(&formats[i], &format, sizeof(format)))
{
return true;
}
}
}
return false;