Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
@@ -1089,6 +1089,7 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc
|
||||
getMinMaxRes<double>
|
||||
};
|
||||
|
||||
CV_Assert(ddepth <= CV_64F);
|
||||
getMinMaxResFunc func = functab[ddepth];
|
||||
|
||||
int locTemp[2];
|
||||
|
||||
+112
-89
@@ -710,67 +710,78 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
result;
|
||||
result.d = 0;
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)it.size, blockSize = total;
|
||||
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
|
||||
int isum = 0;
|
||||
int *ibuf = &result.i;
|
||||
AutoBuffer<float> fltbuf_;
|
||||
float* fltbuf = 0;
|
||||
size_t esz = 0;
|
||||
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
|
||||
|
||||
if( blockSum )
|
||||
if ((normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
|
||||
{
|
||||
esz = src.elemSize();
|
||||
// special case to handle "integer" overflow in accumulator
|
||||
const size_t esz = src.elemSize();
|
||||
const int total = (int)it.size;
|
||||
const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
const int blockSize = std::min(total, intSumBlockSize);
|
||||
int isum = 0;
|
||||
int count = 0;
|
||||
|
||||
if( depth == CV_16F )
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
blockSize = std::min(blockSize, 1024);
|
||||
fltbuf_.allocate(blockSize);
|
||||
fltbuf = fltbuf_.data();
|
||||
}
|
||||
else
|
||||
{
|
||||
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
for (int j = 0; j < total; j += blockSize)
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
|
||||
count += bsz;
|
||||
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
count = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
if (ptrs[1])
|
||||
ptrs[1] += bsz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
else if (depth == CV_16F)
|
||||
{
|
||||
for( j = 0; j < total; j += blockSize )
|
||||
const size_t esz = src.elemSize();
|
||||
const int total = (int)it.size;
|
||||
const int blockSize = std::min(total, divUp(1024, cn));
|
||||
AutoBuffer<float, 1024> fltbuf(blockSize);
|
||||
float* data0 = fltbuf.data();
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
const uchar* data = ptrs[0];
|
||||
if( depth == CV_16F )
|
||||
for (int j = 0; j < total; j += blockSize)
|
||||
{
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
|
||||
data = (const uchar*)fltbuf;
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
|
||||
func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
|
||||
ptrs[0] += bsz*esz;
|
||||
if (ptrs[1])
|
||||
ptrs[1] += bsz;
|
||||
}
|
||||
func( data, ptrs[1], (uchar*)ibuf, bsz, cn );
|
||||
if( blockSum && depth != CV_16F )
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
if( ptrs[1] )
|
||||
ptrs[1] += bsz;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// generic implementation
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
|
||||
}
|
||||
}
|
||||
|
||||
if( normType == NORM_INF )
|
||||
{
|
||||
if( depth == CV_64F )
|
||||
;
|
||||
else if( depth == CV_32F )
|
||||
result.d = result.f;
|
||||
if(depth == CV_64F || depth == CV_16F)
|
||||
return result.d;
|
||||
else if (depth == CV_32F)
|
||||
return result.f;
|
||||
else
|
||||
result.d = result.i;
|
||||
return result.i;
|
||||
}
|
||||
else if( normType == NORM_L2 )
|
||||
result.d = std::sqrt(result.d);
|
||||
return std::sqrt(result.d);
|
||||
|
||||
return result.d;
|
||||
}
|
||||
@@ -1186,70 +1197,82 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
result;
|
||||
result.d = 0;
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)it.size, blockSize = total;
|
||||
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
|
||||
unsigned isum = 0;
|
||||
unsigned *ibuf = &result.u;
|
||||
AutoBuffer<float> fltbuf_;
|
||||
float* fltbuf = 0;
|
||||
size_t esz = 0;
|
||||
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
|
||||
|
||||
if( blockSum )
|
||||
if ((normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
|
||||
{
|
||||
esz = src1.elemSize();
|
||||
// special case to handle "integer" overflow in accumulator
|
||||
const size_t esz = src1.elemSize();
|
||||
const int total = (int)it.size;
|
||||
const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
|
||||
const int blockSize = std::min(total, intSumBlockSize);
|
||||
int isum = 0;
|
||||
int count = 0;
|
||||
|
||||
if( depth == CV_16F )
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
blockSize = std::min(blockSize, 1024);
|
||||
fltbuf_.allocate(blockSize*2);
|
||||
fltbuf = fltbuf_.data();
|
||||
}
|
||||
else
|
||||
{
|
||||
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
for (int j = 0; j < total; j += blockSize)
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
|
||||
count += bsz;
|
||||
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
count = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
ptrs[1] += bsz*esz;
|
||||
if (ptrs[2])
|
||||
ptrs[2] += bsz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
else if (depth == CV_16F)
|
||||
{
|
||||
for( j = 0; j < total; j += blockSize )
|
||||
const size_t esz = src1.elemSize();
|
||||
const int total = (int)it.size;
|
||||
const int blockSize = std::min(total, divUp(512, cn));
|
||||
AutoBuffer<float, 1024> fltbuf(blockSize * 2);
|
||||
float* data0 = fltbuf.data();
|
||||
float* data1 = fltbuf.data() + blockSize * cn;
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
const uchar *data0 = ptrs[0], *data1 = ptrs[1];
|
||||
if( depth == CV_16F )
|
||||
for (int j = 0; j < total; j += blockSize)
|
||||
{
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
|
||||
hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz);
|
||||
data0 = (const uchar*)fltbuf;
|
||||
data1 = (const uchar*)(fltbuf + bsz);
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
|
||||
hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
|
||||
func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
|
||||
ptrs[0] += bsz*esz;
|
||||
ptrs[1] += bsz*esz;
|
||||
if (ptrs[2])
|
||||
ptrs[2] += bsz;
|
||||
}
|
||||
func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn );
|
||||
if( blockSum && depth != CV_16F )
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
ptrs[1] += bsz*esz;
|
||||
if( ptrs[2] )
|
||||
ptrs[2] += bsz;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// generic implementation
|
||||
for (size_t i = 0; i < it.nplanes; i++, ++it)
|
||||
{
|
||||
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
|
||||
}
|
||||
}
|
||||
|
||||
if( normType == NORM_INF )
|
||||
{
|
||||
if( depth == CV_64F )
|
||||
;
|
||||
else if( depth == CV_32F )
|
||||
result.d = result.f;
|
||||
if (depth == CV_64F || depth == CV_16F)
|
||||
return result.d;
|
||||
else if (depth == CV_32F)
|
||||
return result.f;
|
||||
else
|
||||
result.d = result.u;
|
||||
return result.u;
|
||||
}
|
||||
else if( normType == NORM_L2 )
|
||||
result.d = std::sqrt(result.d);
|
||||
return std::sqrt(result.d);
|
||||
|
||||
return result.d;
|
||||
}
|
||||
|
||||
@@ -6451,16 +6451,19 @@ struct Image2D::Impl
|
||||
CL_MEM_OBJECT_IMAGE2D, numFormats,
|
||||
NULL, &numFormats);
|
||||
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, NULL)");
|
||||
AutoBuffer<cl_image_format> formats(numFormats);
|
||||
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
|
||||
CL_MEM_OBJECT_IMAGE2D, numFormats,
|
||||
formats.data(), NULL);
|
||||
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
|
||||
for (cl_uint i = 0; i < numFormats; ++i)
|
||||
if (numFormats > 0)
|
||||
{
|
||||
if (!memcmp(&formats[i], &format, sizeof(format)))
|
||||
AutoBuffer<cl_image_format> formats(numFormats);
|
||||
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
|
||||
CL_MEM_OBJECT_IMAGE2D, numFormats,
|
||||
formats.data(), NULL);
|
||||
CV_OCL_DBG_CHECK_RESULT(err, "clGetSupportedImageFormats(CL_MEM_OBJECT_IMAGE2D, formats)");
|
||||
for (cl_uint i = 0; i < numFormats; ++i)
|
||||
{
|
||||
return true;
|
||||
if (!memcmp(&formats[i], &format, sizeof(format)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user