From 5ce38e516e554ed64106350c929127adae9d908f Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Tue, 5 Dec 2017 13:32:28 +0300 Subject: [PATCH] Merge pull request #10223 from vpisarev:ocl_mac_fixes * fixed OpenCL functions on Mac, so that the tests pass * fixed compile warnings; temporarily disabled OCL branch of TV L1 optical flow on mac * fixed a few other warnings on macOS --- modules/core/src/ocl.cpp | 87 ++++++++++++++++++++++++++++++++-- modules/core/src/stat.cpp | 5 ++ modules/imgproc/src/morph.cpp | 14 +++--- modules/video/src/tvl1flow.cpp | 2 + 4 files changed, 98 insertions(+), 10 deletions(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 778efe98f9..eac630e3be 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4172,13 +4172,13 @@ protected: size_t step_; public: - AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment) + AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0) : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step) { CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n - if (((size_t)ptr_ & (alignment - 1)) != 0) + if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0) { - allocatedPtr_ = new uchar[size_ + alignment - 1]; + allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1]; ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1)); if (readAccess) { @@ -4978,6 +4978,25 @@ public: CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0)); } +#ifdef __APPLE__ + else + { + const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT; + size_t new_srcrawofs = srcrawofs & ~(padding-1); + size_t membuf_ofs = srcrawofs - new_srcrawofs; + AlignedDataPtr2D alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0], + CV_OPENCL_DATA_PTR_ALIGNMENT, 
padding*2); + uchar* ptr = alignedPtr.getAlignedPtr(); + + CV_Assert(new_srcstep[0] >= new_sz[0]); + total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding); + total = std::min(total, u->size - new_srcrawofs); + CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, + new_srcrawofs, total, ptr, 0, 0, 0)); + for( size_t i = 0; i < new_sz[1]; i++ ) + memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]); + } +#else else { AlignedDataPtr2D alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); @@ -4989,6 +5008,7 @@ public: new_dststep[0], 0, ptr, 0, 0, 0)); } +#endif } } @@ -5095,6 +5115,30 @@ public: CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0)); } +#ifdef __APPLE__ + else + { + const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT; + size_t new_dstrawofs = dstrawofs & ~(padding-1); + size_t membuf_ofs = dstrawofs - new_dstrawofs; + AlignedDataPtr2D alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0], + CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2); + uchar* ptr = alignedPtr.getAlignedPtr(); + + CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]); + total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding); + total = std::min(total, u->size - new_dstrawofs); + /*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n", + (int)new_sz[0], (int)new_sz[1], (int)membuf_ofs, + (int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/ + CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, + new_dstrawofs, total, ptr, 0, 0, 0)); + for( size_t i = 0; i < new_sz[1]; i++ ) + memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]); + CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, + new_dstrawofs, total, ptr, 0, 0, 0)); + } +#else else { AlignedDataPtr2D alignedPtr((uchar*)srcptr, 
new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); @@ -5106,6 +5150,7 @@ public: new_srcstep[0], 0, ptr, 0, 0, 0)); } +#endif } u->markHostCopyObsolete(true); #ifdef HAVE_OPENCL_SVM @@ -5247,6 +5292,41 @@ public: CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, srcrawofs, dstrawofs, total, 0, 0, 0)); } +#ifdef __APPLE__ + else + { + const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT; + size_t new_srcrawofs = srcrawofs & ~(padding-1); + size_t srcmembuf_ofs = srcrawofs - new_srcrawofs; + size_t new_dstrawofs = dstrawofs & ~(padding-1); + size_t dstmembuf_ofs = dstrawofs - new_dstrawofs; + + AlignedDataPtr2D srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0], + CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2); + AlignedDataPtr2D dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0], + CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2); + uchar* srcptr = srcBuf.getAlignedPtr(); + uchar* dstptr = dstBuf.getAlignedPtr(); + + CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]); + + size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding); + src_total = std::min(src_total, src->size - new_srcrawofs); + size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding); + dst_total = std::min(dst_total, dst->size - new_dstrawofs); + + CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE, + new_srcrawofs, src_total, srcptr, 0, 0, 0)); + CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE, + new_dstrawofs, dst_total, dstptr, 0, 0, 0)); + + for( size_t i = 0; i < new_sz[1]; i++ ) + memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0], + srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]); + CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE, + new_dstrawofs, dst_total, dstptr, 0, 0, 0)); + } +#else else { CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, @@ -5255,6 +5335,7 @@ public: new_dststep[0], 0, 0, 
0, 0)); } +#endif } if (retval == CL_SUCCESS) { diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index d3232e5726..8a512a9511 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -3359,6 +3359,11 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr normType &= ~NORM_RELATIVE; bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; +#ifdef __APPLE__ + if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty()) + return false; +#endif + if (normsum) { if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ? diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 2d5feca266..a8bdb1b3d2 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1403,6 +1403,7 @@ void morph(int op, int src_type, int dst_type, #define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) +#ifndef __APPLE__ static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int op, int actual_op = -1, InputArray _extraMat = noArray()) { @@ -1628,16 +1629,15 @@ static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kerne } return kernel.run(2, globalsize, NULL, false); - } +#endif static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int iterations, int op, int borderType, const Scalar &, int actual_op = -1, InputArray _extraMat = noArray()) { const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), depth = CV_MAT_DEPTH(type), - cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); Mat kernel = _kernel.getMat(); Size ksize = !kernel.empty() ? 
kernel.size() : Size(3, 3), ssize = _src.size(); @@ -1664,14 +1664,13 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, iterations = 1; } +#ifndef __APPLE__ + int esz = CV_ELEM_SIZE(type); // try to use OpenCL kernel adopted for small morph kernel - if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) && + if (dev.isIntel() && ((ksize.width < 5 && ksize.height < 5 && esz <= 4) || (ksize.width == 5 && ksize.height == 5 && cn == 1)) && (iterations == 1) -#if defined __APPLE__ - && cn == 1 -#endif ) { if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat)) @@ -1680,6 +1679,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat)) return true; } +#endif if (iterations == 0 || kernel.rows*kernel.cols == 1) { diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index 03d215d561..77168a422e 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -392,9 +392,11 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray { CV_INSTRUMENT_REGION() +#ifndef __APPLE__ CV_OCL_RUN(_flow.isUMat() && ocl::Image2D::isFormatSupported(CV_32F, 1, false), calc_ocl(_I0, _I1, _flow)) +#endif Mat I0 = _I0.getMat(); Mat I1 = _I1.getMat();