diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index eeb83c0744..5e667b2132 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -2572,27 +2572,38 @@ public:
          - number of channels
      */
     int flags;
+
     //! the matrix dimensionality, >= 2
     int dims;
-    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
-    int rows, cols;
+
+    //! number of rows in the matrix; -1 when the matrix has more than 2 dimensions
+    int rows;
+
+    //! number of columns in the matrix; -1 when the matrix has more than 2 dimensions
+    int cols;
 
     //! custom allocator
     MatAllocator* allocator;
-    UMatUsageFlags usageFlags; // usage flags for allocator
+
+    //! usage flags for allocator; avoid setting directly - pass them to the constructor, create() or getUMat() instead
+    UMatUsageFlags usageFlags;
+
     //! and the standard allocator
     static MatAllocator* getStdAllocator();
 
     //! internal use method: updates the continuity flag
     void updateContinuityFlag();
 
-    // black-box container of UMat data
+    //! black-box container of UMat data
     UMatData* u;
 
-    // offset of the submatrix (or 0)
+    //! offset of the submatrix (or 0)
     size_t offset;
 
+    //! dimensional size of the matrix; accessible in various formats
     MatSize size;
+
+    //! number of bytes each matrix element/row/plane/dimension occupies
     MatStep step;
 
 protected:
diff --git a/modules/core/perf/opencl/perf_usage_flags.cpp b/modules/core/perf/opencl/perf_usage_flags.cpp
index d59087121f..0717121d1c 100644
--- a/modules/core/perf/opencl/perf_usage_flags.cpp
+++ b/modules/core/perf/opencl/perf_usage_flags.cpp
@@ -12,25 +12,33 @@ namespace opencv_test {
 
 namespace ocl {
 
-typedef TestBaseWithParam<std::tuple<cv::Size, bool> > UsageFlagsBoolFixture;
+typedef TestBaseWithParam<std::tuple<cv::Size, UMatUsageFlags, UMatUsageFlags, UMatUsageFlags>> SizeUsageFlagsFixture;
 
-OCL_PERF_TEST_P(UsageFlagsBoolFixture, UsageFlags_AllocHostMem, ::testing::Combine(OCL_TEST_SIZES, Bool()))
+OCL_PERF_TEST_P(SizeUsageFlagsFixture, UsageFlags_AllocMem,
+    ::testing::Combine(
+        OCL_TEST_SIZES,
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY) // USAGE_ALLOCATE_SHARED_MEMORY
+    ))
 {
     Size sz = get<0>(GetParam());
-    bool allocHostMem = get<1>(GetParam());
+    UMatUsageFlags srcAllocMem = get<1>(GetParam());
+    UMatUsageFlags dstAllocMem = get<2>(GetParam());
+    UMatUsageFlags finalAllocMem = get<3>(GetParam());
 
-    UMat src(sz, CV_8UC1, Scalar::all(128));
+    UMat src(sz, CV_8UC1, Scalar::all(128), srcAllocMem);
 
     OCL_TEST_CYCLE()
     {
-        UMat dst(allocHostMem ? USAGE_ALLOCATE_HOST_MEMORY : USAGE_DEFAULT);
+        UMat dst(dstAllocMem);
 
         cv::add(src, Scalar::all(1), dst);
         {
             Mat canvas = dst.getMat(ACCESS_RW);
             cv::putText(canvas, "Test", Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar::all(255));
         }
-        UMat final;
+        UMat final(finalAllocMem);
         cv::subtract(dst, Scalar::all(1), final);
     }
 
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 8749b29ec8..6ca61acc18 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -5518,13 +5518,20 @@ public:
                     && !(u->originalUMatData && u->originalUMatData->handle)
                    )
                 {
-                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
+                    // Change the host-side origdata[size] to "pinned memory" that enables fast
+                    // DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
+                    // CL_MEM_ALLOC_HOST_PTR is masked out: OpenCL does not allow combining it with CL_MEM_USE_HOST_PTR.
+                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
                                             u->size, u->origdata, &retval);
-                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
+                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
                             (long long int)u->size, u->origdata, (void*)handle).c_str());
                 }
                 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
                 {
+                    // Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
+                    // If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
+                    // additionally allocate a host-side "pinned" duplicate of the origdata that is
+                    // managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
                     handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
                                                u->size, u->origdata, &retval);
                     CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp
index 09ba92ecde..c80d240ecc 100644
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@@ -307,8 +307,7 @@ UMat& UMat::operator=(const UMat& m)
         else
             copySize(m);
         allocator = m.allocator;
-        if (usageFlags == USAGE_DEFAULT)
-            usageFlags = m.usageFlags;
+        usageFlags = m.usageFlags;
         u = m.u;
         offset = m.offset;
     }
@@ -332,9 +331,6 @@ void UMat::assignTo(UMat& m, int _type) const
 
 void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
 {
-    _type &= TYPE_MASK;
-    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
-        return;
     int sz[] = {_rows, _cols};
     create(2, sz, _type, _usageFlags);
 }
@@ -426,7 +422,9 @@ UMat& UMat::operator=(UMat&& m)
         m.step.p = m.step.buf;
         m.size.p = &m.rows;
     }
-    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.flags = MAGIC_VAL;
+    m.usageFlags = USAGE_DEFAULT;
+    m.dims = m.rows = m.cols = 0;
     m.allocator = NULL;
     m.u = NULL;
     m.offset = 0;
@@ -600,6 +598,7 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
         CV_XADD(&(u->urefcount), 1);
     }
     hdr.flags = flags;
+    hdr.usageFlags = usageFlags;
     setSize(hdr, dims, size.p, step.p);
     finalizeHdr(hdr);
     hdr.u = new_u;
@@ -610,16 +609,21 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
 
 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
 {
-    this->usageFlags = _usageFlags;
-
     int i;
     CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
     _type = CV_MAT_TYPE(_type);
 
-    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
+    // USAGE_DEFAULT (whether passed explicitly or via the default argument) means
+    // "keep the usageFlags this UMat already has".
+    // Consequently create() cannot reset a non-default usage back to USAGE_DEFAULT;
+    // construct a fresh UMat() for that.
+    if (_usageFlags == cv::USAGE_DEFAULT)
+    {
+        _usageFlags = usageFlags;
+    }
+
+    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() && _usageFlags == usageFlags )
     {
-        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
-            return;
         for( i = 0; i < d; i++ )
             if( size[i] != _sizes[i] )
                 break;
@@ -636,6 +640,7 @@ void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlag
     }
 
     release();
+    usageFlags = _usageFlags;
     if( d == 0 )
         return;
     flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
diff --git a/modules/core/test/test_opencl.cpp b/modules/core/test/test_opencl.cpp
index 17cd7b5c89..ff1391016e 100644
--- a/modules/core/test/test_opencl.cpp
+++ b/modules/core/test/test_opencl.cpp
@@ -207,9 +207,32 @@ TEST_P(OCL_OpenCLExecutionContext_P, ScopeTest)
     executeUMatCall();
 }
 
-
-
 INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_OpenCLExecutionContext_P, getOpenCLTestConfigurations());
 
 
+typedef testing::TestWithParam<UMatUsageFlags> UsageFlagsFixture;
+OCL_TEST_P(UsageFlagsFixture, UsageFlagsRetained)
+{
+    if (!cv::ocl::useOpenCL())
+    {
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    const UMatUsageFlags usage = GetParam();
+    cv::UMat flip_in(10, 10, CV_32F, usage);
+    cv::UMat flip_out(usage);
+    cv::flip(flip_in, flip_out, 1);
+    cv::ocl::finish();
+
+    ASSERT_EQ(usage, flip_in.usageFlags);
+    ASSERT_EQ(usage, flip_out.usageFlags);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    /*nothing*/,
+    UsageFlagsFixture,
+    testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY)
+);
+
+
 } } // namespace opencv_test::ocl