diff --git a/modules/core/doc/utility_and_system_functions_and_macros.rst b/modules/core/doc/utility_and_system_functions_and_macros.rst index 30577c4483..861f98bfc8 100644 --- a/modules/core/doc/utility_and_system_functions_and_macros.rst +++ b/modules/core/doc/utility_and_system_functions_and_macros.rst @@ -255,7 +255,7 @@ The function allocates the buffer of the specified size and returns it. When the fastFree ------------- +-------- Deallocates a memory buffer. .. ocv:function:: void fastFree(void* ptr) @@ -280,6 +280,14 @@ The function acts like ``sprintf`` but forms and returns an STL string. It can :ocv:class:`Exception` constructor. +getBuildInformation +------------------- +Returns the full configuration-time CMake output. + +.. ocv:function:: const std::string& getBuildInformation() + +The returned value is the raw CMake output, including the version control system revision, compiler version, compiler flags, enabled modules and third-party libraries, etc. The output format depends on the target architecture. + checkHardwareSupport -------------------- @@ -304,7 +312,7 @@ Returns true if the specified feature is supported by the host hardware. The function returns true if the host hardware supports the specified feature. When user calls ``setUseOptimized(false)``, the subsequent calls to ``checkHardwareSupport()`` will return false until ``setUseOptimized(true)`` is called. This way user can dynamically switch on and off the optimized code in OpenCV. getNumThreads ------------------ +------------- Returns the number of threads used by OpenCV. .. ocv:function:: int getNumThreads() @@ -318,7 +326,7 @@ The function returns the number of threads that is used by OpenCV. getThreadNum ----------------- +------------ Returns the index of the currently executed thread. .. ocv:function:: int getThreadNum() @@ -332,7 +340,7 @@ The function returns a 0-based index of the currently executed thread. The funct getTickCount ----------------- +------------ Returns the number of ticks. .. 
ocv:function:: int64 getTickCount() @@ -346,7 +354,7 @@ It can be used to initialize getTickFrequency --------------------- +---------------- Returns the number of ticks per second. .. ocv:function:: double getTickFrequency() @@ -363,7 +371,7 @@ That is, the following code computes the execution time in seconds: :: getCPUTickCount ----------------- +--------------- Returns the number of CPU ticks. .. ocv:function:: int64 getCPUTickCount() @@ -417,7 +425,7 @@ The function sets the number of threads used by OpenCV in parallel OpenMP region setUseOptimized ------------------ +--------------- Enables or disables the optimized code. .. ocv:function:: int cvUseOptimized( int on_off ) @@ -433,7 +441,7 @@ The function can be used to dynamically turn on and off optimized code (code tha By default, the optimized code is enabled unless you disable it in CMake. The current status can be retrieved using ``useOptimized``. useOptimized ------------------ +------------ Returns the status of optimized code usage. .. 
ocv:function:: bool useOptimized() diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index 5515ebff2e..5af419c939 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -39,4 +39,12 @@ TEST(Core_OutputArraySreate, _1997) Size submatSize = Size(256, 256); ASSERT_NO_THROW(local::create( mat(Rect(Point(), submatSize)), submatSize, mat.type() )); -} \ No newline at end of file +} + +TEST(Core_SaturateCast, NegativeNotClipped) +{ + double d = -1.0; + unsigned int val = cv::saturate_cast(d); + + ASSERT_EQ(0xffffffff, val); +} diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index d34b38a250..cbc471c815 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -55,6 +55,128 @@ using namespace cv::gpu; using namespace cv::gpu::device; +namespace detail +{ + __device__ __forceinline__ int cvAtomicAdd(int* address, int val) + { + return ::atomicAdd(address, val); + } + __device__ __forceinline__ unsigned int cvAtomicAdd(unsigned int* address, unsigned int val) + { + return ::atomicAdd(address, val); + } + __device__ __forceinline__ float cvAtomicAdd(float* address, float val) + { + #if __CUDA_ARCH__ >= 200 + return ::atomicAdd(address, val); + #else + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(val + __int_as_float(assumed))); + } while (assumed != old); + return __int_as_float(old); + #endif + } + __device__ __forceinline__ double cvAtomicAdd(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return 
__longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + __device__ __forceinline__ int cvAtomicMin(int* address, int val) + { + return ::atomicMin(address, val); + } + __device__ __forceinline__ float cvAtomicMin(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fminf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + __device__ __forceinline__ double cvAtomicMin(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, assumed, + __double_as_longlong(::fmin(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } + + __device__ __forceinline__ int cvAtomicMax(int* address, int val) + { + return ::atomicMax(address, val); + } + __device__ __forceinline__ float cvAtomicMax(float* address, float val) + { + #if __CUDA_ARCH__ >= 120 + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(::fmaxf(val, __int_as_float(assumed)))); + } while (assumed != old); + return __int_as_float(old); + #else + (void) address; + (void) val; + return 0.0f; + #endif + } + __device__ __forceinline__ double cvAtomicMax(double* address, double val) + { + #if __CUDA_ARCH__ >= 130 + unsigned long long int* address_as_ull = (unsigned long long int*) address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_ull, 
assumed, + __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); + #else + (void) address; + (void) val; + return 0.0; + #endif + } +} + namespace detail { template struct Unroll; @@ -152,7 +274,7 @@ namespace sum { static __device__ void run(R* ptr, R val) { - ::atomicAdd(ptr, val); + detail::cvAtomicAdd(ptr, val); } }; template struct AtomicAdd @@ -161,8 +283,8 @@ namespace sum static __device__ void run(R* ptr, val_type val) { - ::atomicAdd(ptr, val.x); - ::atomicAdd(ptr + 1, val.y); + detail::cvAtomicAdd(ptr, val.x); + detail::cvAtomicAdd(ptr + 1, val.y); } }; template struct AtomicAdd @@ -171,9 +293,9 @@ namespace sum static __device__ void run(R* ptr, val_type val) { - ::atomicAdd(ptr, val.x); - ::atomicAdd(ptr + 1, val.y); - ::atomicAdd(ptr + 2, val.z); + detail::cvAtomicAdd(ptr, val.x); + detail::cvAtomicAdd(ptr + 1, val.y); + detail::cvAtomicAdd(ptr + 2, val.z); } }; template struct AtomicAdd @@ -182,10 +304,10 @@ namespace sum static __device__ void run(R* ptr, val_type val) { - ::atomicAdd(ptr, val.x); - ::atomicAdd(ptr + 1, val.y); - ::atomicAdd(ptr + 2, val.z); - ::atomicAdd(ptr + 3, val.w); + detail::cvAtomicAdd(ptr, val.x); + detail::cvAtomicAdd(ptr + 1, val.y); + detail::cvAtomicAdd(ptr + 2, val.z); + detail::cvAtomicAdd(ptr + 3, val.w); } }; @@ -229,41 +351,6 @@ namespace sum #endif } }; - template - struct GlobalReduce - { - typedef typename TypeVec::vec_type result_type; - - static __device__ void run(result_type& sum, result_type* result, int tid, int bid, double* smem) - { - __shared__ bool is_last; - - if (tid == 0) - { - result[bid] = sum; - - __threadfence(); - - unsigned int ticket = ::atomicAdd(&blocks_finished, 1); - is_last = (ticket == gridDim.x * gridDim.y - 1); - } - - __syncthreads(); - - if (is_last) - { - sum = tid < gridDim.x * gridDim.y ? 
result[tid] : VecTraits::all(0); - - device::reduce(detail::Unroll::template smem_tuple(smem), detail::Unroll::tie(sum), tid, detail::Unroll::op(plus())); - - if (tid == 0) - { - result[0] = sum; - blocks_finished = 0; - } - } - } - }; template __global__ void kernel(const PtrStepSz src, result_type* result, const Op op, const int twidth, const int theight) @@ -518,53 +605,12 @@ namespace minMax struct GlobalReduce { static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval) - { - __shared__ bool is_last; - - if (tid == 0) - { - minval[bid] = mymin; - maxval[bid] = mymax; - - __threadfence(); - - unsigned int ticket = ::atomicAdd(&blocks_finished, 1); - is_last = (ticket == gridDim.x * gridDim.y - 1); - } - - __syncthreads(); - - if (is_last) - { - int idx = ::min(tid, gridDim.x * gridDim.y - 1); - - mymin = minval[idx]; - mymax = maxval[idx]; - - const minimum minOp; - const maximum maxOp; - device::reduce(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp)); - - if (tid == 0) - { - minval[0] = mymin; - maxval[0] = mymax; - - blocks_finished = 0; - } - } - } - }; - template - struct GlobalReduce - { - static __device__ void run(int& mymin, int& mymax, int* minval, int* maxval, int tid, int bid, int* sminval, int* smaxval) { #if __CUDA_ARCH__ >= 200 if (tid == 0) { - ::atomicMin(minval, mymin); - ::atomicMax(maxval, mymax); + detail::cvAtomicMin(minval, mymin); + detail::cvAtomicMax(maxval, mymax); } #else __shared__ bool is_last; @@ -589,8 +635,8 @@ namespace minMax mymin = minval[idx]; mymax = maxval[idx]; - const minimum minOp; - const maximum maxOp; + const minimum minOp; + const maximum maxOp; device::reduce(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp)); if (tid == 0) @@ -672,12 +718,19 @@ namespace minMax *minval_buf = numeric_limits::max(); *maxval_buf = numeric_limits::min(); } + __global__ void 
setDefaultKernel(float* minval_buf, float* maxval_buf) + { + *minval_buf = numeric_limits::max(); + *maxval_buf = -numeric_limits::max(); + } + __global__ void setDefaultKernel(double* minval_buf, double* maxval_buf) + { + *minval_buf = numeric_limits::max(); + *maxval_buf = -numeric_limits::max(); + } template - void setDefault(R*, R*) - { - } - void setDefault(int* minval_buf, int* maxval_buf) + void setDefault(R* minval_buf, R* maxval_buf) { setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf); } @@ -728,21 +781,19 @@ namespace minMax namespace minMaxLoc { - __device__ unsigned int blocks_finished = 0; - // To avoid shared bank conflicts we convert each value into value of // appropriate type (32 bits minimum) template struct MinMaxTypeTraits; - template <> struct MinMaxTypeTraits { typedef int best_type; }; - template <> struct MinMaxTypeTraits { typedef int best_type; }; - template <> struct MinMaxTypeTraits { typedef int best_type; }; + template <> struct MinMaxTypeTraits { typedef int best_type; }; + template <> struct MinMaxTypeTraits { typedef int best_type; }; + template <> struct MinMaxTypeTraits { typedef int best_type; }; template <> struct MinMaxTypeTraits { typedef int best_type; }; template <> struct MinMaxTypeTraits { typedef int best_type; }; template <> struct MinMaxTypeTraits { typedef float best_type; }; template <> struct MinMaxTypeTraits { typedef double best_type; }; template - __global__ void kernel(const PtrStepSz src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight) + __global__ void kernel_pass_1(const PtrStepSz src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight) { typedef typename MinMaxTypeTraits::best_type work_type; @@ -750,7 +801,6 @@ namespace minMaxLoc __shared__ work_type smaxval[BLOCK_SIZE]; __shared__ unsigned int sminloc[BLOCK_SIZE]; __shared__ unsigned int smaxloc[BLOCK_SIZE]; - 
__shared__ bool is_last; const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x; const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y; @@ -799,38 +849,36 @@ namespace minMaxLoc maxval[bid] = (T) mymax; minloc[bid] = myminloc; maxloc[bid] = mymaxloc; - - __threadfence(); - - unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y); - is_last = (ticket == gridDim.x * gridDim.y - 1); } + } + template + __global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count) + { + typedef typename MinMaxTypeTraits::best_type work_type; - __syncthreads(); + __shared__ work_type sminval[BLOCK_SIZE]; + __shared__ work_type smaxval[BLOCK_SIZE]; + __shared__ unsigned int sminloc[BLOCK_SIZE]; + __shared__ unsigned int smaxloc[BLOCK_SIZE]; - if (is_last) + unsigned int idx = ::min(threadIdx.x, count - 1); + + work_type mymin = minval[idx]; + work_type mymax = maxval[idx]; + unsigned int myminloc = minloc[idx]; + unsigned int mymaxloc = maxloc[idx]; + + reduceKeyVal(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), + smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc), + threadIdx.x, + thrust::make_tuple(less(), greater())); + + if (threadIdx.x == 0) { - unsigned int idx = ::min(tid, gridDim.x * gridDim.y - 1); - - mymin = minval[idx]; - mymax = maxval[idx]; - myminloc = minloc[idx]; - mymaxloc = maxloc[idx]; - - reduceKeyVal(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), - smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc), - tid, - thrust::make_tuple(less(), greater())); - - if (tid == 0) - { - minval[0] = (T) mymin; - maxval[0] = (T) mymax; - minloc[0] = myminloc; - maxloc[0] = mymaxloc; - - blocks_finished = 0; - } + minval[0] = (T) mymin; + maxval[0] = (T) mymax; + minloc[0] = myminloc; + maxloc[0] = mymaxloc; } } @@ -877,12 +925,15 @@ namespace minMaxLoc unsigned int* maxloc_buf = locbuf.ptr(1); if (mask.data) - kernel<<>>((PtrStepSz) src, SingleMask(mask), 
minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight); + kernel_pass_1<<>>((PtrStepSz) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight); else - kernel<<>>((PtrStepSz) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight); + kernel_pass_1<<>>((PtrStepSz) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight); cudaSafeCall( cudaGetLastError() ); + kernel_pass_2<<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y); + cudaSafeCall( cudaGetLastError() ); + cudaSafeCall( cudaDeviceSynchronize() ); T minval_, maxval_; @@ -898,9 +949,9 @@ namespace minMaxLoc maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols; } - template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); - template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); - template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); + template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); + template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); + template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb 
valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp index 445a9e6208..a6952a2cba 100644 --- a/modules/highgui/src/cap_ffmpeg_impl.hpp +++ b/modules/highgui/src/cap_ffmpeg_impl.hpp @@ -49,6 +49,10 @@ #pragma warning( disable: 4244 4510 4512 4610 ) #endif +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -2054,7 +2058,7 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi if (ret < 0) { - if (ret == AVERROR_EOF) + if (ret == (int)AVERROR_EOF) *endOfFile = true; return false; } diff --git a/modules/java/generator/src/java/android+AsyncServiceHelper.java b/modules/java/generator/src/java/android+AsyncServiceHelper.java index adf0e41a1b..568f3da170 100644 --- a/modules/java/generator/src/java/android+AsyncServiceHelper.java +++ b/modules/java/generator/src/java/android+AsyncServiceHelper.java @@ -3,6 +3,7 @@ package org.opencv.android; import java.io.File; import java.util.StringTokenizer; +import org.opencv.core.Core; import org.opencv.engine.OpenCVEngineInterface; import android.content.ComponentName; @@ -85,7 +86,6 @@ class AsyncServiceHelper { mServiceInstallationProgress = true; Log.d(TAG, "Package installation started"); - } else { @@ -299,6 +299,10 @@ class AsyncServiceHelper if (initOpenCVLibs(path, libs)) { Log.d(TAG, "First attempt to load libs is OK"); + String eol = System.getProperty("line.separator"); + for (String str : Core.getBuildInformation().split(eol)) + Log.i(TAG, str); + status = LoaderCallbackInterface.SUCCESS; } else diff --git a/modules/java/generator/src/java/android+StaticHelper.java b/modules/java/generator/src/java/android+StaticHelper.java index 7d7b64dde4..8d0629c8d3 100644 --- 
a/modules/java/generator/src/java/android+StaticHelper.java +++ b/modules/java/generator/src/java/android+StaticHelper.java @@ -1,7 +1,8 @@ package org.opencv.android; -import java.util.StringTokenizer; +import org.opencv.core.Core; +import java.util.StringTokenizer; import android.util.Log; class StaticHelper { @@ -28,6 +29,10 @@ class StaticHelper { if (initOpenCVLibs(libs)) { Log.d(TAG, "First attempt to load libs is OK"); + String eol = System.getProperty("line.separator"); + for (String str : Core.getBuildInformation().split(eol)) + Log.i(TAG, str); + result = true; } else