diff --git a/apps/annotation/CMakeLists.txt b/apps/annotation/CMakeLists.txt index cf244cbb30..e14721ac6b 100644 --- a/apps/annotation/CMakeLists.txt +++ b/apps/annotation/CMakeLists.txt @@ -1,19 +1,21 @@ -SET(deps opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio) -ocv_check_dependencies(${deps}) +SET(OPENCV_ANNOTATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio) +ocv_check_dependencies(${OPENCV_ANNOTATION_DEPS}) if(NOT OCV_DEPENDENCIES_FOUND) return() endif() project(annotation) - -ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") -ocv_include_modules(${deps}) - set(the_target opencv_annotation) -add_executable(${the_target} opencv_annotation.cpp) -target_link_libraries(${the_target} ${deps}) +ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") +ocv_target_include_modules(${the_target} ${OPENCV_ANNOTATION_DEPS}) + +file(GLOB SRCS *.cpp) + +set(annotation_files ${SRCS}) +ocv_add_executable(${the_target} ${annotation_files}) +ocv_target_link_libraries(${the_target} ${OPENCV_ANNOTATION_DEPS}) set_target_properties(${the_target} PROPERTIES DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" diff --git a/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown b/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown index ed2d3510f3..e256bcea03 100644 --- a/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown +++ b/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown @@ -49,7 +49,7 @@ pyopencv_generated_\*.h files). But there may be some basic OpenCV datatypes lik Size. They need to be extended manually. For example, a Mat type should be extended to Numpy array, Size should be extended to a tuple of two integers etc. Similarly, there may be some complex structs/classes/functions etc. which need to be extended manually. All such manual wrapper functions -are placed in modules/python/src2/pycv2.hpp. +are placed in modules/python/src2/cv2.cpp. So now only thing left is the compilation of these wrapper files which gives us **cv2** module. So when you call a function, say res = equalizeHist(img1,img2) in Python, you pass two numpy arrays and diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown index 0e983d925d..b310c65b9f 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown @@ -54,25 +54,19 @@ int main( int argc, char** argv ) if( !img_1.data || !img_2.data ) { std::cout<< " --(!) Error reading images " << std::endl; return -1; } - //-- Step 1: Detect the keypoints using SURF Detector + //-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors int minHessian = 400; - SurfFeatureDetector detector( minHessian ); + Ptr detector = SURF::create(); + detector->setMinHessian(minHessian); std::vector keypoints_1, keypoints_2; - - detector.detect( img_1, keypoints_1 ); - detector.detect( img_2, keypoints_2 ); - - //-- Step 2: Calculate descriptors (feature vectors) - SurfDescriptorExtractor extractor; - Mat descriptors_1, descriptors_2; - extractor.compute( img_1, keypoints_1, descriptors_1 ); - extractor.compute( img_2, keypoints_2, descriptors_2 ); + detector->detectAndCompute( img_1, keypoints_1, descriptors_1 ); + detector->detectAndCompute( img_2, keypoints_2, descriptors_2 ); - //-- Step 3: Matching descriptor vectors using FLANN matcher + //-- Step 2: Matching descriptor vectors using FLANN matcher FlannBasedMatcher matcher; std::vector< DMatch > matches; matcher.match( descriptors_1, descriptors_2, matches ); diff --git a/doc/tutorials/features2d/feature_homography/feature_homography.markdown b/doc/tutorials/features2d/feature_homography/feature_homography.markdown index dae120b898..98c3374ea2 100644 --- a/doc/tutorials/features2d/feature_homography/feature_homography.markdown +++ b/doc/tutorials/features2d/feature_homography/feature_homography.markdown @@ -42,25 +42,18 @@ int main( int argc, char** argv ) if( !img_object.data || !img_scene.data ) { std::cout<< " --(!) Error reading images " << std::endl; return -1; } - //-- Step 1: Detect the keypoints using SURF Detector + //-- Step 1: Detect the keypoints and extract descriptors using SURF int minHessian = 400; - SurfFeatureDetector detector( minHessian ); + Ptr detector = SURF::create( minHessian ); std::vector keypoints_object, keypoints_scene; - - detector.detect( img_object, keypoints_object ); - detector.detect( img_scene, keypoints_scene ); - - //-- Step 2: Calculate descriptors (feature vectors) - SurfDescriptorExtractor extractor; - Mat descriptors_object, descriptors_scene; - extractor.compute( img_object, keypoints_object, descriptors_object ); - extractor.compute( img_scene, keypoints_scene, descriptors_scene ); + detector->detectAndCompute( img_object, keypoints_object, descriptors_object ); + detector->detectAndCompute( img_scene, keypoints_scene, descriptors_scene ); - //-- Step 3: Matching descriptor vectors using FLANN matcher + //-- Step 2: Matching descriptor vectors using FLANN matcher FlannBasedMatcher matcher; std::vector< DMatch > matches; matcher.match( descriptors_object, descriptors_scene, matches ); diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index f87e15ee6a..173722f61f 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -184,6 +184,7 @@ public: // After fix restore code in arithm.cpp: ocl_compare() inline bool isAMD() const { return vendorID() == VENDOR_AMD; } inline bool isIntel() const { return vendorID() == VENDOR_INTEL; } + inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; } int maxClockFrequency() const; int maxComputeUnits() const; diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index efc22dc0e4..cc86d2972d 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -180,10 +180,9 @@ public: const int K = centers.rows; const int dims = centers.cols; - const float *sample; for( int i = begin; i(i); + const float *sample = data.ptr(i); int k_best = 0; double min_dist = DBL_MAX; diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index e1e9caa837..c1d2c34852 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -205,9 +205,12 @@ public: void deallocate(UMatData* u) const { + if(!u) + return; + CV_Assert(u->urefcount >= 0); CV_Assert(u->refcount >= 0); - if(u && u->refcount == 0) + if(u->refcount == 0) { if( !(u->flags & UMatData::USER_ALLOCATED) ) { diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 348a68fd3b..a5f3e06366 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -2114,6 +2114,12 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL) { const ocl::Device & dev = ocl::Device::getDefault(); + +#ifdef ANDROID + if (dev.isNVidia()) + return false; +#endif + bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), haveSrc2 = _src2.kind() != _InputArray::NONE; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), @@ -2885,6 +2891,12 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth) static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result ) { const ocl::Device & d = ocl::Device::getDefault(); + +#ifdef ANDROID + if (d.isNVidia()) + return false; +#endif + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool doubleSupport = d.doubleFPConfig() > 0, haveMask = _mask.kind() != _InputArray::NONE; @@ -3250,6 +3262,11 @@ namespace cv { static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) { +#ifdef ANDROID + if (ocl::Device::getDefault().isNVidia()) + return false; +#endif + Scalar sc1, sc2; int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool relative = (normType & NORM_RELATIVE) != 0; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 91966b2d70..cefae8cb88 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -549,13 +549,9 @@ String tempfile( const char* suffix ) #if defined WIN32 || defined _WIN32 #ifdef WINRT RoInitialize(RO_INIT_MULTITHREADED); - std::wstring temp_dir = L""; - const wchar_t* opencv_temp_dir = GetTempPathWinRT().c_str(); - if (opencv_temp_dir) - temp_dir = std::wstring(opencv_temp_dir); + std::wstring temp_dir = GetTempPathWinRT(); - std::wstring temp_file; - temp_file = GetTempFileNameWinRT(L"ocv"); + std::wstring temp_file = GetTempFileNameWinRT(L"ocv"); if (temp_file.empty()) return String(); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 0541819f89..f1efe9b22d 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -331,7 +331,11 @@ OCL_TEST_P(Mul, Mat_Scale) OCL_OFF(cv::multiply(src1_roi, src2_roi, dst1_roi, val[0])); OCL_ON(cv::multiply(usrc1_roi, usrc2_roi, udst1_roi, val[0])); +#ifdef ANDROID + Near(udst1_roi.depth() >= CV_32F ? 2e-1 : 1); +#else Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1); +#endif } } diff --git a/modules/features2d/perf/opencl/perf_orb.cpp b/modules/features2d/perf/opencl/perf_orb.cpp index e9aadf50f3..a7d96f12a5 100644 --- a/modules/features2d/perf/opencl/perf_orb.cpp +++ b/modules/features2d/perf/opencl/perf_orb.cpp @@ -61,6 +61,12 @@ OCL_PERF_TEST_P(ORBFixture, ORB_Full, ORB_IMAGES) string filename = getDataPath(GetParam()); Mat mframe = imread(filename, IMREAD_GRAYSCALE); + double desc_eps = 1e-6; +#ifdef ANDROID + if (cv::ocl::Device::getDefault().isNVidia()) + desc_eps = 2; +#endif + if (mframe.empty()) FAIL() << "Unable to load source image " << filename; @@ -77,7 +83,7 @@ OCL_PERF_TEST_P(ORBFixture, ORB_Full, ORB_IMAGES) ::perf::sort(points, descriptors); SANITY_CHECK_KEYPOINTS(points, 1e-5); - SANITY_CHECK(descriptors); + SANITY_CHECK(descriptors, desc_eps); } } // ocl diff --git a/modules/features2d/src/akaze.cpp b/modules/features2d/src/akaze.cpp index 4037d1c5c3..1b6e30e459 100644 --- a/modules/features2d/src/akaze.cpp +++ b/modules/features2d/src/akaze.cpp @@ -172,7 +172,14 @@ namespace cv cvtColor(image, img, COLOR_BGR2GRAY); Mat img1_32; - img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0); + if ( img.depth() == CV_32F ) + img1_32 = img; + else if ( img.depth() == CV_8U ) + img.convertTo(img1_32, CV_32F, 1.0 / 255.0, 0); + else if ( img.depth() == CV_16U ) + img.convertTo(img1_32, CV_32F, 1.0 / 65535.0, 0); + + CV_Assert( ! img1_32.empty() ); AKAZEOptions options; options.descriptor = descriptor; diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 83cafc9653..3db822db3d 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1221,7 +1221,22 @@ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth, bool normalize = true, int borderType = BORDER_DEFAULT ); -/** @todo document +/** @brief Calculates the normalized sum of squares of the pixel values overlapping the filter. + +For every pixel \f$ (x, y) \f$ in the source image, the function calculates the sum of squares of those neighboring +pixel values which overlap the filter placed over the pixel \f$ (x, y) \f$. + +The unnormalized square box filter can be useful in computing local image statistics such as the the local +variance and standard deviation around the neighborhood of a pixel. + +@param _src input image +@param _dst output image of the same size and type as _src +@param ddepth the output image depth (-1 to use src.depth()) +@param ksize kernel size +@param anchor kernel anchor point. The default value of Point(-1, -1) denotes that the anchor is at the kernel +center. +@param normalize flag, specifying whether the kernel is to be normalized by it's area or not. +@param borderType border mode used to extrapolate pixels outside of the image, see cv::BorderTypes @sa boxFilter */ CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index 8893be8839..d51711e0a3 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -230,7 +230,352 @@ static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float #endif -} +#ifdef HAVE_TBB + +// Queue with peaks that will processed serially. +static tbb::concurrent_queue borderPeaks; + +class tbbCanny +{ +public: + tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low, + int _high, int _aperture_size, bool _L2gradient) + : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high), + aperture_size(_aperture_size), L2gradient(_L2gradient) + {} + + // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices. + // The first row of each slice contains the last row of the previous slice and + // the last row of each slice contains the first row of the next slice + // so that each slice is independent and no mutexes are required. + void operator()() const + { +#if CV_SSE2 + bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); +#endif + + const int type = src.type(), cn = CV_MAT_CN(type); + + Mat dx, dy; + + ptrdiff_t mapstep = src.cols + 2; + + // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice + // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which + // uses the pixels outside of the ROI to form a border. + uchar ksize2 = aperture_size / 2; + + if (boundaries.start == 0 && boundaries.end == src.rows) + { + Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn)); + Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn)); + + memset(tempdx.ptr(0), 0, cn * src.cols*sizeof(short)); + memset(tempdy.ptr(0), 0, cn * src.cols*sizeof(short)); + memset(tempdx.ptr(tempdx.rows - 1), 0, cn * src.cols*sizeof(short)); + memset(tempdy.ptr(tempdy.rows - 1), 0, cn * src.cols*sizeof(short)); + + Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); + + dx = tempdx; + dy = tempdy; + } + else if (boundaries.start == 0) + { + Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); + Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); + + memset(tempdx.ptr(0), 0, cn * src.cols*sizeof(short)); + memset(tempdy.ptr(0), 0, cn * src.cols*sizeof(short)); + + Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows), + CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows), + CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); + + dx = tempdx.rowRange(0, tempdx.rows - ksize2); + dy = tempdy.rowRange(0, tempdy.rows - ksize2); + } + else if (boundaries.end == src.rows) + { + Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); + Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn)); + + memset(tempdx.ptr(tempdx.rows - 1), 0, cn * src.cols*sizeof(short)); + memset(tempdy.ptr(tempdy.rows - 1), 0, cn * src.cols*sizeof(short)); + + Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1), + CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1), + CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); + + dx = tempdx.rowRange(ksize2, tempdx.rows); + dy = tempdy.rowRange(ksize2, tempdy.rows); + } + else + { + Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn)); + Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn)); + + Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx, + CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE); + Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy, + CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE); + + dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2); + dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2); + } + + int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10); + std::vector stack(maxsize); + uchar **stack_top = &stack[0]; + uchar **stack_bottom = &stack[0]; + + AutoBuffer buffer(cn * mapstep * 3 * sizeof(int)); + + int* mag_buf[3]; + mag_buf[0] = (int*)(uchar*)buffer; + mag_buf[1] = mag_buf[0] + mapstep*cn; + mag_buf[2] = mag_buf[1] + mapstep*cn; + + // calculate magnitude and angle of gradient, perform non-maxima suppression. + // fill the map with one of the following values: + // 0 - the pixel might belong to an edge + // 1 - the pixel can not belong to an edge + // 2 - the pixel does belong to an edge + for (int i = boundaries.start - 1; i <= boundaries.end; i++) + { + int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1; + + short* _dx = dx.ptr(i - boundaries.start + 1); + short* _dy = dy.ptr(i - boundaries.start + 1); + + if (!L2gradient) + { + int j = 0, width = src.cols * cn; +#if CV_SSE2 + if (haveSSE2) + { + __m128i v_zero = _mm_setzero_si128(); + for ( ; j <= width - 8; j += 8) + { + __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); + __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); + v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx)); + v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy)); + + __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero)); + _mm_storeu_si128((__m128i *)(_norm + j), v_norm); + + v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero)); + _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); + } + } +#elif CV_NEON + for ( ; j <= width - 8; j += 8) + { + int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); + vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))), + vabsq_s32(vmovl_s16(vget_low_s16(v_dy))))); + vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))), + vabsq_s32(vmovl_s16(vget_high_s16(v_dy))))); + } +#endif + for ( ; j < width; ++j) + _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j])); + } + else + { + int j = 0, width = src.cols * cn; +#if CV_SSE2 + if (haveSSE2) + { + for ( ; j <= width - 8; j += 8) + { + __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j)); + __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j)); + + __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx); + __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy); + + __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh)); + _mm_storeu_si128((__m128i *)(_norm + j), v_norm); + + v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh)); + _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm); + } + } +#elif CV_NEON + for ( ; j <= width - 8; j += 8) + { + int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j); + int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy); + int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); + vst1q_s32(_norm + j, v_dst); + + v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy); + v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp); + vst1q_s32(_norm + j + 4, v_dst); + } +#endif + for ( ; j < width; ++j) + _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; + } + + if (cn > 1) + { + for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) + { + int maxIdx = jn; + for(int k = 1; k < cn; ++k) + if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k; + _norm[j] = _norm[maxIdx]; + _dx[j] = _dx[maxIdx]; + _dy[j] = _dy[maxIdx]; + } + } + _norm[-1] = _norm[src.cols] = 0; + + // at the very beginning we do not have a complete ring + // buffer of 3 magnitude rows for non-maxima suppression + if (i <= boundaries.start) + continue; + + uchar* _map = map + mapstep*i + 1; + _map[-1] = _map[src.cols] = 1; + + int* _mag = mag_buf[1] + 1; // take the central row + ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1]; + ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1]; + + const short* _x = dx.ptr(i - boundaries.start); + const short* _y = dy.ptr(i - boundaries.start); + + if ((stack_top - stack_bottom) + src.cols > maxsize) + { + int sz = (int)(stack_top - stack_bottom); + maxsize = std::max(maxsize * 3/2, sz + src.cols); + stack.resize(maxsize); + stack_bottom = &stack[0]; + stack_top = stack_bottom + sz; + } + +#define CANNY_PUSH(d) *(d) = uchar(2), *stack_top++ = (d) +#define CANNY_POP(d) (d) = *--stack_top + + int prev_flag = 0; + bool canny_push = false; + for (int j = 0; j < src.cols; j++) + { + #define CANNY_SHIFT 15 + const int TG22 = (int)(0.4142135623730950488016887242097*(1< low) + { + int xs = _x[j]; + int ys = _y[j]; + int x = std::abs(xs); + int y = std::abs(ys) << CANNY_SHIFT; + + int tg22x = x * TG22; + + if (y < tg22x) + { + if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true; + } + else + { + int tg67x = tg22x + (x << (CANNY_SHIFT+1)); + if (y > tg67x) + { + if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true; + } + else + { + int s = (xs ^ ys) < 0 ? -1 : 1; + if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true; + } + } + } + if (!canny_push) + { + prev_flag = 0; + _map[j] = uchar(1); + continue; + } + else + { + // _map[j-mapstep] is short-circuited at the start because previous thread is + // responsible for initializing it. + if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) ) + { + CANNY_PUSH(_map + j); + prev_flag = 1; + } + else + _map[j] = 0; + + canny_push = false; + } + } + + // scroll the ring buffer + _mag = mag_buf[0]; + mag_buf[0] = mag_buf[1]; + mag_buf[1] = mag_buf[2]; + mag_buf[2] = _mag; + } + + // now track the edges (hysteresis thresholding) + while (stack_top > stack_bottom) + { + if ((stack_top - stack_bottom) + 8 > maxsize) + { + int sz = (int)(stack_top - stack_bottom); + maxsize = maxsize * 3/2; + stack.resize(maxsize); + stack_bottom = &stack[0]; + stack_top = stack_bottom + sz; + } + + uchar* m; + CANNY_POP(m); + + // Stops thresholding from expanding to other slices by sending pixels in the borders of each + // slice in a queue to be serially processed later. + if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) ) + { + borderPeaks.push(m); + continue; + } + + if (!m[-1]) CANNY_PUSH(m - 1); + if (!m[1]) CANNY_PUSH(m + 1); + if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1); + if (!m[-mapstep]) CANNY_PUSH(m - mapstep); + if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1); + if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1); + if (!m[mapstep]) CANNY_PUSH(m + mapstep); + if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1); + } + } + +private: + const Range boundaries; + const Mat& src; + uchar* map; + int low; + int high; + int aperture_size; + bool L2gradient; +}; + +#endif + +} // namespace cv void cv::Canny( InputArray _src, OutputArray _dst, double low_thresh, double high_thresh, @@ -280,6 +625,69 @@ void cv::Canny( InputArray _src, OutputArray _dst, } #endif +#ifdef HAVE_TBB + +if (L2gradient) +{ + low_thresh = std::min(32767.0, low_thresh); + high_thresh = std::min(32767.0, high_thresh); + + if (low_thresh > 0) low_thresh *= low_thresh; + if (high_thresh > 0) high_thresh *= high_thresh; +} +int low = cvFloor(low_thresh); +int high = cvFloor(high_thresh); + +ptrdiff_t mapstep = src.cols + 2; +AutoBuffer buffer((src.cols+2)*(src.rows+2)); + +uchar* map = (uchar*)buffer; +memset(map, 1, mapstep); +memset(map + mapstep*(src.rows + 1), 1, mapstep); + +int threadsNumber = tbb::task_scheduler_init::default_num_threads(); +int grainSize = src.rows / threadsNumber; + +// Make a fallback for pictures with too few rows. +uchar ksize2 = aperture_size / 2; +int minGrainSize = 1 + ksize2; +int maxGrainSize = src.rows - 2 - 2*ksize2; +if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) ) +{ + threadsNumber = 1; + grainSize = src.rows; +} + +tbb::task_group g; + +for (int i = 0; i < threadsNumber; ++i) +{ + if (i < threadsNumber - 1) + g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient)); + else + g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient)); +} + +g.wait(); + +#define CANNY_PUSH_SERIAL(d) *(d) = uchar(2), borderPeaks.push(d) + +// now track the edges (hysteresis thresholding) +uchar* m; +while (borderPeaks.try_pop(m)) +{ + if (!m[-1]) CANNY_PUSH_SERIAL(m - 1); + if (!m[1]) CANNY_PUSH_SERIAL(m + 1); + if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1); + if (!m[-mapstep]) CANNY_PUSH_SERIAL(m - mapstep); + if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1); + if (!m[mapstep-1]) CANNY_PUSH_SERIAL(m + mapstep - 1); + if (!m[mapstep]) CANNY_PUSH_SERIAL(m + mapstep); + if (!m[mapstep+1]) CANNY_PUSH_SERIAL(m + mapstep + 1); +} + +#else + Mat dx(src.rows, src.cols, CV_16SC(cn)); Mat dy(src.rows, src.cols, CV_16SC(cn)); @@ -540,6 +948,8 @@ __ocv_canny_push: if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1); } +#endif + // the final pass, form the final image const uchar* pmap = map + mapstep + 1; uchar* pdst = dst.ptr(); diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index ab07967ba5..44eb3adfc0 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1548,7 +1548,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, return true; } -#if defined ANDROID +#ifdef ANDROID size_t localThreads[2] = { 16, 8 }; #else size_t localThreads[2] = { 16, 16 }; @@ -1563,6 +1563,11 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1)) return false; +#ifdef ANDROID + if (dev.isNVidia()) + return false; +#endif + // build processing String processing; Mat kernel8u; diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 53cd8269ce..90840cdaa5 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -2966,6 +2966,11 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, double sigma_color, double sigma_space, int borderType) { +#ifdef ANDROID + if (ocl::Device::getDefault().isNVidia()) + return false; +#endif + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int i, j, maxk, radius; diff --git a/modules/imgproc/test/ocl/test_canny.cpp b/modules/imgproc/test/ocl/test_canny.cpp index fadf777985..70e4bb1fcf 100644 --- a/modules/imgproc/test/ocl/test_canny.cpp +++ b/modules/imgproc/test/ocl/test_canny.cpp @@ -99,12 +99,17 @@ OCL_TEST_P(Canny, Accuracy) generateTestData(); const double low_thresh = 50.0, high_thresh = 100.0; + double eps = 1e-2; +#ifdef ANDROID + if (cv::ocl::Device::getDefault().isNVidia()) + eps = 12e-3; +#endif OCL_OFF(cv::Canny(src_roi, dst_roi, low_thresh, high_thresh, apperture_size, useL2gradient)); OCL_ON(cv::Canny(usrc_roi, udst_roi, low_thresh, high_thresh, apperture_size, useL2gradient)); - EXPECT_MAT_SIMILAR(dst_roi, udst_roi, 1e-2); - EXPECT_MAT_SIMILAR(dst, udst, 1e-2); + EXPECT_MAT_SIMILAR(dst_roi, udst_roi, eps); + EXPECT_MAT_SIMILAR(dst, udst, eps); } OCL_INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine( diff --git a/modules/imgproc/test/ocl/test_color.cpp b/modules/imgproc/test/ocl/test_color.cpp index c53607fdbd..6e117cdd5f 100644 --- a/modules/imgproc/test/ocl/test_color.cpp +++ b/modules/imgproc/test/ocl/test_color.cpp @@ -128,7 +128,7 @@ OCL_TEST_P(CvtColor, BGR2GRAY) { performTest(3, 1, CVTCODE(BGR2GRAY)); } OCL_TEST_P(CvtColor, GRAY2BGR) { performTest(1, 3, CVTCODE(GRAY2BGR)); } OCL_TEST_P(CvtColor, RGBA2GRAY) { performTest(4, 1, CVTCODE(RGBA2GRAY)); } OCL_TEST_P(CvtColor, GRAY2RGBA) { performTest(1, 4, CVTCODE(GRAY2RGBA)); } -OCL_TEST_P(CvtColor, BGRA2GRAY) { performTest(4, 1, CVTCODE(BGRA2GRAY)); } +OCL_TEST_P(CvtColor, BGRA2GRAY) { performTest(4, 1, CVTCODE(BGRA2GRAY), cv::ocl::Device::getDefault().isNVidia() ? 1 : 1e-3); } OCL_TEST_P(CvtColor, GRAY2BGRA) { performTest(1, 4, CVTCODE(GRAY2BGRA)); } // RGB <-> YUV diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index ccbdf25f53..da70f732df 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -319,10 +319,17 @@ OCL_TEST_P(Remap_INTER_LINEAR, Mat) { random_roi(); + double eps = 2.0; +#ifdef ANDROID + // TODO investigate accuracy + if (cv::ocl::Device::getDefault().isNVidia()) + eps = 8.0; +#endif + OCL_OFF(cv::remap(src_roi, dst_roi, map1_roi, map2_roi, INTER_LINEAR, borderType, val)); OCL_ON(cv::remap(usrc_roi, udst_roi, umap1_roi, umap2_roi, INTER_LINEAR, borderType, val)); - Near(2.0); + Near(eps); } } diff --git a/modules/video/src/bgfg_KNN.cpp b/modules/video/src/bgfg_KNN.cpp index c551ce4c9e..cddd62c154 100755 --- a/modules/video/src/bgfg_KNN.cpp +++ b/modules/video/src/bgfg_KNN.cpp @@ -458,10 +458,8 @@ CV_INLINE void uchar nShadowDetection ) { - int size=_src.rows*_src.cols; int nchannels = CV_MAT_CN(_src.type()); - const uchar* pDataCurrent=_src.ptr(0); - uchar* pDataOutput=_dst.ptr(0); + //model uchar* m_aModel=_bgmodel.ptr(0); uchar* m_nNextLongUpdate=_nNextLongUpdate.ptr(0); @@ -509,48 +507,51 @@ CV_INLINE void if (_nLongCounter >= m_nLongUpdate) _nLongCounter = 0; //go through the image - for (long i=0;i