diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 6389b19894..de34aff4dd 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -346,7 +346,7 @@ elseif(MIPS) ocv_update(CPU_MSA_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_msa.cpp") ocv_update(CPU_KNOWN_OPTIMIZATIONS "MSA") ocv_update(CPU_MSA_FLAGS_ON "-mmsa") - set(CPU_BASELINE "MSA" CACHE STRING "${HELP_CPU_BASELINE}") + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") elseif(PPC64LE) ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3") ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp") diff --git a/cmake/OpenCVFindMKL.cmake b/cmake/OpenCVFindMKL.cmake index 5eee3f5daa..19a76ddf57 100644 --- a/cmake/OpenCVFindMKL.cmake +++ b/cmake/OpenCVFindMKL.cmake @@ -133,7 +133,7 @@ message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}") set(HAVE_MKL ON) set(MKL_ROOT_DIR "${MKL_ROOT_DIR}" CACHE PATH "Path to MKL directory") set(MKL_INCLUDE_DIRS "${MKL_INCLUDE_DIRS}" CACHE PATH "Path to MKL include directory") -set(MKL_LIBRARIES "${MKL_LIBRARIES}" CACHE STRING "MKL libarries") +set(MKL_LIBRARIES "${MKL_LIBRARIES}" CACHE STRING "MKL libraries") if(UNIX AND NOT MKL_LIBRARIES_DONT_HACK) #it's ugly but helps to avoid cyclic lib problem set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl") diff --git a/cmake/platforms/OpenCV-WindowsPhone.cmake b/cmake/platforms/OpenCV-WindowsPhone.cmake index 8a496d3a7b..c32c256b75 100644 --- a/cmake/platforms/OpenCV-WindowsPhone.cmake +++ b/cmake/platforms/OpenCV-WindowsPhone.cmake @@ -1,4 +1,4 @@ -include("${CMAKE_CURRENT_LIST_DIR}/OpenCV_WinRT.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/OpenCV-WinRT.cmake") # Adding additional using directory for WindowsPhone 8.0 to get Windows.winmd properly if(WINRT_8_0) diff --git a/cmake/platforms/OpenCV-WindowsStore.cmake b/cmake/platforms/OpenCV-WindowsStore.cmake index 
8b5dfa5556..efc8b4f86d 100644 --- a/cmake/platforms/OpenCV-WindowsStore.cmake +++ b/cmake/platforms/OpenCV-WindowsStore.cmake @@ -1 +1 @@ -include("${CMAKE_CURRENT_LIST_DIR}/OpenCV_WinRT.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/OpenCV-WinRT.cmake") diff --git a/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown b/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown index 73e483943d..30ed918576 100644 --- a/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown +++ b/doc/js_tutorials/js_core/js_image_arithmetics/js_image_arithmetics.markdown @@ -27,7 +27,7 @@ src1.delete(); src2.delete(); dst.delete(); mask.delete(); Image Subtraction -------------- -You can subtract two images by OpenCV function, cv.subtract(). res = img1 - img2. Both images should be of same depth and type. +You can subtract two images by OpenCV function, cv.subtract(). res = img1 - img2. Both images should be of same depth and type. Note that when used with RGBA images, the alpha channel is also subtracted. 
For example, consider below sample: @code{.js} @@ -59,4 +59,4 @@ Try it -\endhtmlonly \ No newline at end of file +\endhtmlonly diff --git a/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown b/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown index a7ad0a1a43..471d464b55 100644 --- a/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown +++ b/doc/py_tutorials/py_gui/py_table_of_contents_gui.markdown @@ -4,21 +4,21 @@ Gui Features in OpenCV {#tutorial_py_table_of_contents_gui} - @subpage tutorial_py_image_display Learn to load an - image, display it and save it back + image, display it, and save it back - @subpage tutorial_py_video_display Learn to play videos, - capture videos from Camera and write it as a video + capture videos from a camera, and write videos - @subpage tutorial_py_drawing_functions Learn to draw lines, - rectangles, ellipses, circles etc with OpenCV + rectangles, ellipses, circles, etc with OpenCV - @subpage tutorial_py_mouse_handling - Draw stuffs with your + Draw stuff with your mouse - @subpage tutorial_py_trackbar diff --git a/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown b/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown index 9cea2359c7..d60b846245 100644 --- a/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown +++ b/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown @@ -4,19 +4,19 @@ Getting Started with Videos {#tutorial_py_video_display} Goal ---- -- Learn to read video, display video and save video. -- Learn to capture from Camera and display it. +- Learn to read video, display video, and save video. +- Learn to capture video from a camera and display it. - You will learn these functions : **cv.VideoCapture()**, **cv.VideoWriter()** Capture Video from Camera ------------------------- -Often, we have to capture live stream with camera. OpenCV provides a very simple interface to this. 
-Let's capture a video from the camera (I am using the in-built webcam of my laptop), convert it into +Often, we have to capture live stream with a camera. OpenCV provides a very simple interface to do this. +Let's capture a video from the camera (I am using the built-in webcam on my laptop), convert it into grayscale video and display it. Just a simple task to get started. To capture a video, you need to create a **VideoCapture** object. Its argument can be either the -device index or the name of a video file. Device index is just the number to specify which camera. +device index or the name of a video file. A device index is just the number to specify which camera. Normally one camera will be connected (as in my case). So I simply pass 0 (or -1). You can select the second camera by passing 1 and so on. After that, you can capture frame-by-frame. But at the end, don't forget to release the capture. @@ -46,16 +46,16 @@ while True: # When everything done, release the capture cap.release() cv.destroyAllWindows()@endcode -`cap.read()` returns a bool (`True`/`False`). If frame is read correctly, it will be `True`. So you can -check end of the video by checking this return value. +`cap.read()` returns a bool (`True`/`False`). If the frame is read correctly, it will be `True`. So you can +check for the end of the video by checking this returned value. -Sometimes, cap may not have initialized the capture. In that case, this code shows error. You can +Sometimes, cap may not have initialized the capture. In that case, this code shows an error. You can check whether it is initialized or not by the method **cap.isOpened()**. If it is `True`, OK. Otherwise open it using **cap.open()**. You can also access some of the features of this video using **cap.get(propId)** method where propId is a number from 0 to 18. Each number denotes a property of the video (if it is applicable to that -video) and full details can be seen here: cv::VideoCapture::get(). +video). 
Full details can be seen here: cv::VideoCapture::get(). Some of these values can be modified using **cap.set(propId, value)**. Value is the new value you want. @@ -63,13 +63,13 @@ For example, I can check the frame width and height by `cap.get(cv.CAP_PROP_FRAM 640x480 by default. But I want to modify it to 320x240. Just use `ret = cap.set(cv.CAP_PROP_FRAME_WIDTH,320)` and `ret = cap.set(cv.CAP_PROP_FRAME_HEIGHT,240)`. -@note If you are getting error, make sure camera is working fine using any other camera application +@note If you are getting an error, make sure your camera is working fine using any other camera application (like Cheese in Linux). Playing Video from file ----------------------- -It is same as capturing from Camera, just change camera index with video file name. Also while +Playing video from file is the same as capturing it from camera, just change the camera index to a video file name. Also while displaying the frame, use appropriate time for `cv.waitKey()`. If it is too less, video will be very fast and if it is too high, video will be slow (Well, that is how you can display videos in slow motion). 25 milliseconds will be OK in normal cases. @@ -96,23 +96,23 @@ cap.release() cv.destroyAllWindows() @endcode -@note Make sure proper versions of ffmpeg or gstreamer is installed. Sometimes, it is a headache to -work with Video Capture mostly due to wrong installation of ffmpeg/gstreamer. +@note Make sure a proper version of ffmpeg or gstreamer is installed. Sometimes it is a headache to +work with video capture, mostly due to wrong installation of ffmpeg/gstreamer. Saving a Video -------------- -So we capture a video, process it frame-by-frame and we want to save that video. For images, it is -very simple, just use `cv.imwrite()`. Here a little more work is required. +So we capture a video and process it frame-by-frame, and we want to save that video. For images, it is +very simple: just use `cv.imwrite()`. Here, a little more work is required. 
This time we create a **VideoWriter** object. We should specify the output file name (eg: output.avi). Then we should specify the **FourCC** code (details in next paragraph). Then number of -frames per second (fps) and frame size should be passed. And last one is **isColor** flag. If it is -`True`, encoder expect color frame, otherwise it works with grayscale frame. +frames per second (fps) and frame size should be passed. And the last one is the **isColor** flag. If it is +`True`, the encoder expects color frames, otherwise it works with grayscale frames. [FourCC](http://en.wikipedia.org/wiki/FourCC) is a 4-byte code used to specify the video codec. The list of available codes can be found in [fourcc.org](http://www.fourcc.org/codecs.php). It is -platform dependent. Following codecs works fine for me. +platform dependent. The following codecs work fine for me. - In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is more preferable. MJPG results in high size video. X264 gives very small size video) @@ -122,7 +122,7 @@ platform dependent. Following codecs works fine for me. FourCC code is passed as `cv.VideoWriter_fourcc('M','J','P','G')` or `cv.VideoWriter_fourcc(*'MJPG')` for MJPG. -Below code capture from a Camera, flip every frame in vertical direction and saves it. +The below code captures from a camera, flips every frame in the vertical direction, and saves the video. 
@code{.py} import numpy as np import cv2 as cv diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 64a7071ca2..a7c7bfd849 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -216,30 +216,30 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero ) dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0; x = 1; -#if CV_SIMD128 +#if CV_SIMD { - v_int16x8 ftz = v_setall_s16((short) ftzero); - v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2)); - v_int16x8 z = v_setzero_s16(); + v_int16 ftz = vx_setall_s16((short) ftzero); + v_int16 ftz2 = vx_setall_s16((short)(ftzero*2)); + v_int16 z = vx_setzero_s16(); - for(; x <= (size.width - 1) - 8; x += 8 ) + for(; x <= (size.width - 1) - v_int16::nlanes; x += v_int16::nlanes) { - v_int16x8 s00 = v_reinterpret_as_s16(v_load_expand(srow0 + x + 1)); - v_int16x8 s01 = v_reinterpret_as_s16(v_load_expand(srow0 + x - 1)); - v_int16x8 s10 = v_reinterpret_as_s16(v_load_expand(srow1 + x + 1)); - v_int16x8 s11 = v_reinterpret_as_s16(v_load_expand(srow1 + x - 1)); - v_int16x8 s20 = v_reinterpret_as_s16(v_load_expand(srow2 + x + 1)); - v_int16x8 s21 = v_reinterpret_as_s16(v_load_expand(srow2 + x - 1)); - v_int16x8 s30 = v_reinterpret_as_s16(v_load_expand(srow3 + x + 1)); - v_int16x8 s31 = v_reinterpret_as_s16(v_load_expand(srow3 + x - 1)); + v_int16 s00 = v_reinterpret_as_s16(vx_load_expand(srow0 + x + 1)); + v_int16 s01 = v_reinterpret_as_s16(vx_load_expand(srow0 + x - 1)); + v_int16 s10 = v_reinterpret_as_s16(vx_load_expand(srow1 + x + 1)); + v_int16 s11 = v_reinterpret_as_s16(vx_load_expand(srow1 + x - 1)); + v_int16 s20 = v_reinterpret_as_s16(vx_load_expand(srow2 + x + 1)); + v_int16 s21 = v_reinterpret_as_s16(vx_load_expand(srow2 + x - 1)); + v_int16 s30 = v_reinterpret_as_s16(vx_load_expand(srow3 + x + 1)); + v_int16 s31 = v_reinterpret_as_s16(vx_load_expand(srow3 + x - 1)); - v_int16x8 d0 = s00 - s01; - v_int16x8 d1 = s10 - s11; - v_int16x8 d2 = s20 
- s21; - v_int16x8 d3 = s30 - s31; + v_int16 d0 = s00 - s01; + v_int16 d1 = s10 - s11; + v_int16 d2 = s20 - s21; + v_int16 d3 = s30 - s31; - v_uint16x8 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z)); - v_uint16x8 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z)); + v_uint16 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z)); + v_uint16 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z)); v_pack_store(dptr0 + x, v0); v_pack_store(dptr1 + x, v1); @@ -262,10 +262,10 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero ) { uchar* dptr = dst.ptr(y); x = 0; -#if CV_SIMD128 +#if CV_SIMD { - v_uint8x16 val0_16 = v_setall_u8(val0); - for(; x <= size.width-16; x+=16 ) + v_uint8 val0_16 = vx_setall_u8(val0); + for(; x <= size.width-v_uint8::nlanes; x+=v_uint8::nlanes) v_store(dptr + x, val0_16); } #endif @@ -309,13 +309,13 @@ inline int dispDescale(int v1, int v2, int d) return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float } -#if CV_SIMD128 +#if CV_SIMD template static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, Mat& disp, Mat& cost, StereoBMParams& state, uchar* buf, int _dy0, int _dy1 ) { - const int ALIGN = 16; + const int ALIGN = CV_SIMD_WIDTH; int x, y, d; int wsz = state.SADWindowSize, wsz2 = wsz/2; int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); @@ -345,7 +345,9 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, int coststep = cost.data ? 
(int)(cost.step/sizeof(costbuf)) : 0; const int TABSZ = 256; uchar tab[TABSZ]; - const v_int16x8 d0_8 = v_int16x8(0,1,2,3,4,5,6,7), dd_8 = v_setall_s16(8); + short v_seq[v_int16::nlanes]; + for (short i = 0; i < v_int16::nlanes; ++i) + v_seq[i] = i; sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN); hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); @@ -368,20 +370,26 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep ) { int lval = lptr[0]; - v_uint8x16 lv = v_setall_u8((uchar)lval); - for( d = 0; d < ndisp; d += 16 ) + v_uint8 lv = vx_setall_u8((uchar)lval); + for( d = 0; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes ) { - v_uint8x16 rv = v_load(rptr + d); - v_uint16x8 hsad_l = v_load(hsad + d); - v_uint16x8 hsad_h = v_load(hsad + d + 8); - v_uint8x16 diff = v_absdiff(lv, rv); + v_uint8 diff = v_absdiff(lv, vx_load(rptr + d)); v_store(cbuf + d, diff); - v_uint16x8 diff0, diff1; - v_expand(diff, diff0, diff1); - hsad_l += diff0; - hsad_h += diff1; - v_store(hsad + d, hsad_l); - v_store(hsad + d + 8, hsad_h); + v_store(hsad + d, vx_load(hsad + d) + v_expand_low(diff)); + v_store(hsad + d + v_uint16::nlanes, vx_load(hsad + d + v_uint16::nlanes) + v_expand_high(diff)); + } + if( d <= ndisp - v_uint16::nlanes ) + { + v_uint8 diff = v_absdiff(lv, vx_load_low(rptr + d)); + v_store_low(cbuf + d, diff); + v_store(hsad + d, vx_load(hsad + d) + v_expand_low(diff)); + d += v_uint16::nlanes; + } + for( ; d < ndisp; d++ ) + { + int diff = abs(lval - rptr[d]); + cbuf[d] = (uchar)diff; + hsad[d] += (ushort)diff; } htext[y] += tab[lval]; } @@ -412,24 +420,27 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep ) { int lval = lptr[0]; - v_uint8x16 lv = v_setall_u8((uchar)lval); - for( d = 0; d < ndisp; d += 16 ) + v_uint8 lv = 
vx_setall_u8((uchar)lval); + for( d = 0; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes ) { - v_uint8x16 rv = v_load(rptr + d); - v_uint16x8 hsad_l = v_load(hsad + d); - v_uint16x8 hsad_h = v_load(hsad + d + 8); - v_uint8x16 cbs = v_load(cbuf_sub + d); - v_uint8x16 diff = v_absdiff(lv, rv); - v_int16x8 diff_l, diff_h, cbs_l, cbs_h; + v_uint8 diff = v_absdiff(lv, vx_load(rptr + d)); + v_int8 cbs = v_reinterpret_as_s8(vx_load(cbuf_sub + d)); v_store(cbuf + d, diff); - v_expand(v_reinterpret_as_s8(diff), diff_l, diff_h); - v_expand(v_reinterpret_as_s8(cbs), cbs_l, cbs_h); - diff_l -= cbs_l; - diff_h -= cbs_h; - hsad_h = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_h) + diff_h); - hsad_l = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_l) + diff_l); - v_store(hsad + d, hsad_l); - v_store(hsad + d + 8, hsad_h); + v_store(hsad + d, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d) + v_expand_low(diff)) - v_expand_low(cbs))); + v_store(hsad + d + v_uint16::nlanes, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d + v_uint16::nlanes) + v_expand_high(diff)) - v_expand_high(cbs))); + } + if( d <= ndisp - v_uint16::nlanes) + { + v_uint8 diff = v_absdiff(lv, vx_load_low(rptr + d)); + v_store_low(cbuf + d, diff); + v_store(hsad + d, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d) + v_expand_low(diff)) - vx_load_expand((schar*)cbuf_sub + d))); + d += v_uint16::nlanes; + } + for( ; d < ndisp; d++ ) + { + int diff = abs(lval - rptr[d]); + cbuf[d] = (uchar)diff; + hsad[d] = hsad[d] + (ushort)diff - cbuf_sub[d]; } htext[y] += tab[lval] - tab[lptr_sub[0]]; } @@ -446,17 +457,25 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, hsad = hsad0 + (1 - dy0)*ndisp; for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp ) - for( d = 0; d <= ndisp-16; d += 16 ) + { + for( d = 0; d <= ndisp-2*v_uint16::nlanes; d += 2*v_uint16::nlanes ) { - v_uint16x8 s0 = v_load(sad + d); - v_uint16x8 s1 = v_load(sad + d + 8); - v_uint16x8 t0 = 
v_load(hsad + d); - v_uint16x8 t1 = v_load(hsad + d + 8); - s0 = s0 + t0; - s1 = s1 + t1; - v_store(sad + d, s0); - v_store(sad + d + 8, s1); + v_store(sad + d, vx_load(sad + d) + vx_load(hsad + d)); + v_store(sad + d + v_uint16::nlanes, vx_load(sad + d + v_uint16::nlanes) + vx_load(hsad + d + v_uint16::nlanes)); } + if( d <= ndisp-v_uint16::nlanes ) + { + v_store(sad + d, vx_load(sad + d) + vx_load(hsad + d)); + d += v_uint16::nlanes; + } + if( d <= ndisp-v_uint16::nlanes/2 ) + { + v_store_low(sad + d, vx_load_low(sad + d) + vx_load_low(hsad + d)); + d += v_uint16::nlanes/2; + } + for( ; d < ndisp; d++ ) + sad[d] = sad[d] + hsad[d]; + } int tsum = 0; for( y = -wsz2-1; y < wsz2; y++ ) tsum += htext[y]; @@ -467,38 +486,41 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, int minsad = INT_MAX, mind = -1; hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp; hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; - v_int16x8 minsad8 = v_setall_s16(SHRT_MAX); - v_int16x8 mind8 = v_setall_s16(0), d8 = d0_8; + v_int16 minsad8 = vx_setall_s16(SHRT_MAX); + v_int16 mind8 = vx_setall_s16(0); - for( d = 0; d < ndisp; d += 16 ) + for( d = 0; d <= ndisp - 2*v_int16::nlanes; d += 2*v_int16::nlanes ) { - v_int16x8 u0 = v_reinterpret_as_s16(v_load(hsad_sub + d)); - v_int16x8 u1 = v_reinterpret_as_s16(v_load(hsad + d)); + v_int16 sad8 = v_reinterpret_as_s16(vx_load(hsad + d)) - v_reinterpret_as_s16(vx_load(hsad_sub + d)) + v_reinterpret_as_s16(vx_load(sad + d)); + v_store(sad + d, v_reinterpret_as_u16(sad8)); + mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d)); + minsad8 = v_min(minsad8, sad8); - v_int16x8 v0 = v_reinterpret_as_s16(v_load(hsad_sub + d + 8)); - v_int16x8 v1 = v_reinterpret_as_s16(v_load(hsad + d + 8)); - - v_int16x8 usad8 = v_reinterpret_as_s16(v_load(sad + d)); - v_int16x8 vsad8 = v_reinterpret_as_s16(v_load(sad + d + 8)); - - u1 -= u0; - v1 -= v0; - usad8 += u1; - vsad8 += v1; - - v_int16x8 mask = minsad8 > usad8; - minsad8 = 
v_min(minsad8, usad8); - mind8 = v_max(mind8, (mask& d8)); - - v_store(sad + d, v_reinterpret_as_u16(usad8)); - v_store(sad + d + 8, v_reinterpret_as_u16(vsad8)); - - mask = minsad8 > vsad8; - minsad8 = v_min(minsad8, vsad8); - - d8 = d8 + dd_8; - mind8 = v_max(mind8, (mask & d8)); - d8 = d8 + dd_8; + sad8 = v_reinterpret_as_s16(vx_load(hsad + d + v_int16::nlanes)) - v_reinterpret_as_s16(vx_load(hsad_sub + d + v_int16::nlanes)) + v_reinterpret_as_s16(vx_load(sad + d + v_int16::nlanes)); + v_store(sad + d + v_int16::nlanes, v_reinterpret_as_u16(sad8)); + mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d+v_int16::nlanes)); + minsad8 = v_min(minsad8, sad8); + } + if( d <= ndisp - v_int16::nlanes ) + { + v_int16 sad8 = v_reinterpret_as_s16(vx_load(hsad + d)) - v_reinterpret_as_s16(vx_load(hsad_sub + d)) + v_reinterpret_as_s16(vx_load(sad + d)); + v_store(sad + d, v_reinterpret_as_u16(sad8)); + mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d)); + minsad8 = v_min(minsad8, sad8); + d += v_int16::nlanes; + } + minsad = v_reduce_min(minsad8); + v_int16 v_mask = (vx_setall_s16((short)minsad) == minsad8); + mind = v_reduce_min(((mind8+vx_load(v_seq)) & v_mask) | (vx_setall_s16(SHRT_MAX) & ~v_mask)); + for( ; d < ndisp; d++ ) + { + int sad8 = (int)(hsad[d]) - hsad_sub[d] + sad[d]; + sad[d] = (ushort)sad8; + if(minsad > sad8) + { + mind = d; + minsad = sad8; + } } tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; @@ -508,41 +530,45 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, continue; } - ushort CV_DECL_ALIGNED(16) minsad_buf[8], mind_buf[8]; - v_store(minsad_buf, v_reinterpret_as_u16(minsad8)); - v_store(mind_buf, v_reinterpret_as_u16(mind8)); - for( d = 0; d < 8; d++ ) - if(minsad > (int)minsad_buf[d] || (minsad == (int)minsad_buf[d] && mind > mind_buf[d])) - { - minsad = minsad_buf[d]; - mind = mind_buf[d]; - } - if( uniquenessRatio > 0 ) { int thresh = minsad + (minsad * uniquenessRatio/100); - v_int32x4 
thresh4 = v_setall_s32(thresh + 1); - v_int32x4 d1 = v_setall_s32(mind-1), d2 = v_setall_s32(mind+1); - v_int32x4 dd_4 = v_setall_s32(4); - v_int32x4 d4 = v_int32x4(0,1,2,3); - v_int32x4 mask4; + v_int32 thresh4 = vx_setall_s32(thresh + 1); + v_int32 d1 = vx_setall_s32(mind-1), d2 = vx_setall_s32(mind+1); + v_int32 dd_4 = vx_setall_s32(v_int32::nlanes); + v_int32 d4 = vx_load_expand(v_seq); - for( d = 0; d < ndisp; d += 8 ) + for( d = 0; d <= ndisp - v_int16::nlanes; d += v_int16::nlanes ) { - v_int16x8 sad8 = v_reinterpret_as_s16(v_load(sad + d)); - v_int32x4 sad4_l, sad4_h; - v_expand(sad8, sad4_l, sad4_h); - mask4 = thresh4 > sad4_l; - mask4 = mask4 & ((d1 > d4) | (d4 > d2)); - if( v_check_any(mask4) ) + v_int32 sad4_l, sad4_h; + v_expand(v_reinterpret_as_s16(vx_load(sad + d)), sad4_l, sad4_h); + if( v_check_any((thresh4 > sad4_l) & ((d1 > d4) | (d4 > d2))) ) break; d4 += dd_4; - mask4 = thresh4 > sad4_h; - mask4 = mask4 & ((d1 > d4) | (d4 > d2)); - if( v_check_any(mask4) ) + if( v_check_any((thresh4 > sad4_h) & ((d1 > d4) | (d4 > d2))) ) break; d4 += dd_4; } + if( d <= ndisp - v_int16::nlanes ) + { + dptr[y*dstep] = FILTERED; + continue; + } + if( d <= ndisp - v_int32::nlanes ) + { + v_int32 sad4_l = vx_load_expand((short*)sad + d); + if (v_check_any((thresh4 > sad4_l) & ((d1 > d4) | (d4 > d2)))) + { + dptr[y*dstep] = FILTERED; + continue; + } + d += v_int16::nlanes; + } + for( ; d < ndisp; d++ ) + { + if( (thresh + 1) > sad[d] && ((mind - 1) > d || d > (mind + 1)) ) + break; + } if( d < ndisp ) { dptr[y*dstep] = FILTERED; @@ -571,7 +597,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, uchar* buf, int _dy0, int _dy1 ) { - const int ALIGN = 16; + const int ALIGN = CV_SIMD_WIDTH; int x, y, d; int wsz = state.SADWindowSize, wsz2 = wsz/2; int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); @@ -587,12 +613,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, const int disp_shift = dispShiftTemplate::value; mType FILTERED = 
(mType)((mindisp - 1) << disp_shift); -#if CV_SIMD128 - { - CV_Assert (ndisp % 8 == 0); - } -#endif - int *sad, *hsad0, *hsad, *hsad_sub, *htext; uchar *cbuf0, *cbuf; const uchar* lptr0 = left.ptr() + lofs; @@ -607,6 +627,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, const int TABSZ = 256; uchar tab[TABSZ]; +#if CV_SIMD + int v_seq[v_int32::nlanes]; + for (int i = 0; i < v_int32::nlanes; ++i) + v_seq[i] = i; + v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes); +#endif + sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); @@ -628,22 +655,22 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, { int lval = lptr[0]; d = 0; -#if CV_SIMD128 +#if CV_SIMD { - v_uint8x16 lv = v_setall_u8((uchar)lval); + v_uint8 lv = vx_setall_u8((uchar)lval); - for( ; d <= ndisp - 16; d += 16 ) + for( ; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes ) { - v_uint8x16 rv = v_load(rptr + d); - v_int32x4 hsad_0 = v_load(hsad + d); - v_int32x4 hsad_1 = v_load(hsad + d + 4); - v_int32x4 hsad_2 = v_load(hsad + d + 8); - v_int32x4 hsad_3 = v_load(hsad + d + 12); - v_uint8x16 diff = v_absdiff(lv, rv); + v_uint8 rv = vx_load(rptr + d); + v_int32 hsad_0 = vx_load(hsad + d); + v_int32 hsad_1 = vx_load(hsad + d + v_int32::nlanes); + v_int32 hsad_2 = vx_load(hsad + d + 2*v_int32::nlanes); + v_int32 hsad_3 = vx_load(hsad + d + 3*v_int32::nlanes); + v_uint8 diff = v_absdiff(lv, rv); v_store(cbuf + d, diff); - v_uint16x8 diff0, diff1; - v_uint32x4 diff00, diff01, diff10, diff11; + v_uint16 diff0, diff1; + v_uint32 diff00, diff01, diff10, diff11; v_expand(diff, diff0, diff1); v_expand(diff0, diff00, diff01); v_expand(diff1, diff10, diff11); @@ -654,9 +681,9 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, hsad_3 += v_reinterpret_as_s32(diff11); v_store(hsad + d, hsad_0); - v_store(hsad + d + 4, 
hsad_1); - v_store(hsad + d + 8, hsad_2); - v_store(hsad + d + 12, hsad_3); + v_store(hsad + d + v_int32::nlanes, hsad_1); + v_store(hsad + d + 2*v_int32::nlanes, hsad_2); + v_store(hsad + d + 3*v_int32::nlanes, hsad_3); } } #endif @@ -696,22 +723,22 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, { int lval = lptr[0]; d = 0; -#if CV_SIMD128 +#if CV_SIMD { - v_uint8x16 lv = v_setall_u8((uchar)lval); - for( ; d <= ndisp - 16; d += 16 ) + v_uint8 lv = vx_setall_u8((uchar)lval); + for( ; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes ) { - v_uint8x16 rv = v_load(rptr + d); - v_int32x4 hsad_0 = v_load(hsad + d); - v_int32x4 hsad_1 = v_load(hsad + d + 4); - v_int32x4 hsad_2 = v_load(hsad + d + 8); - v_int32x4 hsad_3 = v_load(hsad + d + 12); - v_uint8x16 cbs = v_load(cbuf_sub + d); - v_uint8x16 diff = v_absdiff(lv, rv); + v_uint8 rv = vx_load(rptr + d); + v_int32 hsad_0 = vx_load(hsad + d); + v_int32 hsad_1 = vx_load(hsad + d + v_int32::nlanes); + v_int32 hsad_2 = vx_load(hsad + d + 2*v_int32::nlanes); + v_int32 hsad_3 = vx_load(hsad + d + 3*v_int32::nlanes); + v_uint8 cbs = vx_load(cbuf_sub + d); + v_uint8 diff = v_absdiff(lv, rv); v_store(cbuf + d, diff); - v_uint16x8 diff0, diff1, cbs0, cbs1; - v_int32x4 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11; + v_uint16 diff0, diff1, cbs0, cbs1; + v_int32 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11; v_expand(diff, diff0, diff1); v_expand(cbs, cbs0, cbs1); v_expand(v_reinterpret_as_s16(diff0), diff00, diff01); @@ -719,19 +746,19 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, v_expand(v_reinterpret_as_s16(cbs0), cbs00, cbs01); v_expand(v_reinterpret_as_s16(cbs1), cbs10, cbs11); - v_int32x4 diff_0 = diff00 - cbs00; - v_int32x4 diff_1 = diff01 - cbs01; - v_int32x4 diff_2 = diff10 - cbs10; - v_int32x4 diff_3 = diff11 - cbs11; + v_int32 diff_0 = diff00 - cbs00; + v_int32 diff_1 = diff01 - cbs01; + v_int32 diff_2 = diff10 - cbs10; + v_int32 diff_3 = diff11 - cbs11; 
hsad_0 += diff_0; hsad_1 += diff_1; hsad_2 += diff_2; hsad_3 += diff_3; v_store(hsad + d, hsad_0); - v_store(hsad + d + 4, hsad_1); - v_store(hsad + d + 8, hsad_2); - v_store(hsad + d + 12, hsad_3); + v_store(hsad + d + v_int32::nlanes, hsad_1); + v_store(hsad + d + 2*v_int32::nlanes, hsad_2); + v_store(hsad + d + 3*v_int32::nlanes, hsad_3); } } #endif @@ -758,18 +785,18 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp ) { d = 0; -#if CV_SIMD128 +#if CV_SIMD { - for( d = 0; d <= ndisp-8; d += 8 ) + for( d = 0; d <= ndisp-2*v_int32::nlanes; d += 2*v_int32::nlanes ) { - v_int32x4 s0 = v_load(sad + d); - v_int32x4 s1 = v_load(sad + d + 4); - v_int32x4 t0 = v_load(hsad + d); - v_int32x4 t1 = v_load(hsad + d + 4); + v_int32 s0 = vx_load(sad + d); + v_int32 s1 = vx_load(sad + d + v_int32::nlanes); + v_int32 t0 = vx_load(hsad + d); + v_int32 t1 = vx_load(hsad + d + v_int32::nlanes); s0 += t0; s1 += t1; v_store(sad + d, s0); - v_store(sad + d + 4, s1); + v_store(sad + d + v_int32::nlanes, s1); } } #endif @@ -787,50 +814,31 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp; hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; d = 0; -#if CV_SIMD128 +#if CV_SIMD { - v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3); - v_int32x4 dd_4 = v_setall_s32(4); - v_int32x4 minsad4 = v_setall_s32(INT_MAX); - v_int32x4 mind4 = v_setall_s32(0), d4 = d0_4; + v_int32 minsad4 = vx_setall_s32(INT_MAX); + v_int32 mind4 = vx_setall_s32(0), d4 = d0_4; - for( ; d <= ndisp - 8; d += 8 ) + for( ; d <= ndisp - 2*v_int32::nlanes; d += 2*v_int32::nlanes ) { - v_int32x4 u0 = v_load(hsad_sub + d); - v_int32x4 u1 = v_load(hsad + d); - - v_int32x4 v0 = v_load(hsad_sub + d + 4); - v_int32x4 v1 = v_load(hsad + d + 4); - - v_int32x4 usad4 = v_load(sad + d); - v_int32x4 vsad4 = v_load(sad + d + 4); - - u1 -= u0; - v1 -= v0; - usad4 += u1; - vsad4 += v1; - - v_store(sad + d, usad4); - v_store(sad 
+ d + 4, vsad4); - - v_int32x4 mask = minsad4 > usad4; - minsad4 = v_min(minsad4, usad4); - mind4 = v_select(mask, d4, mind4); + v_int32 sad4 = vx_load(sad + d) + vx_load(hsad + d) - vx_load(hsad_sub + d); + v_store(sad + d, sad4); + mind4 = v_select(minsad4 > sad4, d4, mind4); + minsad4 = v_min(minsad4, sad4); d4 += dd_4; - mask = minsad4 > vsad4; - minsad4 = v_min(minsad4, vsad4); - mind4 = v_select(mask, d4, mind4); + sad4 = vx_load(sad + d + v_int32::nlanes) + vx_load(hsad + d + v_int32::nlanes) - vx_load(hsad_sub + d + v_int32::nlanes); + v_store(sad + d + v_int32::nlanes, sad4); + mind4 = v_select(minsad4 > sad4, d4, mind4); + minsad4 = v_min(minsad4, sad4); d4 += dd_4; } - int CV_DECL_ALIGNED(16) minsad_buf[4], mind_buf[4]; + int CV_DECL_ALIGNED(CV_SIMD_WIDTH) minsad_buf[v_int32::nlanes], mind_buf[v_int32::nlanes]; v_store(minsad_buf, minsad4); v_store(mind_buf, mind4); - if(minsad_buf[0] < minsad || (minsad == minsad_buf[0] && mind_buf[0] < mind)) { minsad = minsad_buf[0]; mind = mind_buf[0]; } - if(minsad_buf[1] < minsad || (minsad == minsad_buf[1] && mind_buf[1] < mind)) { minsad = minsad_buf[1]; mind = mind_buf[1]; } - if(minsad_buf[2] < minsad || (minsad == minsad_buf[2] && mind_buf[2] < mind)) { minsad = minsad_buf[2]; mind = mind_buf[2]; } - if(minsad_buf[3] < minsad || (minsad == minsad_buf[3] && mind_buf[3] < mind)) { minsad = minsad_buf[3]; mind = mind_buf[3]; } + for (int i = 0; i < v_int32::nlanes; ++i) + if(minsad_buf[i] < minsad || (minsad == minsad_buf[i] && mind_buf[i] < mind)) { minsad = minsad_buf[i]; mind = mind_buf[i]; } } #endif for( ; d < ndisp; d++ ) @@ -1027,7 +1035,7 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody Mat disp_i = disp->rowRange(row0, row1); Mat cost_i = state->disp12MaxDiff >= 0 ? 
cost->rowRange(row0, row1) : Mat(); -#if CV_SIMD128 +#if CV_SIMD if (useShorts) { if( disp_i.type() == CV_16S) diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 8f6c982c72..fbd6f470cd 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -1012,6 +1012,54 @@ OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd) OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd) +/** Reverse **/ +inline v_uint8x32 v_reverse(const v_uint8x32 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int8x32 v_reverse(const v_int8x32 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x16 v_reverse(const v_uint16x16 &a) +{ + static const __m256i perm = _mm256_setr_epi8( + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, + 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + __m256i vec = _mm256_shuffle_epi8(a.val, perm); + return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1)); +} + +inline v_int16x16 v_reverse(const v_int16x16 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x8 v_reverse(const v_uint32x8 &a) +{ + static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); + return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm)); +} + +inline v_int32x8 v_reverse(const v_int32x8 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x8 v_reverse(const v_float32x8 &a) +{ return 
v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x4 v_reverse(const v_uint64x4 &a) +{ + return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int64x4 v_reverse(const v_int64x4 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x4 v_reverse(const v_float64x4 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + ////////// Reduce and mask ///////// /** Reduce **/ diff --git a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp index 844c546e38..2c31a8d014 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp @@ -1068,6 +1068,79 @@ OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8, epi64) OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps) OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd) +/** Reverse **/ +inline v_uint8x64 v_reverse(const v_uint8x64 &a) +{ +#if CV_AVX_512VBMI + static const __m512i perm = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f, + 0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f, + 0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f); + return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint8x64(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int8x64 v_reverse(const v_int8x64 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x32 v_reverse(const v_uint16x32 &a) +{ +#if CV_AVX_512VBMI + 
static const __m512i perm = _mm512_set_epi32( + 0x00000001, 0x00020003, 0x00040005, 0x00060007, + 0x00080009, 0x000a000b, 0x000c000d, 0x000e000f, + 0x00100011, 0x00120013, 0x00140015, 0x00160017, + 0x00180019, 0x001a001b, 0x001c001d, 0x001e001f); + return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val)); +#else + static const __m512i shuf = _mm512_set_epi32( + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e, + 0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e); + static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6); + __m512i vec = _mm512_shuffle_epi8(a.val, shuf); + return v_uint16x32(_mm512_permutexvar_epi64(perm, vec)); +#endif +} + +inline v_int16x32 v_reverse(const v_int16x32 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x16 v_reverse(const v_uint32x16 &a) +{ + static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,14, 15); + return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val)); +} + +inline v_int32x16 v_reverse(const v_int32x16 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x16 v_reverse(const v_float32x16 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x8 v_reverse(const v_uint64x8 &a) +{ + static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val)); +} + +inline v_int64x8 v_reverse(const v_int64x8 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x8 v_reverse(const v_float64x8 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + ////////// Reduce ///////// /** Reduce **/ diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 39de0b5a09..031420e9fc 100644 --- 
a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -112,6 +112,7 @@ These operations allow to reorder or recombine elements in one or multiple vecto - Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u, @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high +- Reverse: @ref v_reverse - Extract: @ref v_extract @@ -215,6 +216,7 @@ Regular integers: |cvt_flt32 | | | | | | x | |cvt_flt64 | | | | | | x | |transpose4x4 | | | | | x | x | +|reverse | x | x | x | x | x | x | Big integers: @@ -224,6 +226,7 @@ Big integers: |add, sub | x | x | |shift | x | x | |logical | x | x | +|reverse | x | x | |extract | x | x | |rotate (lanes) | x | x | |cvt_flt64 | | x | @@ -250,6 +253,7 @@ Floating point: |transpose4x4 | x | | |extract | x | x | |rotate (lanes) | x | x | +|reverse | x | x | @{ */ @@ -1724,6 +1728,23 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, } } +/** @brief Vector reverse order + +Reverse the order of the vector +Scheme: +@code + REG {A1 ... An} ==> REG {An ... A1} +@endcode +For all types. 
*/ +template +inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) +{ + v_reg<_Tp, n> c; + for( int i = 0; i < n; i++ ) + c.s[i] = a.s[n-i-1]; + return c; +} + /** @brief Vector extract Scheme: diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index 5ece9c131e..4dbdfef49d 100755 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -906,6 +906,57 @@ OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64) OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32) OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64) + +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + v_uint8x16 c = v_uint8x16((v16u8)__builtin_msa_vshf_b((v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}), msa_dupq_n_s8(0), (v16i8)a.val)); + return c; +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + v_uint16x8 c = v_uint16x8((v8u16)__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}), msa_dupq_n_s16(0), (v8i16)a.val)); + return c; +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + v_uint32x4 c; + c.val[0] = a.val[3]; + c.val[1] = a.val[2]; + c.val[2] = a.val[1]; + c.val[3] = a.val[0]; + return c; +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + v_uint64x2 c; + c.val[0] = a.val[1]; + c.val[1] = a.val[0]; + return c; +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return 
v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + #define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \ inline unsigned short v_reduce_##func(const v_uint16x8& a) \ { \ diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 3e8321aca3..abbd635fac 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1585,6 +1585,52 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32) OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64) #endif +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + uint8x16_t vec = vrev64q_u8(a.val); + return v_uint8x16(vextq_u8(vec, vec, 8)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + uint16x8_t vec = vrev64q_u16(a.val); + return v_uint16x8(vextq_u16(vec, vec, 4)); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + uint32x4_t vec = vrev64q_u32(a.val); + return v_uint32x4(vextq_u32(vec, vec, 2)); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + uint64x2_t vec = a.val; + uint64x1_t vec_lo = vget_low_u64(vec); + uint64x1_t vec_hi = vget_high_u64(vec); + return v_uint64x2(vcombine_u64(vec_hi, vec_lo)); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +#if CV_SIMD128_64F +inline v_float64x2 
v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } +#endif + #define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \ template \ inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \ diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index c4de1195b5..e7370504ef 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -1220,14 +1220,23 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \ OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps) OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd) -#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \ +#if CV_SSE4_1 +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ -{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \ +{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ -{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); } +{ return ~(a == b); } +#else +#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \ +inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ +{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \ + return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \ +inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ +{ return ~(a == b); } +#endif -OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) -OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64) +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) +OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) inline v_float32x4 v_not_nan(const v_float32x4& a) { return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } @@ -1914,6 +1923,59 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP) 
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd) +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return v_uint8x16(_mm_shuffle_epi8(a.val, perm)); +#else + uchar CV_DECL_ALIGNED(32) d[16]; + v_store_aligned(d, a); + return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]); +#endif +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ +#if CV_SSSE3 + static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); + return v_uint16x8(_mm_shuffle_epi8(a.val, perm)); +#else + __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1)); + return v_uint16x8(r); +#endif +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return 
v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + template inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) { diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index 0d65ca5e7a..5b4a0d4137 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -678,6 +678,53 @@ OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2) OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2) OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2) +/* Reverse */ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ + static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_uint8x16(vec_perm(vec, vec, perm)); +} + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ + static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ + static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; + vec_uchar16 vec = (vec_uchar16)a.val; + return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ + static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}; + vec_uchar16 vec = (vec_uchar16)a.val; + return 
v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm))); +} + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + /* Extract */ template inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index f2da617cfe..4b8cd61dd2 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -21,6 +21,18 @@ namespace cv CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046) +// handle renames: https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673) +#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4 +#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4 +#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2 +#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2 +#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4 +#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4 +#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2 +#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2 +#endif // COMPATIBILITY: <1.38.46 + ///////// Types /////////// struct v_uint8x16 @@ -3111,6 +3123,38 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float) OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double) +/** Reverse **/ +inline v_uint8x16 v_reverse(const v_uint8x16 &a) +{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } + +inline v_int8x16 v_reverse(const v_int8x16 &a) +{ 
return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } + +inline v_uint16x8 v_reverse(const v_uint16x8 &a) +{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); } + +inline v_int16x8 v_reverse(const v_int16x8 &a) +{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } + +inline v_uint32x4 v_reverse(const v_uint32x4 &a) +{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } + +inline v_int32x4 v_reverse(const v_int32x4 &a) +{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_float32x4 v_reverse(const v_float32x4 &a) +{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } + +inline v_uint64x2 v_reverse(const v_uint64x2 &a) +{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } + +inline v_int64x2 v_reverse(const v_int64x2 &a) +{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } + +inline v_float64x2 v_reverse(const v_float64x2 &a) +{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); } + + #define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ @@ -3400,25 +3444,25 @@ inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) inline v_int32x4 v_round(const v_float32x4& a) { v128_t h = wasm_f32x4_splat(0.5); - return v_int32x4(wasm_trunc_saturate_i32x4_f32x4(wasm_f32x4_add(a.val, h))); + return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h))); } inline v_int32x4 v_floor(const v_float32x4& a) { - v128_t a1 = wasm_trunc_saturate_i32x4_f32x4(a.val); - v128_t mask = wasm_f32x4_lt(a.val, wasm_convert_f32x4_i32x4(a1)); + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1)); return v_int32x4(wasm_i32x4_add(a1, mask)); } inline v_int32x4 
v_ceil(const v_float32x4& a) { - v128_t a1 = wasm_trunc_saturate_i32x4_f32x4(a.val); - v128_t mask = wasm_f32x4_gt(a.val, wasm_convert_f32x4_i32x4(a1)); + v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val); + v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1)); return v_int32x4(wasm_i32x4_sub(a1, mask)); } inline v_int32x4 v_trunc(const v_float32x4& a) -{ return v_int32x4(wasm_trunc_saturate_i32x4_f32x4(a.val)); } +{ return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); } #define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ inline _Tpnvec func(const _Tpvec& a) \ @@ -3924,7 +3968,7 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, inline v_float32x4 v_cvt_f32(const v_int32x4& a) { - return v_float32x4(wasm_convert_f32x4_i32x4(a.val)); + return v_float32x4(wasm_f32x4_convert_i32x4(a.val)); } inline v_float32x4 v_cvt_f32(const v_float64x2& a) @@ -3943,7 +3987,7 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a) { #ifdef __wasm_unimplemented_simd128__ v128_t p = v128_cvti32x4_i64x2(a.val); - return v_float64x2(wasm_convert_f64x2_i64x2(p)); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); #else fallback::v_int32x4 a_(a); return fallback::v_cvt_f64(a_); @@ -3954,7 +3998,7 @@ inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) { #ifdef __wasm_unimplemented_simd128__ v128_t p = v128_cvti32x4_i64x2_high(a.val); - return v_float64x2(wasm_convert_f64x2_i64x2(p)); + return v_float64x2(wasm_f64x2_convert_i64x2(p)); #else fallback::v_int32x4 a_(a); return fallback::v_cvt_f64_high(a_); @@ -3976,7 +4020,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) inline v_float64x2 v_cvt_f64(const v_int64x2& a) { #ifdef __wasm_unimplemented_simd128__ - return v_float64x2(wasm_convert_f64x2_i64x2(a.val)); + return v_float64x2(wasm_f64x2_convert_i64x2(a.val)); #else fallback::v_int64x2 a_(a); return fallback::v_cvt_f64(a_); diff --git a/modules/core/src/alloc.cpp b/modules/core/src/alloc.cpp index 
8384f6dd53..98012998fc 100644 --- a/modules/core/src/alloc.cpp +++ b/modules/core/src/alloc.cpp @@ -112,6 +112,13 @@ bool isAlignedAllocationEnabled() } return useMemalign; } +// do not use variable directly, details: https://github.com/opencv/opencv/issues/15691 +static const bool g_force_initialization_memalign_flag +#if defined __GNUC__ + __attribute__((unused)) +#endif + = isAlignedAllocationEnabled(); + #endif #ifdef OPENCV_ALLOC_ENABLE_STATISTICS diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index fb721e2d63..c1478de763 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -711,6 +711,13 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP(); + // Details: https://github.com/opencv/opencv/issues/12943 + if (flip_mode <= 0 /* swap rows */ + && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 + && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ + ) + return false; + IppiAxis ippMode; if(flip_mode < 0) ippMode = ippAxsBoth; diff --git a/modules/core/src/count_non_zero.simd.hpp b/modules/core/src/count_non_zero.simd.hpp index 4c01c08850..6994564127 100644 --- a/modules/core/src/count_non_zero.simd.hpp +++ b/modules/core/src/count_non_zero.simd.hpp @@ -179,7 +179,25 @@ static int countNonZero32f( const float* src, int len ) static int countNonZero64f( const double* src, int len ) { - return countNonZero_(src, len); + int nz = 0, i = 0; +#if CV_SIMD_64F + v_int64 sum1 = vx_setzero_s64(); + v_int64 sum2 = vx_setzero_s64(); + v_float64 zero = vx_setzero_f64(); + int step = v_float64::nlanes * 2; + int len0 = len & -step; + + for(i = 0; i < len0; i += step ) + { + sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero); + sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero); + } + + // N.B the value is incremented by -1 (0xF...F) for each value + nz = i + (int)v_reduce_sum(sum1 + sum2); + v_cleanup(); +#endif + return nz + countNonZero_(src + 
i, len - i); } CountNonZeroFunc getCountNonZeroTab(int depth) diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index fcb6b93a3c..bd1e24722c 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1115,6 +1115,22 @@ template struct TheTest return *this; } + TheTest & test_reverse() + { + Data dataA; + R a = dataA; + + Data resB = v_reverse(a); + + for (int i = 0; i < R::nlanes; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); + EXPECT_EQ(dataA[R::nlanes - i - 1], resB[i]); + } + + return *this; + } + template TheTest & test_extract() { @@ -1426,6 +1442,50 @@ template struct TheTest return *this; } #endif + +#if CV_SIMD_64F + TheTest & test_cmp64() + { + Data dataA, dataB; + R a = dataA, b = dataB; + + for (int i = 0; i < R::nlanes; ++i) + { + dataA[i] = dataB[i]; + } + dataA[0]++; + + a = dataA, b = dataB; + + Data resC = (a == b); + Data resD = (a != b); + + for (int i = 0; i < R::nlanes; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); + EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); + EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); + } + + for (int i = 0; i < R::nlanes; ++i) + { + dataA[i] = dataB[i] = (LaneType)-1; + } + + a = dataA, b = dataB; + + resC = (a == b); + resD = (a != b); + + for (int i = 0; i < R::nlanes; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); + EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); + EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); + } + return *this; + } +#endif }; @@ -1459,6 +1519,7 @@ void test_hal_intrin_uint8() .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() .test_pack_b() .test_unpack() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() ; @@ -1497,6 +1558,7 @@ void test_hal_intrin_int8() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() .test_unpack() + .test_reverse() 
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() ; @@ -1529,6 +1591,7 @@ void test_hal_intrin_uint16() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() .test_unpack() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() ; @@ -1561,6 +1624,7 @@ void test_hal_intrin_int16() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() .test_unpack() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() ; @@ -1590,6 +1654,7 @@ void test_hal_intrin_uint32() .test_popcount() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() .test_unpack() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() .test_transpose() @@ -1619,6 +1684,7 @@ void test_hal_intrin_int32() .test_mask() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() .test_unpack() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() .test_float_cvt32() @@ -1635,8 +1701,12 @@ void test_hal_intrin_uint64() TheTest() .test_loadstore() .test_addsub() +#if CV_SIMD_64F + .test_cmp64() +#endif .test_shift<1>().test_shift<8>() .test_logic() + .test_reverse() .test_extract<0>().test_extract<1>() .test_rotate<0>().test_rotate<1>() ; @@ -1648,8 +1718,12 @@ void test_hal_intrin_int64() TheTest() .test_loadstore() .test_addsub() +#if CV_SIMD_64F + .test_cmp64() +#endif .test_shift<1>().test_shift<8>() .test_logic() + .test_reverse() .test_extract<0>().test_extract<1>() 
.test_rotate<0>().test_rotate<1>() .test_cvt64_double() @@ -1680,6 +1754,7 @@ void test_hal_intrin_float32() .test_matmul() .test_transpose() .test_reduce_sum4() + .test_reverse() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() ; @@ -1709,6 +1784,7 @@ void test_hal_intrin_float64() .test_unpack() .test_float_math() .test_float_cvt32() + .test_reverse() .test_extract<0>().test_extract<1>() .test_rotate<0>().test_rotate<1>() ; diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 885ff0e43f..d2328771d9 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -2025,4 +2025,17 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type) } #endif // HAVE_EIGEN +TEST(Mat, regression_12943) // memory usage: ~4.5 Gb +{ + applyTestTag(CV_TEST_TAG_MEMORY_6GB); + + const int width = 0x8000; + const int height = 0x10001; + + cv::Mat src(height, width, CV_8UC1, Scalar::all(128)); + + cv::Mat dst; + cv::flip(src, dst, 0); +} + }} // namespace diff --git a/modules/dnn/include/opencv2/dnn/version.hpp b/modules/dnn/include/opencv2/dnn/version.hpp index b96cf3d6a6..3b372f93d5 100644 --- a/modules/dnn/include/opencv2/dnn/version.hpp +++ b/modules/dnn/include/opencv2/dnn/version.hpp @@ -6,7 +6,7 @@ #define OPENCV_DNN_VERSION_HPP /// Use with major OpenCV version only. 
-#define OPENCV_DNN_API_VERSION 20190902 +#define OPENCV_DNN_API_VERSION 20191024 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 54a53fd867..3a90081e17 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -128,7 +128,7 @@ namespace cv { void setConvolution(int kernel, int pad, int stride, - int filters_num, int channels_num, int use_batch_normalize, int use_relu) + int filters_num, int channels_num, int use_batch_normalize) { cv::dnn::LayerParams conv_param = getParamConvolution(kernel, pad, stride, filters_num); @@ -168,27 +168,29 @@ namespace cv { net->layers.push_back(lp); } - if (use_relu) - { - cv::dnn::LayerParams activation_param; - activation_param.set("negative_slope", 0.1f); - activation_param.name = "ReLU-name"; - activation_param.type = "ReLU"; - - darknet::LayerParameter lp; - std::string layer_name = cv::format("relu_%d", layer_id); - lp.layer_name = layer_name; - lp.layer_type = activation_param.type; - lp.layerParams = activation_param; - lp.bottom_indexes.push_back(last_layer); - last_layer = layer_name; - net->layers.push_back(lp); - } - layer_id++; fused_layer_names.push_back(last_layer); } + void setReLU() + { + cv::dnn::LayerParams activation_param; + activation_param.set("negative_slope", 0.1f); + activation_param.name = "ReLU-name"; + activation_param.type = "ReLU"; + + darknet::LayerParameter lp; + std::string layer_name = cv::format("relu_%d", layer_id); + lp.layer_name = layer_name; + lp.layer_type = activation_param.type; + lp.layerParams = activation_param; + lp.bottom_indexes.push_back(last_layer); + last_layer = layer_name; + net->layers.push_back(lp); + + fused_layer_names.back() = last_layer; + } + void setMaxpool(size_t kernel, size_t pad, size_t stride) { cv::dnn::LayerParams 
maxpool_param; @@ -409,12 +411,19 @@ namespace cv { fused_layer_names.push_back(last_layer); } - void setShortcut(int from) + void setShortcut(int from, float alpha) { cv::dnn::LayerParams shortcut_param; shortcut_param.name = "Shortcut-name"; shortcut_param.type = "Eltwise"; + if (alpha != 1) + { + std::vector coeffs(2, 1); + coeffs[0] = alpha; + shortcut_param.set("coeff", DictValue::arrayReal(&coeffs[0], coeffs.size())); + } + shortcut_param.set("op", "sum"); darknet::LayerParameter lp; @@ -422,8 +431,8 @@ namespace cv { lp.layer_name = layer_name; lp.layer_type = shortcut_param.type; lp.layerParams = shortcut_param; - lp.bottom_indexes.push_back(fused_layer_names.at(from)); lp.bottom_indexes.push_back(last_layer); + lp.bottom_indexes.push_back(fused_layer_names.at(from)); last_layer = layer_name; net->layers.push_back(lp); @@ -548,10 +557,7 @@ namespace cv { int pad = getParam(layer_params, "pad", 0); int stride = getParam(layer_params, "stride", 1); int filters = getParam(layer_params, "filters", -1); - std::string activation = getParam(layer_params, "activation", "linear"); bool batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; - if(activation != "linear" && activation != "leaky") - CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation); int flipped = getParam(layer_params, "flipped", 0); if (flipped == 1) CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented"); @@ -563,7 +569,7 @@ namespace cv { CV_Assert(current_channels > 0); setParams.setConvolution(kernel_size, pad, stride, filters, current_channels, - batch_normalize, activation == "leaky"); + batch_normalize); current_channels = filters; } @@ -593,7 +599,7 @@ namespace cv { current_channels = 0; for (size_t k = 0; k < layers_vec.size(); ++k) { - layers_vec[k] = layers_vec[k] > 0 ? layers_vec[k] : (layers_vec[k] + layers_counter); + layers_vec[k] = layers_vec[k] >= 0 ? 
layers_vec[k] : (layers_vec[k] + layers_counter); current_channels += net->out_channels_vec[layers_vec[k]]; } @@ -631,13 +637,15 @@ namespace cv { else if (layer_type == "shortcut") { std::string bottom_layer = getParam(layer_params, "from", ""); + float alpha = getParam(layer_params, "alpha", 1); + float beta = getParam(layer_params, "beta", 0); + if (beta != 0) + CV_Error(Error::StsNotImplemented, "Non-zero beta"); CV_Assert(!bottom_layer.empty()); int from = std::atoi(bottom_layer.c_str()); - from += layers_counter; - current_channels = net->out_channels_vec[from]; - - setParams.setShortcut(from); + from = from < 0 ? from + layers_counter : from; + setParams.setShortcut(from, alpha); } else if (layer_type == "upsample") { @@ -667,6 +675,15 @@ namespace cv { else { CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); } + + std::string activation = getParam(layer_params, "activation", "linear"); + if (activation == "leaky") + { + setParams.setReLU(); + } + else if (activation != "linear") + CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation); + net->out_channels_vec[layers_counter] = current_channels; } @@ -710,7 +727,6 @@ namespace cv { { int kernel_size = getParam(layer_params, "size", -1); int filters = getParam(layer_params, "filters", -1); - std::string activation = getParam(layer_params, "activation", "linear"); bool use_batch_normalize = getParam(layer_params, "batch_normalize", 0) == 1; CV_Assert(kernel_size > 0 && filters > 0); @@ -754,14 +770,16 @@ namespace cv { bn_blobs.push_back(biasData_mat); setParams.setLayerBlobs(cv_layers_counter, bn_blobs); } - - if(activation == "leaky") - ++cv_layers_counter; } if (layer_type == "region" || layer_type == "yolo") { ++cv_layers_counter; // For permute. 
} + + std::string activation = getParam(layer_params, "activation", "linear"); + if(activation == "leaky") + ++cv_layers_counter; // For ReLU + current_channels = net->out_channels_vec[darknet_layers_counter]; } return true; diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 47acc07063..3903298a1d 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -103,6 +103,37 @@ public: static BackendRegistry impl; return impl; } + + static inline bool checkIETarget(int target) + { +#ifndef HAVE_INF_ENGINE + return false; +#else + cv::dnn::Net net; + cv::dnn::LayerParams lp; + lp.set("kernel_size", 1); + lp.set("num_output", 1); + lp.set("bias_term", false); + lp.type = "Convolution"; + lp.name = "testLayer"; + lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1))); + net.addLayerToPrev(lp.name, lp.type, lp); + net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); + net.setPreferableTarget(target); + static int inpDims[] = {1, 2, 3, 4}; + net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); + try + { + net.forward(); + } + catch(...) + { + return false; + } + return true; +#endif + } + private: BackendRegistry() { @@ -154,35 +185,6 @@ private: } #endif } - static inline bool checkIETarget(int target) - { -#ifndef HAVE_INF_ENGINE - return false; -#else - cv::dnn::Net net; - cv::dnn::LayerParams lp; - lp.set("kernel_size", 1); - lp.set("num_output", 1); - lp.set("bias_term", false); - lp.type = "Convolution"; - lp.name = "testLayer"; - lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1))); - net.addLayerToPrev(lp.name, lp.type, lp); - net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); - net.setPreferableTarget(target); - static int inpDims[] = {1, 2, 3, 4}; - net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); - try - { - net.forward(); - } - catch(...) - { - return false; - } - return true; -#endif - } BackendsList backends; }; @@ -1689,6 +1691,9 @@ struct Net::Impl // backend. 
Split a whole model on several Inference Engine networks if // some of layers are not implemented. + bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU || + BackendRegistry::checkIETarget(DNN_TARGET_CPU); + // Set of all input and output blobs wrappers for current network. std::map > netBlobsWrappers; for (it = layers.begin(); it != layers.end(); ++it) @@ -1702,7 +1707,8 @@ struct Net::Impl if (!fused && !layer->supportBackend(preferableBackend)) { bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1 && - INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2); + INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) && + supportsCPUFallback; // TODO: there is a bug in Myriad plugin with custom layers shape infer. if (preferableTarget == DNN_TARGET_MYRIAD) { diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index cccef29374..c67b198d03 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -70,6 +70,7 @@ public: MAX = 2, } op; std::vector coeffs; + bool variableChannels; EltwiseLayerImpl(const LayerParams& params) { @@ -105,7 +106,7 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - (backendId == DNN_BACKEND_INFERENCE_ENGINE && + (backendId == DNN_BACKEND_INFERENCE_ENGINE && !variableChannels && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty())); } @@ -115,33 +116,57 @@ public: std::vector &internals) const CV_OVERRIDE { CV_Assert(inputs.size() >= 2); + CV_Assert(inputs[0].size() >= 2); CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size()); CV_Assert(op == SUM || coeffs.size() == 0); + int dims = inputs[0].size(); + // Number of channels in output shape is determined by the first input tensor. 
+ int numChannels = inputs[0][1]; for (int i = 1; i < inputs.size(); i++) { - CV_Assert(inputs[0] == inputs[i]); + CV_Assert(inputs[0][0] == inputs[i][0]); + + // It's allowed for channels axis to be different. + for (int j = 2; j < dims; j++) + CV_Assert(inputs[0][j] == inputs[i][j]); } outputs.assign(1, inputs[0]); - + outputs[0][1] = numChannels; return false; } + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE + { + std::vector inputs; + inputs_arr.getMatVector(inputs); + variableChannels = false; + for (int i = 1; i < inputs.size(); ++i) + { + if (inputs[i].size[1] != inputs[0].size[1]) + { + variableChannels = true; + break; + } + } + } + + class EltwiseInvoker : public ParallelLoopBody { public: - const Mat* srcs; + std::vector srcs; int nsrcs; Mat* dst; - const std::vector* coeffs; + std::vector coeffs; EltwiseOp op; int nstripes; const ActivationLayer* activ; int channels; size_t planeSize; - EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} + EltwiseInvoker() : nsrcs(0), dst(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} static void run(const Mat* srcs, int nsrcs, Mat& dst, const std::vector& coeffs, EltwiseOp op, @@ -150,15 +175,23 @@ public: CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous()); CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs); + EltwiseInvoker p; + p.srcs.resize(nsrcs); + p.coeffs = coeffs; for( int i = 0; i < nsrcs; i++ ) { - CV_Assert(srcs[i].size == dst.size && - srcs[i].type() == dst.type() && + p.srcs[i] = srcs + i; + CV_Assert(srcs[i].type() == dst.type() && srcs[i].isContinuous()); + // Sort srcs and coefficients in the order by number of channels + for( int j = i; j >= 1 && p.srcs[j - 1]->size[1] < p.srcs[j]->size[1]; j-- ) + { + std::swap(p.srcs[j - 1], p.srcs[j]); + if (!p.coeffs.empty()) + std::swap(p.coeffs[j - 1], 
p.coeffs[j]); + } } - EltwiseInvoker p; - p.srcs = srcs; p.nsrcs = nsrcs; p.dst = &dst; p.op = op; @@ -180,7 +213,8 @@ public: break; } } - p.coeffs = simpleCoeffs ? 0 : &coeffs; + if (simpleCoeffs) + p.coeffs.clear(); p.activ = activ; parallel_for_(Range(0, nstripes), p, nstripes); @@ -192,8 +226,8 @@ public: size_t stripeSize = (total + nstripes - 1)/nstripes; size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(r.end*stripeSize, total); - int c, j, k, n = nsrcs; - const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0; + int c, j, k, n; + const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0; float* dstptr0 = dst->ptr(); int blockSize0 = 1 << 12, blockSize; @@ -208,14 +242,35 @@ public: for( c = 0; c < channels; c++ ) { size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize; - const float* srcptr0 = srcs[0].ptr() + globalDelta; + const float* srcptr0 = srcs[0]->ptr() + globalDelta; float* dstptr = dstptr0 + globalDelta; - if( op == PROD ) + // This code assumes that srcs are sorted in descending order by channels. 
+ for (n = 1; n < nsrcs && c < srcs[n]->size[1]; ++n) {} + + if (n == 1) + { + if( !coeffsptr ) + { + for( j = 0; j < blockSize; j++ ) + { + dstptr[j] = srcptr0[j]; + } + } + else + { + float c0 = coeffsptr[0]; + for( j = 0; j < blockSize; j++ ) + { + dstptr[j] = c0*srcptr0[j]; + } + } + } + else if( op == PROD ) { for( k = 1; k < n; k++ ) { - const float* srcptr1 = srcs[k].ptr() + globalDelta; + const float* srcptr1 = srcs[k]->ptr() + globalDelta; for( j = 0; j < blockSize; j++ ) { dstptr[j] = srcptr0[j]*srcptr1[j]; @@ -227,7 +282,7 @@ public: { for( k = 1; k < n; k++ ) { - const float* srcptr1 = srcs[k].ptr() + globalDelta; + const float* srcptr1 = srcs[k]->ptr() + globalDelta; for( j = 0; j < blockSize; j++ ) { dstptr[j] = std::max(srcptr0[j], srcptr1[j]); @@ -239,7 +294,7 @@ public: { for( k = 1; k < n; k++ ) { - const float* srcptr1 = srcs[k].ptr() + globalDelta; + const float* srcptr1 = srcs[k]->ptr() + globalDelta; for( j = 0; j < blockSize; j++ ) { dstptr[j] = srcptr0[j] + srcptr1[j]; @@ -252,7 +307,7 @@ public: float c0 = coeffsptr[0]; for( k = 1; k < n; k++ ) { - const float* srcptr1 = srcs[k].ptr() + globalDelta; + const float* srcptr1 = srcs[k]->ptr() + globalDelta; float c1 = coeffsptr[k]; for( j = 0; j < blockSize; j++ ) { @@ -279,7 +334,7 @@ public: std::vector inputs; std::vector outputs; - if (inputs_.depth() == CV_16S && op != SUM) + if ((inputs_.depth() == CV_16S && op != SUM) || variableChannels) return false; inputs_.getUMatVector(inputs); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 26637ebbe6..61e3cad9a8 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -390,12 +390,6 @@ TEST_P(Test_Darknet_nets, YOLOv3) { applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); -#if defined(INF_ENGINE_RELEASE) - if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD - && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); -#endif - // batchId, classId, confidence, left, top, right, bottom Mat ref = (Mat_(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck 0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle @@ -413,23 +407,35 @@ TEST_P(Test_Darknet_nets, YOLOv3) std::string config_file = "yolov3.cfg"; std::string weights_file = "yolov3.weights"; +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + { + scoreDiff = 0.04; + iouDiff = 0.2; + } +#endif + { SCOPED_TRACE("batch size 1"); testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL) // Test with 'batch size 2' is disabled for DLIE/OpenCL target -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000) +#if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE) { - if (target == DNN_TARGET_OPENCL) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2019R2); - if (target == DNN_TARGET_OPENCL_FP16) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R2); + if (INF_ENGINE_VER_MAJOR_LE(2018050000) && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5); + else if (INF_ENGINE_VER_MAJOR_EQ(2019020000)) + { + if (target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, 
CV_TEST_TAG_DNN_SKIP_IE_2019R2); + if (target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R2); + } + else if (target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); } #endif @@ -444,6 +450,9 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); TEST_P(Test_Darknet_layers, shortcut) { testDarknetLayer("shortcut"); + testDarknetLayer("shortcut_leaky"); + testDarknetLayer("shortcut_unequal"); + testDarknetLayer("shortcut_unequal_2"); } TEST_P(Test_Darknet_layers, upsample) diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 43e54b952a..e6bc350520 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1493,4 +1493,62 @@ TEST(Layer_Test_Convolution, relu_fusion) normAssert(input, output); } +typedef testing::TestWithParam > > Layer_Test_Eltwise_unequal; +TEST_P(Layer_Test_Eltwise_unequal, Accuracy) +{ + bool weighted = get<0>(GetParam()); + int backendId = get<0>(get<1>(GetParam())); + int targetId = get<1>(get<1>(GetParam())); + + if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + Net net; + LayerParams lp; + lp.type = "Eltwise"; + lp.name = "testLayer"; + + const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}}; + std::vector inpNames(3); + std::vector inputs(3); + size_t numOutValues = 1*4*2*2; // By the first input + + std::vector weights(3, 1); + if (weighted) + { + for (int i = 0; i < inputs.size(); ++i) + randu(Mat(1, 1, CV_32F, &weights[i]), -1, 1); + lp.set("coeff", DictValue::arrayReal(&weights[0], weights.size())); + } + + int eltwiseId = net.addLayer(lp.name, lp.type, lp); + for (int i = 0; i < inputs.size(); ++i) + { + inputs[i].create(4, inpShapes[i], CV_32F); + randu(inputs[i], 0, 255); + inpNames[i] = 
format("input_%d", i); + net.connect(0, i, eltwiseId, i); + } + Mat ref(1, numOutValues, CV_32F, Scalar(0)); + + net.setInputsNames(inpNames); + for (int i = 0; i < inputs.size(); ++i) + { + net.setInput(inputs[i], inpNames[i]); + if (numOutValues >= inputs[i].total()) + ref.colRange(0, inputs[i].total()) += weights[i] * inputs[i].reshape(1, 1); + else + ref += weights[i] * inputs[i].reshape(1, 1).colRange(0, numOutValues); + } + + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + Mat out = net.forward(); + normAssert(out.reshape(1, 1), ref); +} +INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine( + testing::Bool(), + dnnBackendsAndTargets() +)); + }} // namespace diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index a5e78a38fa..3045215717 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -394,7 +394,9 @@ enum ConnectedComponentsTypes { CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component - CC_STAT_MAX = 5 +#ifndef CV_DOXYGEN + CC_STAT_MAX = 5 //!< Max enumeration value. Used internally only for memory allocation +#endif }; //! connected components algorithm @@ -4008,7 +4010,23 @@ without self-intersections. Otherwise, the function output is undefined. */ CV_EXPORTS_W bool isContourConvex( InputArray contour ); -//! finds intersection of two convex polygons +/** @example samples/cpp/intersectExample.cpp +Examples of how intersectConvexConvex works +*/ + +/** @brief Finds intersection of two convex polygons + +@param _p1 First polygon +@param _p2 Second polygon +@param _p12 Output polygon describing the intersecting area +@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other. 
+When false, no intersection is found. If the polygons share a side or the vertex of one polygon lies on an edge +of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested. + +@returns Absolute value of area of intersecting polygon + +@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't. + */ CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2, OutputArray _p12, bool handleNested = true ); diff --git a/modules/imgproc/src/accum.simd.hpp b/modules/imgproc/src/accum.simd.hpp index 7bca93de87..6b0e6d6fbe 100644 --- a/modules/imgproc/src/accum.simd.hpp +++ b/modules/imgproc/src/accum.simd.hpp @@ -2624,11 +2624,127 @@ void accW_simd_(const uchar* src, float* dst, const uchar* mask, int len, int cn v_dst10 = v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha); v_dst11 = v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha); - v_store(dst + x, v_dst00); - v_store(dst + x + step, v_dst01); + v_store(dst + x , v_dst00); + v_store(dst + x + step , v_dst01); v_store(dst + x + step * 2, v_dst10); v_store(dst + x + step * 3, v_dst11); } + } else { + const v_float32 zero = vx_setall_f32((float)0); + int size = len * cn; + + if ( cn == 1 ){ + for (; x <= size - cVectorWidth; x += cVectorWidth) + { + v_uint8 v_src = vx_load(src + x); + v_uint8 v_mask = vx_load(mask + x); + + v_uint16 v_m0, v_m1; + v_expand(v_mask, v_m0, v_m1); + v_uint32 v_m00, v_m01, v_m10, v_m11; + v_expand(v_m0, v_m00, v_m01); + v_expand(v_m1, v_m10, v_m11); + + v_float32 v_mf00, v_mf01, v_mf10, v_mf11; + v_mf00 = v_cvt_f32(v_reinterpret_as_s32(v_m00)); + v_mf01 = v_cvt_f32(v_reinterpret_as_s32(v_m01)); + v_mf10 = v_cvt_f32(v_reinterpret_as_s32(v_m10)); + v_mf11 = v_cvt_f32(v_reinterpret_as_s32(v_m11)); + + v_uint16 v_src0, v_src1; + v_expand(v_src, v_src0, v_src1); + + v_uint32 v_src00, v_src01, v_src10, v_src11; + 
v_expand(v_src0, v_src00, v_src01); + v_expand(v_src1, v_src10, v_src11); + + v_float32 v_dst00 = vx_load(dst + x); + v_float32 v_dst01 = vx_load(dst + x + step); + v_float32 v_dst10 = vx_load(dst + x + step * 2); + v_float32 v_dst11 = vx_load(dst + x + step * 3); + + v_mf00 = v_mf00 != zero; + v_mf01 = v_mf01 != zero; + v_mf10 = v_mf10 != zero; + v_mf11 = v_mf11 != zero; + + v_dst00 = v_select(v_mf00, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src00)) * v_alpha), v_dst00); + v_dst01 = v_select(v_mf01, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src01)) * v_alpha), v_dst01); + v_dst10 = v_select(v_mf10, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha), v_dst10); + v_dst11 = v_select(v_mf11, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha), v_dst11); + + v_store(dst + x , v_dst00); + v_store(dst + x + step , v_dst01); + v_store(dst + x + step * 2, v_dst10); + v_store(dst + x + step * 3, v_dst11); + } + } else if ( cn == 3 ) + { + for (; x*cn <= size - cVectorWidth*cn; x += cVectorWidth ) + { + v_uint8 v_src0, v_src1, v_src2; + v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2); + + v_uint16 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21; + v_expand(v_src0, v_src00, v_src01); + v_expand(v_src1, v_src10, v_src11); + v_expand(v_src2, v_src20, v_src21); + + v_uint32 v_src000, v_src001, v_src010, v_src011, v_src100, v_src101, v_src110, v_src111, v_src200, v_src201, v_src210, v_src211; + v_expand(v_src00, v_src000, v_src001); + v_expand(v_src01, v_src010, v_src011); + v_expand(v_src10, v_src100, v_src101); + v_expand(v_src11, v_src110, v_src111); + v_expand(v_src20, v_src200, v_src201); + v_expand(v_src21, v_src210, v_src211); + + v_float32 v_dst00, v_dst01, v_dst02, v_dst03, v_dst10, v_dst11, v_dst12, v_dst13; + v_float32 v_dst20, v_dst21, v_dst22, v_dst23; + v_load_deinterleave(dst + x * cn , v_dst00, v_dst10, v_dst20); + v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, 
v_dst21); + v_load_deinterleave(dst + (x + 2 * step) * cn, v_dst02, v_dst12, v_dst22); + v_load_deinterleave(dst + (x + 3 * step) * cn, v_dst03, v_dst13, v_dst23); + + v_uint8 v_mask = vx_load(mask + x); + + v_uint16 v_m0, v_m1; + v_expand(v_mask, v_m0, v_m1); + v_uint32 v_m00, v_m01, v_m10, v_m11; + v_expand(v_m0, v_m00, v_m01); + v_expand(v_m1, v_m10, v_m11); + + v_float32 v_mf00, v_mf01, v_mf10, v_mf11; + v_mf00 = v_cvt_f32(v_reinterpret_as_s32(v_m00)); + v_mf01 = v_cvt_f32(v_reinterpret_as_s32(v_m01)); + v_mf10 = v_cvt_f32(v_reinterpret_as_s32(v_m10)); + v_mf11 = v_cvt_f32(v_reinterpret_as_s32(v_m11)); + + v_mf00 = v_mf00 != zero; + v_mf01 = v_mf01 != zero; + v_mf10 = v_mf10 != zero; + v_mf11 = v_mf11 != zero; + + v_dst00 = v_select(v_mf00, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src000)) * v_alpha), v_dst00); + v_dst01 = v_select(v_mf01, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src001)) * v_alpha), v_dst01); + v_dst02 = v_select(v_mf10, v_fma(v_dst02, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src010)) * v_alpha), v_dst02); + v_dst03 = v_select(v_mf11, v_fma(v_dst03, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src011)) * v_alpha), v_dst03); + + v_dst10 = v_select(v_mf00, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src100)) * v_alpha), v_dst10); + v_dst11 = v_select(v_mf01, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src101)) * v_alpha), v_dst11); + v_dst12 = v_select(v_mf10, v_fma(v_dst12, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src110)) * v_alpha), v_dst12); + v_dst13 = v_select(v_mf11, v_fma(v_dst13, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src111)) * v_alpha), v_dst13); + + v_dst20 = v_select(v_mf00, v_fma(v_dst20, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src200)) * v_alpha), v_dst20); + v_dst21 = v_select(v_mf01, v_fma(v_dst21, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src201)) * v_alpha), v_dst21); + v_dst22 = v_select(v_mf10, v_fma(v_dst22, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src210)) * v_alpha), 
v_dst22); + v_dst23 = v_select(v_mf11, v_fma(v_dst23, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src211)) * v_alpha), v_dst23); + + v_store_interleave(dst + x * cn , v_dst00, v_dst10, v_dst20); + v_store_interleave(dst + ( x + step ) * cn, v_dst01, v_dst11, v_dst21); + v_store_interleave(dst + ( x + step * 2 ) * cn, v_dst02, v_dst12, v_dst22); + v_store_interleave(dst + ( x + step * 3 ) * cn, v_dst03, v_dst13, v_dst23); + } + } } #endif // CV_SIMD accW_general_(src, dst, mask, len, cn, alpha, x); @@ -2657,9 +2773,81 @@ void accW_simd_(const ushort* src, float* dst, const uchar* mask, int len, int c v_dst0 = v_fma(v_dst0, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int0)) * v_alpha); v_dst1 = v_fma(v_dst1, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int1)) * v_alpha); - v_store(dst + x, v_dst0); + v_store(dst + x , v_dst0); v_store(dst + x + step, v_dst1); } + } else { + const v_float32 zero = vx_setall_f32((float)0); + int size = len * cn; + if ( cn == 1 ) + { + for (; x <= size - cVectorWidth; x += cVectorWidth) + { + v_uint16 v_src = vx_load(src + x); + v_uint16 v_mask = v_reinterpret_as_u16(vx_load_expand(mask + x)); + + v_uint32 v_m0, v_m1; + v_expand(v_mask, v_m0, v_m1); + + v_float32 v_mf0, v_mf1; + v_mf0 = v_cvt_f32(v_reinterpret_as_s32(v_m0)); + v_mf1 = v_cvt_f32(v_reinterpret_as_s32(v_m1)); + + v_uint32 v_src0, v_src1; + v_expand(v_src, v_src0, v_src1); + + v_float32 v_dst0 = vx_load(dst + x); + v_float32 v_dst1 = vx_load(dst + x + step); + + v_mf0 = v_mf0 != zero; + v_mf1 = v_mf1 != zero; + + v_dst0 = v_select(v_mf0, v_fma(v_dst0, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src0)) * v_alpha), v_dst0); + v_dst1 = v_select(v_mf1, v_fma(v_dst1, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src1)) * v_alpha), v_dst1); + + v_store(dst + x , v_dst0); + v_store(dst + x + step, v_dst1); + } + } else if ( cn == 3 ) + { + for (; x*cn <= size - cVectorWidth*cn; x += cVectorWidth ) + { + v_uint16 v_src0, v_src1, v_src2; + v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2); + 
+ v_uint16 v_mask = v_reinterpret_as_u16(vx_load_expand(mask + x)); + + v_uint32 v_m0, v_m1; + v_expand(v_mask, v_m0, v_m1); + + v_uint32 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21; + v_expand(v_src0, v_src00, v_src01); + v_expand(v_src1, v_src10, v_src11); + v_expand(v_src2, v_src20, v_src21); + + v_float32 v_dst00, v_dst01, v_dst02, v_dst10, v_dst11, v_dst20, v_dst21; + v_load_deinterleave(dst + x * cn , v_dst00, v_dst10, v_dst20); + v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21); + + v_float32 v_mf0, v_mf1; + v_mf0 = v_cvt_f32(v_reinterpret_as_s32(v_m0)); + v_mf1 = v_cvt_f32(v_reinterpret_as_s32(v_m1)); + + v_mf0 = v_mf0 != zero; + v_mf1 = v_mf1 != zero; + + v_dst00 = v_select(v_mf0, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src00)) * v_alpha), v_dst00); + v_dst10 = v_select(v_mf0, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha), v_dst10); + v_dst20 = v_select(v_mf0, v_fma(v_dst20, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src20)) * v_alpha), v_dst20); + + v_dst01 = v_select(v_mf1, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src01)) * v_alpha), v_dst01); + v_dst11 = v_select(v_mf1, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha), v_dst11); + v_dst21 = v_select(v_mf1, v_fma(v_dst21, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src21)) * v_alpha), v_dst21); + + v_store_interleave(dst + x * cn , v_dst00, v_dst10, v_dst20); + v_store_interleave(dst + ( x + step ) * cn, v_dst01, v_dst11, v_dst21); + } + } } #endif // CV_SIMD accW_general_(src, dst, mask, len, cn, alpha, x); diff --git a/modules/java/jar/CMakeLists.txt b/modules/java/jar/CMakeLists.txt index 2cce25a76b..33817bcc62 100644 --- a/modules/java/jar/CMakeLists.txt +++ b/modules/java/jar/CMakeLists.txt @@ -27,6 +27,13 @@ endif() set(OPENCV_JAVADOC_DESTINATION "${OpenCV_BINARY_DIR}/doc/doxygen/html/javadoc" CACHE STRING "") +# Old Javadoc URL looks like this: https://docs.oracle.com/javase/6/docs/api/ 
+# New Javadoc URL looks like this: https://docs.oracle.com/en/java/javase/11/docs/api/ +set(OPENCV_JAVADOC_LINK_URL "" CACHE STRING "See details in modules/java/jar/CMakeLists.txt") +if(OPENCV_JAVADOC_LINK_URL) + set(CMAKE_CONFIG_OPENCV_JAVADOC_LINK "link=\"${OPENCV_JAVADOC_LINK_URL}\"") +endif() + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OPENCV_JAVA_DIR}/build.xml" @ONLY) list(APPEND depends "${OPENCV_JAVA_DIR}/build.xml") diff --git a/modules/java/jar/build.xml.in b/modules/java/jar/build.xml.in index bf2830186e..732b398576 100644 --- a/modules/java/jar/build.xml.in +++ b/modules/java/jar/build.xml.in @@ -42,7 +42,7 @@ bottom="Generated on ${timestamp} / OpenCV @OPENCV_VCSVERSION@" failonerror="true" encoding="UTF-8" charset="UTF-8" docencoding="UTF-8" - link="https://docs.oracle.com/javase/6/docs/api/" + @CMAKE_CONFIG_OPENCV_JAVADOC_LINK@ additionalparam="--allow-script-in-comments" >
diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 892215b9a1..7114bea3af 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -96,7 +96,7 @@ class SamplesFindFile(NewOpenCVTests): def test_MissingFileException(self): try: - res = cv.samples.findFile('non_existed.file', True) + _res = cv.samples.findFile('non_existed.file', True) self.assertEqual("Dead code", 0) except cv.error as _e: pass diff --git a/modules/ts/src/ts_tags.cpp b/modules/ts/src/ts_tags.cpp index 4571e4462d..4b775722c1 100644 --- a/modules/ts/src/ts_tags.cpp +++ b/modules/ts/src/ts_tags.cpp @@ -46,7 +46,8 @@ static std::vector& getTestTagsSkipList() #if OPENCV_32BIT_CONFIGURATION testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_2GB); #else - testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_6GB); + if (!cvtest::runBigDataTests) + testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_6GB); #endif testSkipWithTags.push_back(CV_TEST_TAG_VERYLONG); #if defined(_DEBUG) diff --git a/samples/cpp/intersectExample.cpp b/samples/cpp/intersectExample.cpp new file mode 100644 index 0000000000..a8a897241f --- /dev/null +++ b/samples/cpp/intersectExample.cpp @@ -0,0 +1,161 @@ +/* + * Author: Steve Nicholson + * + * A program that illustrates intersectConvexConvex in various scenarios + */ + +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" + +using namespace cv; +using namespace std; + +// Create a vector of points describing a rectangle with the given corners +static vector makeRectangle(Point topLeft, Point bottomRight) +{ + vector rectangle; + rectangle.push_back(topLeft); + rectangle.push_back(Point(bottomRight.x, topLeft.y)); + rectangle.push_back(bottomRight); + rectangle.push_back(Point(topLeft.x, bottomRight.y)); + return rectangle; +} + +static vector makeTriangle(Point point1, Point point2, Point point3) +{ + vector triangle; + triangle.push_back(point1); + triangle.push_back(point2); + triangle.push_back(point3); + return triangle; 
+} + +// Run intersectConvexConvex on two polygons then draw the polygons and their intersection (if there is one) +// Return the area of the intersection +static float drawIntersection(Mat &image, vector polygon1, vector polygon2, bool handleNested = true) +{ + vector intersectionPolygon; + + vector > polygons; + polygons.push_back(polygon1); + polygons.push_back(polygon2); + + float intersectArea = intersectConvexConvex(polygon1, polygon2, intersectionPolygon, handleNested); + + if (intersectArea > 0) + { + Scalar fillColor(200, 200, 200); + // If the input is invalid, draw the intersection in red + if (!isContourConvex(polygon1) || !isContourConvex(polygon2)) + { + fillColor = Scalar(0, 0, 255); + } + vector > pp; + pp.push_back(intersectionPolygon); + fillPoly(image, pp, fillColor); + } + polylines(image, polygons, true, Scalar(0, 0, 0)); + + return intersectArea; +} + +static void drawDescription(Mat &image, int intersectionArea, string description, Point origin) +{ + const size_t bufSize=1024; + char caption[bufSize]; + snprintf(caption, bufSize, "Intersection area: %d%s", intersectionArea, description.c_str()); + putText(image, caption, origin, FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 0, 0)); +} + +static void intersectConvexExample() +{ + Mat image(610, 550, CV_8UC3, Scalar(255, 255, 255)); + float intersectionArea; + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 10), Point(50, 50)), + makeRectangle(Point(20, 20), Point(60, 60))); + + drawDescription(image, (int)intersectionArea, "", Point(70, 40)); + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 70), Point(35, 95)), + makeRectangle(Point(35, 95), Point(60, 120))); + + drawDescription(image, (int)intersectionArea, "", Point(70, 100)); + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 130), Point(60, 180)), + makeRectangle(Point(20, 140), Point(50, 170)), + true); + + drawDescription(image, (int)intersectionArea, " (handleNested true)", 
Point(70, 160)); + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 190), Point(60, 240)), + makeRectangle(Point(20, 200), Point(50, 230)), + false); + + drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 220)); + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 250), Point(60, 300)), + makeRectangle(Point(20, 250), Point(50, 290)), + true); + + drawDescription(image, (int)intersectionArea, " (handleNested true)", Point(70, 280)); + + // These rectangles share an edge so handleNested can be false and an intersection is still found + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 310), Point(60, 360)), + makeRectangle(Point(20, 310), Point(50, 350)), + false); + + drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 340)); + + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 370), Point(60, 420)), + makeRectangle(Point(20, 371), Point(50, 410)), + false); + + drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 400)); + + // A vertex of the triangle lies on an edge of the rectangle so handleNested can be false and an intersection is still found + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 430), Point(60, 480)), + makeTriangle(Point(35, 430), Point(20, 470), Point(50, 470)), + false); + + drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 460)); + + // Show intersection of overlapping rectangle and triangle + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 490), Point(40, 540)), + makeTriangle(Point(25, 500), Point(25, 530), Point(60, 515)), + false); + + drawDescription(image, (int)intersectionArea, "", Point(70, 520)); + + // This concave polygon is invalid input to intersectConvexConvex so it returns an invalid intersection + vector notConvex; + notConvex.push_back(Point(25, 560)); + notConvex.push_back(Point(25, 590)); 
+ notConvex.push_back(Point(45, 580)); + notConvex.push_back(Point(60, 600)); + notConvex.push_back(Point(60, 550)); + notConvex.push_back(Point(45, 570)); + intersectionArea = drawIntersection(image, + makeRectangle(Point(10, 550), Point(50, 600)), + notConvex, + false); + + drawDescription(image, (int)intersectionArea, " (invalid input: not convex)", Point(70, 580)); + + imshow("Intersections", image); + waitKey(0); +} + +int main() +{ + intersectConvexExample(); +} diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp index e94c68cd6d..a556217f8b 100644 --- a/samples/cpp/stitching_detailed.cpp +++ b/samples/cpp/stitching_detailed.cpp @@ -116,8 +116,10 @@ double compose_megapix = -1; float conf_thresh = 1.f; #ifdef HAVE_OPENCV_XFEATURES2D string features_type = "surf"; +float match_conf = 0.65f; #else string features_type = "orb"; +float match_conf = 0.3f; #endif string matcher_type = "homography"; string estimator_type = "homography"; @@ -132,7 +134,6 @@ int expos_comp_type = ExposureCompensator::GAIN_BLOCKS; int expos_comp_nr_feeds = 1; int expos_comp_nr_filtering = 2; int expos_comp_block_size = 32; -float match_conf = 0.3f; string seam_find_type = "gc_color"; int blend_type = Blender::MULTI_BAND; int timelapse_type = Timelapser::AS_IS; @@ -196,7 +197,7 @@ static int parseCmdArgs(int argc, char** argv) else if (string(argv[i]) == "--features") { features_type = argv[i + 1]; - if (features_type == "orb") + if (string(features_type) == "orb") match_conf = 0.3f; i++; } diff --git a/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp index 73aa3c9819..aa5fce2091 100644 --- a/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp +++ b/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp @@ -14,9 +14,9 @@ using namespace cv; const char* keys = "{ help h| | Print help message. }" - "{ input1 | | Path to input image 1. 
}" - "{ input2 | | Path to input image 2. }" - "{ input3 | | Path to input image 3. }"; + "{ @input1 | | Path to input image 1. }" + "{ @input2 | | Path to input image 2. }" + "{ @input3 | | Path to input image 3. }"; /** * @function main diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 6afd166be5..912c2f0832 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -14,7 +14,7 @@ parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of args = parser.parse_args() net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model)) -net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); +net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: cap = cv.VideoCapture(args.input) diff --git a/samples/dnn/mobilenet_ssd_accuracy.py b/samples/dnn/mobilenet_ssd_accuracy.py index 58395acbdf..23fb06b921 100644 --- a/samples/dnn/mobilenet_ssd_accuracy.py +++ b/samples/dnn/mobilenet_ssd_accuracy.py @@ -27,7 +27,7 @@ args = parser.parse_args() ### Get OpenCV predictions ##################################################### net = cv.dnn.readNetFromTensorflow(cv.samples.findFile(args.weights), cv.samples.findFile(args.prototxt)) -net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); +net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) detections = [] for imgName in os.listdir(args.images): diff --git a/samples/dnn/text_detection.py b/samples/dnn/text_detection.py index 9f7f159a54..9ea4c10190 100644 --- a/samples/dnn/text_detection.py +++ b/samples/dnn/text_detection.py @@ -134,7 +134,7 @@ def main(): for j in range(4): p1 = (vertices[j][0], vertices[j][1]) p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1]) - cv.line(frame, p1, p2, (0, 255, 0), 1); + cv.line(frame, p1, p2, (0, 255, 0), 1) # Put efficiency information cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py 
index b46b1d492c..5aa1d30e39 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -21,7 +21,7 @@ def tokenize(s): elif token: tokens.append(token) token = "" - isString = (symbol == '\"' or symbol == '\'') ^ isString; + isString = (symbol == '\"' or symbol == '\'') ^ isString elif symbol == '{' or symbol == '}' or symbol == '[' or symbol == ']': if token: diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index beaca3f4e4..e6017b227e 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -122,7 +122,7 @@ def createSSDGraph(modelPath, configPath, outputPath): print('Input image size: %dx%d' % (image_width, image_height)) # Read the graph. - inpNames = ['image_tensor'] + _inpNames = ['image_tensor'] outNames = ['num_detections', 'detection_scores', 'detection_boxes', 'detection_classes'] writeTextGraph(modelPath, outputPath, outNames) diff --git a/samples/python/browse.py b/samples/python/browse.py index 14bd05a05d..edc791f9cd 100755 --- a/samples/python/browse.py +++ b/samples/python/browse.py @@ -45,7 +45,7 @@ def main(): small = img - for i in xrange(3): + for _i in xrange(3): small = cv.pyrDown(small) def onmouse(event, x, y, flags, param): diff --git a/samples/python/calibrate.py b/samples/python/calibrate.py index 2378d8bf1a..bca430b5a5 100755 --- a/samples/python/calibrate.py +++ b/samples/python/calibrate.py @@ -97,7 +97,7 @@ def main(): obj_points.append(pattern_points) # calculate camera distortion - rms, camera_matrix, dist_coefs, rvecs, tvecs = cv.calibrateCamera(obj_points, img_points, (w, h), None, None) + rms, camera_matrix, dist_coefs, _rvecs, _tvecs = cv.calibrateCamera(obj_points, img_points, (w, h), None, None) print("\nRMS:", rms) print("camera matrix:\n", camera_matrix) @@ -106,7 +106,7 @@ def main(): # undistort the image with the calibration print('') for fn in img_names if debug_dir else []: - path, name, ext = splitfn(fn) + _path, name, 
_ext = splitfn(fn) img_found = os.path.join(debug_dir, name + '_chess.png') outfile = os.path.join(debug_dir, name + '_undistorted.png') diff --git a/samples/python/camera_calibration_show_extrinsics.py b/samples/python/camera_calibration_show_extrinsics.py index 610138bc7b..0118b5b913 100755 --- a/samples/python/camera_calibration_show_extrinsics.py +++ b/samples/python/camera_calibration_show_extrinsics.py @@ -184,7 +184,7 @@ def main(): extrinsics = fs.getNode('extrinsic_parameters').mat() import matplotlib.pyplot as plt - from mpl_toolkits.mplot3d import Axes3D + from mpl_toolkits.mplot3d import Axes3D # pylint: disable=unused-variable fig = plt.figure() ax = fig.gca(projection='3d') diff --git a/samples/python/color_histogram.py b/samples/python/color_histogram.py index 0422d7282c..a1924bab8b 100755 --- a/samples/python/color_histogram.py +++ b/samples/python/color_histogram.py @@ -46,7 +46,7 @@ class App(): cam = video.create_capture(fn, fallback='synth:bg=baboon.jpg:class=chess:noise=0.05') while True: - flag, frame = cam.read() + _flag, frame = cam.read() cv.imshow('camera', frame) small = cv.pyrDown(frame) diff --git a/samples/python/edge.py b/samples/python/edge.py index ba04adecfe..e85c2f6288 100755 --- a/samples/python/edge.py +++ b/samples/python/edge.py @@ -38,7 +38,7 @@ def main(): cap = video.create_capture(fn) while True: - flag, img = cap.read() + _flag, img = cap.read() gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) thrs1 = cv.getTrackbarPos('thrs1', 'edge') thrs2 = cv.getTrackbarPos('thrs2', 'edge') diff --git a/samples/python/facedetect.py b/samples/python/facedetect.py index 1050cc5aff..488c92d5e5 100755 --- a/samples/python/facedetect.py +++ b/samples/python/facedetect.py @@ -48,7 +48,7 @@ def main(): cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg'))) while True: - ret, img = cam.read() + _ret, img = cam.read() gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) gray = 
cv.equalizeHist(gray) diff --git a/samples/python/fitline.py b/samples/python/fitline.py index 6705f39abb..db695cbb2b 100755 --- a/samples/python/fitline.py +++ b/samples/python/fitline.py @@ -88,6 +88,7 @@ def main(): update() ch = cv.waitKey(0) if ch == ord('f'): + global cur_func_name if PY3: cur_func_name = next(dist_func_names) else: diff --git a/samples/python/houghcircles.py b/samples/python/houghcircles.py index b8d3a1a019..416309aab0 100755 --- a/samples/python/houghcircles.py +++ b/samples/python/houghcircles.py @@ -30,7 +30,7 @@ def main(): circles = cv.HoughCircles(img, cv.HOUGH_GRADIENT, 1, 10, np.array([]), 100, 30, 1, 30) if circles is not None: # Check if circles have been found and only then iterate over these and add them to the image - a, b, c = circles.shape + _a, b, _c = circles.shape for i in range(b): cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), circles[0][i][2], (0, 0, 255), 3, cv.LINE_AA) cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), 2, (0, 255, 0), 3, cv.LINE_AA) # draw center of circle diff --git a/samples/python/houghlines.py b/samples/python/houghlines.py index 7c99cf2ae9..022b680f56 100755 --- a/samples/python/houghlines.py +++ b/samples/python/houghlines.py @@ -29,14 +29,14 @@ def main(): if True: # HoughLinesP lines = cv.HoughLinesP(dst, 1, math.pi/180.0, 40, np.array([]), 50, 10) - a,b,c = lines.shape + a, b, _c = lines.shape for i in range(a): cv.line(cdst, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]), (0, 0, 255), 3, cv.LINE_AA) else: # HoughLines lines = cv.HoughLines(dst, 1, math.pi/180.0, 50, np.array([]), 0, 0) if lines is not None: - a,b,c = lines.shape + a, b, _c = lines.shape for i in range(a): rho = lines[i][0][0] theta = lines[i][0][1] diff --git a/samples/python/kmeans.py b/samples/python/kmeans.py index d7fcbe8083..1b1c9d6a04 100755 --- a/samples/python/kmeans.py +++ b/samples/python/kmeans.py @@ -33,7 +33,7 @@ def main(): points, _ = make_gaussians(cluster_n, img_size) term_crit = 
(cv.TERM_CRITERIA_EPS, 30, 0.1) - ret, labels, centers = cv.kmeans(points, cluster_n, None, term_crit, 10, 0) + _ret, labels, _centers = cv.kmeans(points, cluster_n, None, term_crit, 10, 0) img = np.zeros((img_size, img_size, 3), np.uint8) for (x, y), label in zip(np.int32(points), labels.ravel()): diff --git a/samples/python/lappyr.py b/samples/python/lappyr.py index 2ee73ecb1d..2835b98d13 100755 --- a/samples/python/lappyr.py +++ b/samples/python/lappyr.py @@ -60,7 +60,7 @@ def main(): cv.createTrackbar('%d'%i, 'level control', 5, 50, nothing) while True: - ret, frame = cap.read() + _ret, frame = cap.read() pyr = build_lappyr(frame, leveln) for i in xrange(leveln): diff --git a/samples/python/opt_flow.py b/samples/python/opt_flow.py index c4515582e7..76a0ac2caf 100755 --- a/samples/python/opt_flow.py +++ b/samples/python/opt_flow.py @@ -64,14 +64,14 @@ def main(): fn = 0 cam = video.create_capture(fn) - ret, prev = cam.read() + _ret, prev = cam.read() prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY) show_hsv = False show_glitch = False cur_glitch = prev.copy() while True: - ret, img = cam.read() + _ret, img = cam.read() gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0) prevgray = gray diff --git a/samples/python/peopledetect.py b/samples/python/peopledetect.py index d2a7fdeee5..bdd49cab6f 100755 --- a/samples/python/peopledetect.py +++ b/samples/python/peopledetect.py @@ -51,7 +51,7 @@ def main(): print('loading error') continue - found, w = hog.detectMultiScale(img, winStride=(8,8), padding=(32,32), scale=1.05) + found, _w = hog.detectMultiScale(img, winStride=(8,8), padding=(32,32), scale=1.05) found_filtered = [] for ri, r in enumerate(found): for qi, q in enumerate(found): diff --git a/samples/python/stereo_match.py b/samples/python/stereo_match.py index 969ea11dbb..4d5875b814 100755 --- a/samples/python/stereo_match.py +++ b/samples/python/stereo_match.py @@ -69,8 +69,8 @@ def main(): 
out_points = points[mask] out_colors = colors[mask] out_fn = 'out.ply' - write_ply('out.ply', out_points, out_colors) - print('%s saved' % 'out.ply') + write_ply(out_fn, out_points, out_colors) + print('%s saved' % out_fn) cv.imshow('left', imgL) cv.imshow('disparity', (disp-min_disp)/num_disp) diff --git a/samples/python/turing.py b/samples/python/turing.py index 27dbe02ad3..dc920d1295 100755 --- a/samples/python/turing.py +++ b/samples/python/turing.py @@ -32,7 +32,7 @@ def main(): w, h = 512, 512 - args, args_list = getopt.getopt(sys.argv[1:], 'o:', []) + args, _args_list = getopt.getopt(sys.argv[1:], 'o:', []) args = dict(args) out = None if '-o' in args: diff --git a/samples/python/tutorial_code/core/mat_operations/mat_operations.py b/samples/python/tutorial_code/core/mat_operations/mat_operations.py index e9ec03699d..f237074fb6 100644 --- a/samples/python/tutorial_code/core/mat_operations/mat_operations.py +++ b/samples/python/tutorial_code/core/mat_operations/mat_operations.py @@ -25,13 +25,13 @@ def access_pixel(): y = 0 x = 0 ## [Pixel access 1] - intensity = img[y,x] + _intensity = img[y,x] ## [Pixel access 1] ## [Pixel access 3] - blue = img[y,x,0] - green = img[y,x,1] - red = img[y,x,2] + _blue = img[y,x,0] + _green = img[y,x,1] + _red = img[y,x,2] ## [Pixel access 3] ## [Pixel access 5] @@ -42,12 +42,12 @@ def reference_counting(): # Memory management and reference counting ## [Reference counting 2] img = cv.imread('image.jpg') - img1 = np.copy(img) + _img1 = np.copy(img) ## [Reference counting 2] ## [Reference counting 3] img = cv.imread('image.jpg') - sobelx = cv.Sobel(img, cv.CV_32F, 1, 0); + _sobelx = cv.Sobel(img, cv.CV_32F, 1, 0) ## [Reference counting 3] def primitive_operations(): @@ -57,17 +57,17 @@ def primitive_operations(): ## [Set image to black] ## [Select ROI] - smallImg = img[10:110,10:110] + _smallImg = img[10:110,10:110] ## [Select ROI] ## [BGR to Gray] img = cv.imread('image.jpg') - grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY) + _grey 
= cv.cvtColor(img, cv.COLOR_BGR2GRAY) ## [BGR to Gray] src = np.ones((4,4), np.uint8) ## [Convert to CV_32F] - dst = src.astype(np.float32) + _dst = src.astype(np.float32) ## [Convert to CV_32F] def visualize_images(): diff --git a/samples/python/tutorial_code/imgProc/changing_contrast_brightness_image/changing_contrast_brightness_image.py b/samples/python/tutorial_code/imgProc/changing_contrast_brightness_image/changing_contrast_brightness_image.py index b3f316396a..127a0f4325 100644 --- a/samples/python/tutorial_code/imgProc/changing_contrast_brightness_image/changing_contrast_brightness_image.py +++ b/samples/python/tutorial_code/imgProc/changing_contrast_brightness_image/changing_contrast_brightness_image.py @@ -25,8 +25,8 @@ def gammaCorrection(): res = cv.LUT(img_original, lookUpTable) ## [changing-contrast-brightness-gamma-correction] - img_gamma_corrected = cv.hconcat([img_original, res]); - cv.imshow("Gamma correction", img_gamma_corrected); + img_gamma_corrected = cv.hconcat([img_original, res]) + cv.imshow("Gamma correction", img_gamma_corrected) def on_linear_transform_alpha_trackbar(val): global alpha diff --git a/samples/python/tutorial_code/ml/introduction_to_pca/introduction_to_pca.py b/samples/python/tutorial_code/ml/introduction_to_pca/introduction_to_pca.py index c83f7980f5..64fd07b174 100644 --- a/samples/python/tutorial_code/ml/introduction_to_pca/introduction_to_pca.py +++ b/samples/python/tutorial_code/ml/introduction_to_pca/introduction_to_pca.py @@ -85,13 +85,13 @@ contours, _ = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE) for i, c in enumerate(contours): # Calculate the area of each contour - area = cv.contourArea(c); + area = cv.contourArea(c) # Ignore contours that are too small or too large if area < 1e2 or 1e5 < area: continue # Draw each contour only for visualisation purposes - cv.drawContours(src, contours, i, (0, 0, 255), 2); + cv.drawContours(src, contours, i, (0, 0, 255), 2) # Find the orientation of each shape 
getOrientation(c, src) ## [contours] diff --git a/samples/python/video_threaded.py b/samples/python/video_threaded.py index 4886db3d80..cbc73d296b 100755 --- a/samples/python/video_threaded.py +++ b/samples/python/video_threaded.py @@ -70,7 +70,7 @@ def main(): draw_str(res, (20, 60), "frame interval : %.1f ms" % (frame_interval.value*1000)) cv.imshow('threaded video', res) if len(pending) < threadn: - ret, frame = cap.read() + _ret, frame = cap.read() t = clock() frame_interval.update(t - last_frame_time) last_frame_time = t diff --git a/samples/python/video_v4l2.py b/samples/python/video_v4l2.py index 68f22699b1..61b1e35804 100644 --- a/samples/python/video_v4l2.py +++ b/samples/python/video_v4l2.py @@ -42,7 +42,7 @@ def main(): cv.createTrackbar("Focus", "Video", focus, 100, lambda v: cap.set(cv.CAP_PROP_FOCUS, v / 100)) while True: - status, img = cap.read() + _status, img = cap.read() fourcc = decode_fourcc(cap.get(cv.CAP_PROP_FOURCC))