diff --git a/apps/interactive-calibration/rotationConverters.cpp b/apps/interactive-calibration/rotationConverters.cpp index ff31c9e380..421d15a924 100644 --- a/apps/interactive-calibration/rotationConverters.cpp +++ b/apps/interactive-calibration/rotationConverters.cpp @@ -16,7 +16,7 @@ void calib::Euler(const cv::Mat& src, cv::Mat& dst, int argType) { if((src.rows == 3) && (src.cols == 3)) { - //convert rotaion matrix to 3 angles (pitch, yaw, roll) + //convert rotation matrix to 3 angles (pitch, yaw, roll) dst = cv::Mat(3, 1, CV_64F); double pitch, yaw, roll; @@ -55,7 +55,7 @@ void calib::Euler(const cv::Mat& src, cv::Mat& dst, int argType) else if( (src.cols == 1 && src.rows == 3) || (src.cols == 3 && src.rows == 1 ) ) { - //convert vector which contains 3 angles (pitch, yaw, roll) to rotaion matrix + //convert vector which contains 3 angles (pitch, yaw, roll) to rotation matrix double pitch, yaw, roll; if(src.cols == 1 && src.rows == 3) { diff --git a/cmake/FindCUDA.cmake b/cmake/FindCUDA.cmake index 632b8c8285..37d557a792 100644 --- a/cmake/FindCUDA.cmake +++ b/cmake/FindCUDA.cmake @@ -141,7 +141,7 @@ # -- Same as CUDA_ADD_EXECUTABLE except that a library is created. # # CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files +# -- Creates a convenience target that deletes all the dependency files # generated. You should make clean after running this target to ensure the # dependency files get regenerated. # @@ -473,7 +473,7 @@ else() endif() # Propagate the host flags to the host compiler via -Xcompiler -option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" ON) +option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" ON) # Enable CUDA_SEPARABLE_COMPILATION option(CUDA_SEPARABLE_COMPILATION "Compile CUDA objects with separable compilation enabled. 
Requires CUDA 5.0+" OFF) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index 8beabefe41..377eb98a65 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -761,24 +761,24 @@ macro(ocv_compiler_optimization_fill_cpu_config) endif() endmacro() -macro(ocv_add_dispatched_file filename) +macro(__ocv_add_dispatched_file filename target_src_var src_directory dst_directory precomp_hpp optimizations_var) if(NOT OPENCV_INITIAL_PASS) set(__codestr " -#include \"${CMAKE_CURRENT_LIST_DIR}/src/precomp.hpp\" -#include \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\" +#include \"${src_directory}/${precomp_hpp}\" +#include \"${src_directory}/${filename}.simd.hpp\" ") - set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\"") + set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${src_directory}/${filename}.simd.hpp\"") set(__dispatch_modes "BASELINE") - set(__optimizations "${ARGN}") + set(__optimizations "${${optimizations_var}}") if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) set(__optimizations "") endif() foreach(OPT ${__optimizations}) string(TOLOWER "${OPT}" OPT_LOWER) - set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp") + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.${OPT_LOWER}.cpp") if(EXISTS "${__file}") file(READ "${__file}" __content) else() @@ -791,7 +791,11 @@ macro(ocv_add_dispatched_file filename) endif() if(";${CPU_DISPATCH};" MATCHES "${OPT}" OR __CPU_DISPATCH_INCLUDE_ALL) - list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}") + if(EXISTS "${src_directory}/${filename}.${OPT_LOWER}.cpp") + message(STATUS "Using overrided ${OPT} source: ${src_directory}/${filename}.${OPT_LOWER}.cpp") + else() + list(APPEND ${target_src_var} "${__file}") + endif() endif() set(__declarations_str "${__declarations_str} @@ -803,9 +807,11 @@ 
macro(ocv_add_dispatched_file filename) set(__declarations_str "${__declarations_str} #define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes} + +#undef CV_CPU_SIMD_FILENAME ") - set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp") + set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.simd_declarations.hpp") if(EXISTS "${__file}") file(READ "${__file}" __content) endif() @@ -817,6 +823,17 @@ macro(ocv_add_dispatched_file filename) endif() endmacro() +macro(ocv_add_dispatched_file filename) + set(__optimizations "${ARGN}") + if(" ${ARGV1}" STREQUAL " TEST") + list(REMOVE_AT __optimizations 0) + __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/test" "test/" "test_precomp.hpp" __optimizations) + else() + __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/src" "" "precomp.hpp" __optimizations) + endif() +endmacro() + + # Workaround to support code which always require all code paths macro(ocv_add_dispatched_file_force_all) set(__CPU_DISPATCH_INCLUDE_ALL 1) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 3bdb6fa961..58e204094f 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -3,7 +3,7 @@ if(WIN32 AND NOT MSVC) return() endif() -if(NOT APPLE AND CV_CLANG) +if(NOT UNIX AND CV_CLANG) message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).") return() endif() @@ -188,6 +188,13 @@ if(CUDA_FOUND) foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var}_backup_in_cuda_compile_ "${${var}}") + if (CV_CLANG) + # we remove -Winconsistent-missing-override and -Qunused-arguments + # just in case we are compiling CUDA with gcc but OpenCV with clang + string(REPLACE "-Winconsistent-missing-override" "" ${var} "${${var}}") + string(REPLACE "-Qunused-arguments" "" ${var} "${${var}}") + endif() + 
# we remove /EHa as it generates warnings under windows string(REPLACE "/EHa" "" ${var} "${${var}}") diff --git a/cmake/OpenCVDownload.cmake b/cmake/OpenCVDownload.cmake index 7724147d31..cdc47ad2cb 100644 --- a/cmake/OpenCVDownload.cmake +++ b/cmake/OpenCVDownload.cmake @@ -20,16 +20,19 @@ if(DEFINED ENV{OPENCV_DOWNLOAD_PATH}) endif() set(OPENCV_DOWNLOAD_PATH "${OpenCV_SOURCE_DIR}/.cache" CACHE PATH "${HELP_OPENCV_DOWNLOAD_PATH}") set(OPENCV_DOWNLOAD_LOG "${OpenCV_BINARY_DIR}/CMakeDownloadLog.txt") +set(OPENCV_DOWNLOAD_WITH_CURL "${OpenCV_BINARY_DIR}/download_with_curl.sh") +set(OPENCV_DOWNLOAD_WITH_WGET "${OpenCV_BINARY_DIR}/download_with_wget.sh") -# Init download cache directory and log file +# Init download cache directory and log file and helper scripts if(NOT EXISTS "${OPENCV_DOWNLOAD_PATH}") file(MAKE_DIRECTORY ${OPENCV_DOWNLOAD_PATH}) endif() if(NOT EXISTS "${OPENCV_DOWNLOAD_PATH}/.gitignore") file(WRITE "${OPENCV_DOWNLOAD_PATH}/.gitignore" "*\n") endif() -file(WRITE "${OPENCV_DOWNLOAD_LOG}" "use_cache \"${OPENCV_DOWNLOAD_PATH}\"\n") - +file(WRITE "${OPENCV_DOWNLOAD_LOG}" "#use_cache \"${OPENCV_DOWNLOAD_PATH}\"\n") +file(REMOVE "${OPENCV_DOWNLOAD_WITH_CURL}") +file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}") function(ocv_download) cmake_parse_arguments(DL "UNPACK;RELATIVE_URL" "FILENAME;HASH;DESTINATION_DIR;ID;STATUS" "URL" ${ARGN}) @@ -103,7 +106,7 @@ function(ocv_download) endif() # Log all calls to file - ocv_download_log("do_${mode} \"${DL_FILENAME}\" \"${DL_HASH}\" \"${DL_URL}\" \"${DL_DESTINATION_DIR}\"") + ocv_download_log("#do_${mode} \"${DL_FILENAME}\" \"${DL_HASH}\" \"${DL_URL}\" \"${DL_DESTINATION_DIR}\"") # ... 
and to console set(__msg_prefix "") if(DL_ID) @@ -191,6 +194,9 @@ function(ocv_download) For details please refer to the download log file: ${OPENCV_DOWNLOAD_LOG} ") + # write helper scripts for failed downloads + file(APPEND "${OPENCV_DOWNLOAD_WITH_CURL}" "curl --output \"${CACHE_CANDIDATE}\" \"${DL_URL}\"\n") + file(APPEND "${OPENCV_DOWNLOAD_WITH_WGET}" "wget -O \"${CACHE_CANDIDATE}\" \"${DL_URL}\"\n") return() endif() diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 00d15dc6d9..54f100d3cf 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -1202,6 +1202,9 @@ function(ocv_add_accuracy_tests) set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs}) endif() + if(OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED) + list(APPEND OPENCV_TEST_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED}) + endif() ocv_compiler_optimization_process_sources(OPENCV_TEST_${the_module}_SOURCES OPENCV_TEST_${the_module}_DEPS ${the_target}) if(NOT BUILD_opencv_world) @@ -1211,6 +1214,9 @@ function(ocv_add_accuracy_tests) source_group("Src" FILES "${${the_target}_pch}") ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch}) ocv_target_include_modules(${the_target} ${test_deps} "${test_path}") + if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/test") + ocv_target_include_directories(${the_target} "${CMAKE_CURRENT_BINARY_DIR}/test") + endif() ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS}) add_dependencies(opencv_tests ${the_target}) diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake index b4658c604b..f9b1b48b65 100644 --- a/cmake/OpenCVPCHSupport.cmake +++ b/cmake/OpenCVPCHSupport.cmake @@ -362,7 +362,7 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) endif() endforeach() - #also inlude ${oldProps} to have the same compile options + #also include 
${oldProps} to have the same compile options GET_TARGET_PROPERTY(oldProps ${_targetName} COMPILE_FLAGS) if (oldProps MATCHES NOTFOUND) SET(oldProps "") diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in index 84262a87b3..fefa359e0a 100644 --- a/cmake/templates/OpenCVConfig.cmake.in +++ b/cmake/templates/OpenCVConfig.cmake.in @@ -260,7 +260,7 @@ endif() set(OpenCV_LIBRARIES ${OpenCV_LIBS}) # -# Some macroses for samples +# Some macros for samples # macro(ocv_check_dependencies) set(OCV_DEPENDENCIES_FOUND TRUE) diff --git a/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown b/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown index 570a490fea..ef71d07aa5 100644 --- a/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown +++ b/doc/js_tutorials/js_imgproc/js_grabcut/js_grabcut.markdown @@ -29,7 +29,7 @@ What happens in background ? objects). Everything inside rectangle is unknown. Similarly any user input specifying foreground and background are considered as hard-labelling which means they won't change in the process. -- Computer does an initial labelling depeding on the data we gave. It labels the foreground and +- Computer does an initial labelling depending on the data we gave. It labels the foreground and background pixels (or it hard-labels) - Now a Gaussian Mixture Model(GMM) is used to model the foreground and background. - Depending on the data we gave, GMM learns and create new pixel distribution. That is, the diff --git a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown index 72f481df7a..88aba1afd5 100644 --- a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown +++ b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown @@ -129,7 +129,7 @@ function onOpenCvReady() { @endcode -@note You have to call delete method of cv.Mat to free memory allocated in Emscripten's heap. 
Please refer to [Memeory management of Emscripten](https://kripken.github.io/emscripten-site/docs/porting/connecting_cpp_and_javascript/embind.html#memory-management) for details. +@note You have to call delete method of cv.Mat to free memory allocated in Emscripten's heap. Please refer to [Memory management of Emscripten](https://kripken.github.io/emscripten-site/docs/porting/connecting_cpp_and_javascript/embind.html#memory-management) for details. Try it ------ diff --git a/doc/py_tutorials/py_imgproc/py_grabcut/py_grabcut.markdown b/doc/py_tutorials/py_imgproc/py_grabcut/py_grabcut.markdown index 2c489e2453..7dc22d37aa 100644 --- a/doc/py_tutorials/py_imgproc/py_grabcut/py_grabcut.markdown +++ b/doc/py_tutorials/py_imgproc/py_grabcut/py_grabcut.markdown @@ -37,7 +37,7 @@ So what happens in background ? objects). Everything inside rectangle is unknown. Similarly any user input specifying foreground and background are considered as hard-labelling which means they won't change in the process. -- Computer does an initial labelling depeding on the data we gave. It labels the foreground and +- Computer does an initial labelling depending on the data we gave. It labels the foreground and background pixels (or it hard-labels) - Now a Gaussian Mixture Model(GMM) is used to model the foreground and background. - Depending on the data we gave, GMM learns and create new pixel distribution. That is, the diff --git a/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown b/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown index b974b8bc63..4347d11651 100644 --- a/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown +++ b/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown @@ -16,7 +16,7 @@ In this tutorial is explained how to build a real time application to estimate t order to track a textured object with six degrees of freedom given a 2D image and its 3D textured model. 
-The application will have the followings parts: +The application will have the following parts: - Read 3D textured object model and object mesh. - Take input from Camera or Video. @@ -426,16 +426,16 @@ Here is explained in detail the code for the real time application: @endcode OpenCV provides four PnP methods: ITERATIVE, EPNP, P3P and DLS. Depending on the application type, the estimation method will be different. In the case that we want to make a real time application, - the more suitable methods are EPNP and P3P due to that are faster than ITERATIVE and DLS at + the more suitable methods are EPNP and P3P since they are faster than ITERATIVE and DLS at finding an optimal solution. However, EPNP and P3P are not especially robust in front of planar - surfaces and sometimes the pose estimation seems to have a mirror effect. Therefore, in this this - tutorial is used ITERATIVE method due to the object to be detected has planar surfaces. + surfaces and sometimes the pose estimation seems to have a mirror effect. Therefore, in this + tutorial the ITERATIVE method is used because the object to be detected has planar surfaces. - The OpenCV RANSAC implementation wants you to provide three parameters: the maximum number of - iterations until stop the algorithm, the maximum allowed distance between the observed and - computed point projections to consider it an inlier and the confidence to obtain a good result. + The OpenCV RANSAC implementation wants you to provide three parameters: 1) the maximum number of + iterations until the algorithm stops, 2) the maximum allowed distance between the observed and + computed point projections to consider it an inlier and 3) the confidence to obtain a good result. You can tune these parameters in order to improve your algorithm performance. 
Increasing the - number of iterations you will have a more accurate solution, but will take more time to find a + number of iterations will give a more accurate solution, but will take more time to find a solution. Increasing the reprojection error will reduce the computation time, but your solution will be unaccurate. Decreasing the confidence your algorithm will be faster, but the obtained solution will be unaccurate. diff --git a/doc/tutorials/introduction/windows_install/windows_install.markdown b/doc/tutorials/introduction/windows_install/windows_install.markdown index e60c846b12..7f491d8fdd 100644 --- a/doc/tutorials/introduction/windows_install/windows_install.markdown +++ b/doc/tutorials/introduction/windows_install/windows_install.markdown @@ -46,7 +46,7 @@ cd /c/lib myRepo=$(pwd) CMAKE_CONFIG_GENERATOR="Visual Studio 14 2015 Win64" if [ ! -d "$myRepo/opencv" ]; then - echo "clonning opencv" + echo "cloning opencv" git clone https://github.com/opencv/opencv.git mkdir Build mkdir Build/opencv @@ -58,7 +58,7 @@ else cd .. fi if [ ! 
-d "$myRepo/opencv_contrib" ]; then - echo "clonning opencv_contrib" + echo "cloning opencv_contrib" git clone https://github.com/opencv/opencv_contrib.git mkdir Build mkdir Build/opencv_contrib diff --git a/modules/calib3d/test/test_chesscorners.cpp b/modules/calib3d/test/test_chesscorners.cpp index 8303a8dcd4..e55d069de0 100644 --- a/modules/calib3d/test/test_chesscorners.cpp +++ b/modules/calib3d/test/test_chesscorners.cpp @@ -198,7 +198,7 @@ void CV_ChessboardDetectorTest::run_batch( const string& filename ) if( !fs.isOpened() || board_list.empty() || !board_list.isSeq() || board_list.size() % 2 != 0 ) { - ts->printf( cvtest::TS::LOG, "%s can not be readed or is not valid\n", (folder + filename).c_str() ); + ts->printf( cvtest::TS::LOG, "%s can not be read or is not valid\n", (folder + filename).c_str() ); ts->printf( cvtest::TS::LOG, "fs.isOpened=%d, board_list.empty=%d, board_list.isSeq=%d,board_list.size()%2=%d\n", fs.isOpened(), (int)board_list.empty(), board_list.isSeq(), board_list.size()%2); ts->set_failed_test_info( cvtest::TS::FAIL_MISSING_TEST_DATA ); diff --git a/modules/calib3d/test/test_chesscorners_timing.cpp b/modules/calib3d/test/test_chesscorners_timing.cpp index 4d643a1d45..b89d2e0147 100644 --- a/modules/calib3d/test/test_chesscorners_timing.cpp +++ b/modules/calib3d/test/test_chesscorners_timing.cpp @@ -85,7 +85,7 @@ void CV_ChessboardDetectorTimingTest::run( int start_from ) if( !fs || !board_list || !CV_NODE_IS_SEQ(board_list->tag) || board_list->data.seq->total % 4 != 0 ) { - ts->printf( cvtest::TS::LOG, "chessboard_timing_list.dat can not be readed or is not valid" ); + ts->printf( cvtest::TS::LOG, "chessboard_timing_list.dat can not be read or is not valid" ); code = cvtest::TS::FAIL_MISSING_TEST_DATA; goto _exit_; } diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1997c906bc..455afaf593 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -3,6 +3,10 @@ set(the_description "The Core 
Functionality") ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2) ocv_add_dispatched_file(stat SSE4_2 AVX2) +# dispatching for accuracy tests +ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2) +ocv_add_dispatched_file_force_all(test_intrin256 TEST AVX2) + ocv_add_module(core OPTIONAL opencv_cudev WRAP java python js) diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 263659d302..ff2d5160d2 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -204,20 +204,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN #define CV_SIMD512_64F 0 #endif -#if CV_SIMD512 - #define CV_SIMD 1 - #define CV_SIMD_64F CV_SIMD512_64F - #define CV_SIMD_WIDTH 64 -#elif CV_SIMD256 - #define CV_SIMD 1 - #define CV_SIMD_64F CV_SIMD256_64F - #define CV_SIMD_WIDTH 32 -#else - #define CV_SIMD CV_SIMD128 - #define CV_SIMD_64F CV_SIMD128_64F - #define CV_SIMD_WIDTH 16 -#endif - //================================================================================================== #define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ @@ -309,7 +295,21 @@ template struct V_RegTraits #endif #endif -#if CV_SIMD256 +#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) +#define CV__SIMD_NAMESPACE simd512 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD512_64F + #define CV_SIMD_WIDTH 64 + // TODO typedef v_uint8 / v_int32 / etc types here +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) +#define CV__SIMD_NAMESPACE simd256 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD 1 + #define CV_SIMD_64F CV_SIMD256_64F + #define CV_SIMD_WIDTH 32 typedef v_uint8x32 v_uint8; typedef v_int8x32 v_int8; typedef v_uint16x16 v_uint16; @@ -329,7 +329,14 @@ template struct 
V_RegTraits CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) inline void vx_cleanup() { v256_cleanup(); } -#elif CV_SIMD128 || CV_SIMD128_CPP +} // namespace +using namespace CV__SIMD_NAMESPACE; +#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) +#define CV__SIMD_NAMESPACE simd128 +namespace CV__SIMD_NAMESPACE { + #define CV_SIMD CV_SIMD128 + #define CV_SIMD_64F CV_SIMD128_64F + #define CV_SIMD_WIDTH 16 typedef v_uint8x16 v_uint8; typedef v_int8x16 v_int8; typedef v_uint16x8 v_uint16; @@ -351,6 +358,8 @@ template struct V_RegTraits CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load) #endif inline void vx_cleanup() { v_cleanup(); } +} // namespace +using namespace CV__SIMD_NAMESPACE; #endif inline unsigned int trailingZeros32(unsigned int value) { @@ -380,6 +389,14 @@ inline unsigned int trailingZeros32(unsigned int value) { CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END #endif +#ifndef CV_SIMD_64F +#define CV_SIMD_64F 0 +#endif + +#ifndef CV_SIMD +#define CV_SIMD 0 +#endif + } // cv:: //! 
@endcond diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index d1f24d17b5..e58486fb5d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -494,7 +494,12 @@ void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) { __m128i delta32 = _mm_set1_epi32(32768); - __m128i r = _mm_packs_epi32(_mm_sub_epi32(a.val, delta32), _mm_sub_epi32(b.val, delta32)); + + // preliminary saturate negative values to zero + __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, _mm_set1_epi32(0))); + __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, _mm_set1_epi32(0))); + + __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32)); return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768))); } diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index 7e384a5c6f..81e986fcd1 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -1764,7 +1764,7 @@ typedef struct CvString } CvString; -/** All the keys (names) of elements in the readed file storage +/** All the keys (names) of elements in the read file storage are stored in the hash to speed up the lookup operations: */ typedef struct CvStringHashNode { diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index 481b86b4f1..e5fd24dfad 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -453,9 +453,9 @@ struct Cvt_SIMD { v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth); v_int32x4 v_src3 = v_load(src + x + cWidth * 2), v_src4 = v_load(src + x + cWidth * 3); - v_uint16x8 v_dst1 = v_pack_u(v_src1, v_src2); - v_uint16x8 v_dst2 = v_pack_u(v_src3, v_src4); - v_store(dst + x, v_pack(v_dst1, v_dst2)); + v_int16x8 v_dst1 = 
v_pack(v_src1, v_src2); + v_int16x8 v_dst2 = v_pack(v_src3, v_src4); + v_store(dst + x, v_pack_u(v_dst1, v_dst2)); } } return x; diff --git a/modules/core/src/datastructs.cpp b/modules/core/src/datastructs.cpp index 56528fcf69..83c11c1855 100644 --- a/modules/core/src/datastructs.cpp +++ b/modules/core/src/datastructs.cpp @@ -2779,7 +2779,7 @@ cvGraphAddEdgeByPtr( CvGraph* graph, if( start_vtx == end_vtx ) CV_Error( start_vtx ? CV_StsBadArg : CV_StsNullPtr, - "vertex pointers coinside (or set to NULL)" ); + "vertex pointers coincide (or set to NULL)" ); edge = (CvGraphEdge*)cvSetNew( (CvSet*)(graph->edges) ); assert( edge->flags >= 0 ); diff --git a/modules/core/src/merge.cpp b/modules/core/src/merge.cpp index 9c52f0e20c..300a718506 100644 --- a/modules/core/src/merge.cpp +++ b/modules/core/src/merge.cpp @@ -36,13 +36,14 @@ vecmerge_( const T** src, T* dst, int len, int cn ) const T* src0 = src[0]; const T* src1 = src[1]; + const int dstElemSize = cn * sizeof(T); int r = (int)((size_t)(void*)dst % (VECSZ*sizeof(T))); hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE; if( r != 0 ) { mode = hal::STORE_UNALIGNED; - if( r % cn == 0 && len > VECSZ ) - i0 = VECSZ - (r / cn); + if (r % dstElemSize == 0 && len > VECSZ*2) + i0 = VECSZ - (r / dstElemSize); } if( cn == 2 ) diff --git a/modules/core/src/persistence_c.cpp b/modules/core/src/persistence_c.cpp index ed315d0971..ed349cc150 100644 --- a/modules/core/src/persistence_c.cpp +++ b/modules/core/src/persistence_c.cpp @@ -1063,7 +1063,7 @@ cvReadRawDataSlice( const CvFileStorage* fs, CvSeqReader* reader, CV_Error( CV_StsNullPtr, "Null pointer to reader or destination array" ); if( !reader->seq && len != 1 ) - CV_Error( CV_StsBadSize, "The readed sequence is a scalar, thus len must be 1" ); + CV_Error( CV_StsBadSize, "The read sequence is a scalar, thus len must be 1" ); fmt_pair_count = icvDecodeFormat( dt, fmt_pairs, CV_FS_MAX_FMT_PAIRS ); size_t step = ::icvCalcStructSize(dt, 0); diff --git a/modules/core/src/split.cpp 
b/modules/core/src/split.cpp index 78d8daadd0..3fab6874b7 100644 --- a/modules/core/src/split.cpp +++ b/modules/core/src/split.cpp @@ -27,8 +27,8 @@ vecsplit_( const T* src, T** dst, int len, int cn ) if( (r0|r1|r2|r3) != 0 ) { mode = hal::STORE_UNALIGNED; - if( r0 == r1 && r0 == r2 && r0 == r3 && r0 % cn == 0 && len > VECSZ ) - i0 = VECSZ - (r0 / cn); + if( r0 == r1 && r0 == r2 && r0 == r3 && r0 % sizeof(T) == 0 && len > VECSZ*2 ) + i0 = VECSZ - (r0 / sizeof(T)); } if( cn == 2 ) diff --git a/modules/core/src/utils/filesystem.cpp b/modules/core/src/utils/filesystem.cpp index 23bed074f7..32183a2f6c 100644 --- a/modules/core/src/utils/filesystem.cpp +++ b/modules/core/src/utils/filesystem.cpp @@ -469,7 +469,32 @@ cv::String getCacheDirectory(const char* sub_directory_name, const char* configu { if (utils::fs::isDirectory(default_cache_path)) { - default_cache_path = utils::fs::join(default_cache_path, utils::fs::join("opencv", CV_VERSION)); + cv::String default_cache_path_base = utils::fs::join(default_cache_path, "opencv"); + default_cache_path = utils::fs::join(default_cache_path_base, "4.0" CV_VERSION_STATUS); + if (utils::getConfigurationParameterBool("OPENCV_CACHE_SHOW_CLEANUP_MESSAGE", true) + && !utils::fs::isDirectory(default_cache_path)) + { + std::vector existedCacheDirs; + try + { + utils::fs::glob_relative(default_cache_path_base, "*", existedCacheDirs, false, true); + } + catch (...) 
+ { + // ignore + } + if (!existedCacheDirs.empty()) + { + CV_LOG_WARNING(NULL, "Creating new OpenCV cache directory: " << default_cache_path); + CV_LOG_WARNING(NULL, "There are several neighbour directories, probably created by old OpenCV versions."); + CV_LOG_WARNING(NULL, "Feel free to cleanup these unused directories:"); + for (size_t i = 0; i < existedCacheDirs.size(); i++) + { + CV_LOG_WARNING(NULL, " - " << existedCacheDirs[i]); + } + CV_LOG_WARNING(NULL, "Note: This message is showed only once."); + } + } if (sub_directory_name && sub_directory_name[0] != '\0') default_cache_path = utils::fs::join(default_cache_path, cv::String(sub_directory_name) + native_separator); if (!utils::fs::createDirectories(default_cache_path)) diff --git a/modules/core/test/test_intrin.avx2.cpp b/modules/core/test/test_intrin.avx2.cpp deleted file mode 100644 index 9ebfcdf542..0000000000 --- a/modules/core/test/test_intrin.avx2.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "test_precomp.hpp" -#include "test_intrin.simd.hpp" \ No newline at end of file diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 6610e332de..602877382d 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -2,101 +2,100 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" -#include "test_intrin.simd.hpp" -#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp" -#define CV_CPU_DISPATCH_MODE FP16 -#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" +#include "test_intrin128.simd.hpp" +#include "test_intrin128.simd_declarations.hpp" + +#undef CV_CPU_DISPATCH_MODES_ALL + +#include "opencv2/core/cv_cpu_dispatch.h" +#include "test_intrin256.simd.hpp" +#include "test_intrin256.simd_declarations.hpp" -#define CV_CPU_DISPATCH_MODE AVX2 -#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp" namespace opencv_test { namespace hal { -using namespace CV_CPU_OPTIMIZATION_NAMESPACE; -TEST(hal_intrin, uint8x16) -{ test_hal_intrin_uint8(); } +#define CV_CPU_CALL_BASELINE_(fn, args) CV_CPU_CALL_BASELINE(fn, args) -TEST(hal_intrin, int8x16) -{ test_hal_intrin_int8(); } +#define DISPATCH_SIMD128(fn, cpu_opt) do { \ + CV_CPU_CALL_ ## cpu_opt ## _(fn, ()); \ + throw SkipTestException("SIMD128 (" #cpu_opt ") is not available or disabled"); \ +} while(0) -TEST(hal_intrin, uint16x8) -{ test_hal_intrin_uint16(); } +#define DISPATCH_SIMD256(fn, cpu_opt) do { \ + CV_CPU_CALL_ ## cpu_opt ## _(fn, ()); \ + throw SkipTestException("SIMD256 (" #cpu_opt ") is not available or disabled"); \ +} while(0) -TEST(hal_intrin, int16x8) -{ test_hal_intrin_int16(); } +#define DEFINE_SIMD_TESTS(simd_size, cpu_opt) \ +TEST(hal_intrin ## simd_size, uint8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint8, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int8x16_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int8, cpu_opt); } \ +TEST(hal_intrin ## simd_size, uint16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint16, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int16x8_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int16, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int32, cpu_opt); } \ +TEST(hal_intrin ## 
simd_size, uint32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint32, cpu_opt); } \ +TEST(hal_intrin ## simd_size, uint64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_uint64, cpu_opt); } \ +TEST(hal_intrin ## simd_size, int64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_int64, cpu_opt); } \ +TEST(hal_intrin ## simd_size, float32x4_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_float32, cpu_opt); } \ +TEST(hal_intrin ## simd_size, float64x2_ ## cpu_opt) { DISPATCH_SIMD ## simd_size(test_hal_intrin_float64, cpu_opt); } \ -TEST(hal_intrin, int32x4) -{ test_hal_intrin_int32(); } +namespace intrin128 { -TEST(hal_intrin, uint32x4) -{ test_hal_intrin_uint32(); } +DEFINE_SIMD_TESTS(128, BASELINE) -TEST(hal_intrin, uint64x2) -{ test_hal_intrin_uint64(); } +#if defined CV_CPU_DISPATCH_COMPILE_SSE2 || defined CV_CPU_BASELINE_COMPILE_SSE2 +DEFINE_SIMD_TESTS(128, SSE2) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE3 || defined CV_CPU_BASELINE_COMPILE_SSE3 +DEFINE_SIMD_TESTS(128, SSE3) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSSE3 || defined CV_CPU_BASELINE_COMPILE_SSSE3 +DEFINE_SIMD_TESTS(128, SSSE3) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE4_1 || defined CV_CPU_BASELINE_COMPILE_SSE4_1 +DEFINE_SIMD_TESTS(128, SSE4_1) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_SSE4_2 || defined CV_CPU_BASELINE_COMPILE_SSE4_2 +DEFINE_SIMD_TESTS(128, SSE4_2) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_AVX || defined CV_CPU_BASELINE_COMPILE_AVX +DEFINE_SIMD_TESTS(128, AVX) +#endif +#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2 +DEFINE_SIMD_TESTS(128, AVX2) +#endif -TEST(hal_intrin, int64x2) -{ test_hal_intrin_int64(); } - -TEST(hal_intrin, float32x4) -{ test_hal_intrin_float32(); } - -TEST(hal_intrin, float64x2) -{ test_hal_intrin_float64(); } - -TEST(hal_intrin, float16x8) +TEST(hal_intrin128, float16x8_FP16) { CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); throw 
SkipTestException("Unsupported hardware: FP16 is not available"); } -#define DISPATCH_SIMD_MODES AVX2 -#define DISPATCH_SIMD_NAME "SIMD256" -#define DISPATCH_SIMD(fun) \ - do { \ - CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \ - throw SkipTestException( \ - "Unsupported hardware: " \ - DISPATCH_SIMD_NAME \ - " is not available" \ - ); \ - } while(0) +} // namespace intrin128 -TEST(hal_intrin256, uint8x32) -{ DISPATCH_SIMD(test_hal_intrin_uint8); } -TEST(hal_intrin256, int8x32) -{ DISPATCH_SIMD(test_hal_intrin_int8); } +namespace intrin256 { -TEST(hal_intrin256, uint16x16) -{ DISPATCH_SIMD(test_hal_intrin_uint16); } -TEST(hal_intrin256, int16x16) -{ DISPATCH_SIMD(test_hal_intrin_int16); } +// Not available due missing C++ backend for SIMD256 +//DEFINE_SIMD_TESTS(256, BASELINE) -TEST(hal_intrin256, uint32x8) -{ DISPATCH_SIMD(test_hal_intrin_uint32); } +//#if defined CV_CPU_DISPATCH_COMPILE_AVX +//DEFINE_SIMD_TESTS(256, AVX) +//#endif -TEST(hal_intrin256, int32x8) -{ DISPATCH_SIMD(test_hal_intrin_int32); } +#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2 +DEFINE_SIMD_TESTS(256, AVX2) +#endif -TEST(hal_intrin256, uint64x4) -{ DISPATCH_SIMD(test_hal_intrin_uint64); } - -TEST(hal_intrin256, int64x4) -{ DISPATCH_SIMD(test_hal_intrin_int64); } - -TEST(hal_intrin256, float32x8) -{ DISPATCH_SIMD(test_hal_intrin_float32); } - -TEST(hal_intrin256, float64x4) -{ DISPATCH_SIMD(test_hal_intrin_float64); } - -TEST(hal_intrin256, float16x16) +TEST(hal_intrin256, float16x16_FP16) { - if (!CV_CPU_HAS_SUPPORT_FP16) - throw SkipTestException("Unsupported hardware: FP16 is not available"); - DISPATCH_SIMD(test_hal_intrin_float16); + //CV_CPU_CALL_FP16_(test_hal_intrin_float16, ()); + CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ()); + throw SkipTestException("Unsupported hardware: FP16 is not available"); } + +} // namespace intrin256 + }} // namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin.fp16.cpp 
b/modules/core/test/test_intrin.fp16.cpp deleted file mode 100644 index 9f6416bcf8..0000000000 --- a/modules/core/test/test_intrin.fp16.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "test_precomp.hpp" -#include "test_intrin_utils.hpp" - -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN - -void test_hal_intrin_float16() -{ - TheTest() - .test_loadstore_fp16() - .test_float_cvt_fp16() - ; -} - -CV_CPU_OPTIMIZATION_NAMESPACE_END -}} // namespace diff --git a/modules/core/test/test_intrin.simd.hpp b/modules/core/test/test_intrin.simd.hpp deleted file mode 100644 index 4e0d3a073f..0000000000 --- a/modules/core/test/test_intrin.simd.hpp +++ /dev/null @@ -1,296 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. 
-#include "test_precomp.hpp" -#include "test_intrin_utils.hpp" - -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN - -void test_hal_intrin_uint8(); -void test_hal_intrin_int8(); -void test_hal_intrin_uint16(); -void test_hal_intrin_int16(); -void test_hal_intrin_uint32(); -void test_hal_intrin_int32(); -void test_hal_intrin_uint64(); -void test_hal_intrin_int64(); -void test_hal_intrin_float32(); -void test_hal_intrin_float64(); - -#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY - -//============= 8-bit integer ===================================================================== - -void test_hal_intrin_uint8() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; - -#if CV_SIMD256 - TheTest() - .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>() - .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>() - .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>() - .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>() - ; -#endif -} - -void test_hal_intrin_int8() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_expand_q() - .test_addsub() - .test_addsub_wrap() - .test_cmp() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() - 
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() - ; -} - -//============= 16-bit integer ===================================================================== - -void test_hal_intrin_uint16() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} - -void test_hal_intrin_int16() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_addsub_wrap() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_dot_prod() - .test_logic() - .test_min_max() - .test_absdiff() - .test_abs() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() - .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() - ; -} - -//============= 32-bit integer ===================================================================== - -void test_hal_intrin_uint32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_mul_expand() - .test_cmp() - .test_shift<1>() - .test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_popcount() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - 
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_transpose() - ; -} - -void test_hal_intrin_int32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_expand() - .test_addsub() - .test_mul() - .test_abs() - .test_cmp() - .test_popcount() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_min_max() - .test_absdiff() - .test_reduce() - .test_mask() - .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() - .test_unpack() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - .test_float_cvt32() - .test_float_cvt64() - .test_transpose() - ; -} - -//============= 64-bit integer ===================================================================== - -void test_hal_intrin_uint64() -{ - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} - -void test_hal_intrin_int64() -{ - TheTest() - .test_loadstore() - .test_addsub() - .test_shift<1>().test_shift<8>() - .test_logic() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; -} - -//============= Floating point ===================================================================== -void test_hal_intrin_float32() -{ - TheTest() - .test_loadstore() - .test_interleave() - .test_interleave_2channel() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_reduce() - .test_mask() - .test_unpack() - .test_float_math() - .test_float_cvt64() - .test_matmul() - .test_transpose() - .test_reduce_sum4() - .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() - .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() - ; - -#if CV_SIMD256 - TheTest() - .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() - 
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() - ; -#endif -} - -void test_hal_intrin_float64() -{ -#if CV_SIMD_64F - TheTest() - .test_loadstore() - .test_addsub() - .test_mul() - .test_div() - .test_cmp() - .test_sqrt_abs() - .test_min_max() - .test_float_absdiff() - .test_mask() - .test_unpack() - .test_float_math() - .test_float_cvt32() - .test_extract<0>().test_extract<1>() - .test_rotate<0>().test_rotate<1>() - ; - -#if CV_SIMD256 - TheTest() - .test_extract<2>().test_extract<3>() - .test_rotate<2>().test_rotate<3>() - ; -#endif //CV_SIMD256 - -#endif -} - -#if CV_FP16 && CV_SIMD_WIDTH > 16 -void test_hal_intrin_float16() -{ - TheTest() - .test_loadstore_fp16() - .test_float_cvt_fp16() - ; -} -#endif - -#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY - -CV_CPU_OPTIMIZATION_NAMESPACE_END - -}} //namespace \ No newline at end of file diff --git a/modules/core/test/test_intrin128.simd.hpp b/modules/core/test/test_intrin128.simd.hpp new file mode 100644 index 0000000000..1d9bee2d33 --- /dev/null +++ b/modules/core/test/test_intrin128.simd.hpp @@ -0,0 +1,22 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#define CV__SIMD_FORCE_WIDTH 128 +#include "opencv2/core/hal/intrin.hpp" +#undef CV__SIMD_FORCE_WIDTH + +#if CV_SIMD_WIDTH != 16 +#error "Invalid build configuration" +#endif + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace opencv_test { namespace hal { namespace intrin128 { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +#include "test_intrin_utils.hpp" + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}}} //namespace diff --git a/modules/core/test/test_intrin256.simd.hpp b/modules/core/test/test_intrin256.simd.hpp new file mode 100644 index 0000000000..a5e2cd5221 --- /dev/null +++ b/modules/core/test/test_intrin256.simd.hpp @@ -0,0 +1,23 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#if !defined CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY && \ + !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS // TODO? C++ fallback implementation for SIMD256 + +#define CV__SIMD_FORCE_WIDTH 256 +#include "opencv2/core/hal/intrin.hpp" +#undef CV__SIMD_FORCE_WIDTH + +#if CV_SIMD_WIDTH != 32 +#error "Invalid build configuration" +#endif + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +namespace opencv_test { namespace hal { namespace intrin256 { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +#include "test_intrin_utils.hpp" + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}}} //namespace diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 5f3175bc6c..cc9de4fc75 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1,10 +1,22 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
-#include "opencv2/core/hal/intrin.hpp" -namespace opencv_test { namespace hal { -CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN +// This file is not standalone. +// It is included with these active namespaces: +//namespace opencv_test { namespace hal { namespace intrinXXX { +//CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void test_hal_intrin_uint8(); +void test_hal_intrin_int8(); +void test_hal_intrin_uint16(); +void test_hal_intrin_int16(); +void test_hal_intrin_uint32(); +void test_hal_intrin_int32(); +void test_hal_intrin_uint64(); +void test_hal_intrin_int64(); +void test_hal_intrin_float32(); +void test_hal_intrin_float64(); void test_hal_intrin_float16(); @@ -258,6 +270,7 @@ template struct TheTest v_store(out.u.d, r_low); for (int i = 0; i < R::nlanes/2; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]); } @@ -266,6 +279,7 @@ template struct TheTest v_store(out.u.d, r_low_align8byte); for (int i = 0; i < R::nlanes/2; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]); } @@ -296,6 +310,7 @@ template struct TheTest resV.fill((LaneType)8); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)0, resZ[i]); EXPECT_EQ((LaneType)8, resV[i]); } @@ -342,6 +357,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data1, Data(a)); EXPECT_EQ(data2, Data(b)); EXPECT_EQ(data3, Data(c)); @@ -374,6 +390,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data1, Data(a)); EXPECT_EQ(data2, Data(b)); } @@ -397,6 +414,7 @@ template struct TheTest const int n = Rx2::nlanes; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i], resB[i]); EXPECT_EQ(dataA[i], resC[i]); EXPECT_EQ(dataA[i + n], resD[i]); @@ -412,7 +430,10 @@ template struct TheTest Data out = vx_load_expand_q(data.d); const 
int n = Rx4::nlanes; for (int i = 0; i < n; ++i) + { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(data[i], out[i]); + } return *this; } @@ -426,6 +447,7 @@ template struct TheTest Data resC = a + b, resD = a - b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(saturate_cast(dataA[i] + dataB[i]), resC[i]); EXPECT_EQ(saturate_cast(dataA[i] - dataB[i]), resD[i]); } @@ -443,6 +465,7 @@ template struct TheTest resD = v_sub_wrap(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]); EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]); } @@ -458,6 +481,7 @@ template struct TheTest Data resC = a * b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] * dataB[i], resC[i]); } @@ -473,6 +497,7 @@ template struct TheTest Data resC = a / b; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] / dataB[i], resC[i]); } @@ -492,6 +517,7 @@ template struct TheTest const int n = R::nlanes / 2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]); EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]); } @@ -511,6 +537,7 @@ template struct TheTest for (int i = 0; i < Ru::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]); } @@ -529,6 +556,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(static_cast(dataA[i] << s), resB[i]); EXPECT_EQ(static_cast(dataA[i] << s), resC[i]); EXPECT_EQ(static_cast(dataA[i] >> s), resD[i]); @@ -553,6 +581,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); EXPECT_EQ(dataA[i] > 
dataB[i], resE[i] != 0); @@ -583,6 +612,7 @@ template struct TheTest const int n = R::nlanes / 2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]); EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]); } @@ -597,6 +627,7 @@ template struct TheTest Data resC = a & b, resD = a | b, resE = a ^ b, resF = ~a; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] & dataB[i], resC[i]); EXPECT_EQ(dataA[i] | dataB[i], resD[i]); EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]); @@ -615,6 +646,7 @@ template struct TheTest Data resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]); EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]); EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]); @@ -632,6 +664,7 @@ template struct TheTest Data resC = v_min(a, b), resD = v_max(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]); EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]); } @@ -672,6 +705,7 @@ template struct TheTest const u_type mask = std::numeric_limits::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0; for (int i = 0; i < Ru::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); u_type uA = dataA[i] ^ mask; u_type uB = dataB[i] ^ mask; EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]); @@ -691,6 +725,7 @@ template struct TheTest Data resC = v_absdiff(a, b); for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i] > dataB[i] ? 
dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]); } return *this; @@ -744,6 +779,7 @@ template struct TheTest Data resF = f; for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); int_type m2 = dataB.as_int(i); EXPECT_EQ((dataD.as_int(i) & m2) | (dataE.as_int(i) & ~m2), resF.as_int(i)); } @@ -776,6 +812,7 @@ template struct TheTest const w_type add = (w_type)1 << (s - 1); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(pack_saturate_cast(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast((dataA[i] + add) >> s), resD[i]); @@ -816,6 +853,7 @@ template struct TheTest const w_type add = (w_type)1 << (s - 1); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(pack_saturate_cast(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast((dataA[i] + add) >> s), resD[i]); @@ -845,6 +883,7 @@ template struct TheTest const int n = R::nlanes/2; for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(dataA[i], resC[i*2]); EXPECT_EQ(dataB[i], resC[i*2+1]); EXPECT_EQ(dataA[i+n], resD[i*2]); @@ -876,6 +915,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); if (i + s >= R::nlanes) EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]); else @@ -901,6 +941,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); if (i + s >= R::nlanes) { EXPECT_EQ((LaneType)0, resC[i]); @@ -940,6 +981,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(cvRound(data1[i]), resB[i]); EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]); EXPECT_EQ(cvFloor(data1[i]), resD[i]); @@ -964,6 +1006,7 @@ template struct TheTest int n = std::min(Rt::nlanes, R::nlanes); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); 
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); } return *this; @@ -983,10 +1026,12 @@ template struct TheTest int n = std::min(Rt::nlanes, R::nlanes); for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); } for (int i = 0; i < n; ++i) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]); } #endif @@ -1006,6 +1051,7 @@ template struct TheTest { for (int j = i; j < i + 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); LaneType val = dataV[i] * dataA[j] + dataV[i + 1] * dataB[j] + dataV[i + 2] * dataC[j] @@ -1019,6 +1065,7 @@ template struct TheTest { for (int j = i; j < i + 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); LaneType val = dataV[i] * dataA[j] + dataV[i + 1] * dataB[j] + dataV[i + 2] * dataC[j] @@ -1045,6 +1092,7 @@ template struct TheTest { for (int j = 0; j < 4; ++j) { + SCOPED_TRACE(cv::format("i=%d j=%d", i, j)); EXPECT_EQ(dataA[i + j], res[j][i]); EXPECT_EQ(dataB[i + j], res[j][i + 1]); EXPECT_EQ(dataC[i + j], res[j][i + 2]); @@ -1066,6 +1114,7 @@ template struct TheTest for (int i = 0; i < R::nlanes; i += 4) { + SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]); EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]); EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]); @@ -1121,7 +1170,304 @@ template struct TheTest }; + +#if 1 +#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*(int)sizeof(v_uint8), CV__TRACE_FUNCTION); #endif -CV_CPU_OPTIMIZATION_NAMESPACE_END -}} // namespace +//============= 8-bit integer ===================================================================== + +void test_hal_intrin_uint8() +{ + DUMP_ENTRY(v_uint8); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_mask() + .test_popcount() + 
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>() + .test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>() + .test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>() + .test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>() + ; +#endif +} + +void test_hal_intrin_int8() +{ + DUMP_ENTRY(v_int8); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_expand_q() + .test_addsub() + .test_addsub_wrap() + .test_cmp() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() + .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() + ; +} + +//============= 16-bit integer ===================================================================== + +void test_hal_intrin_uint16() +{ + DUMP_ENTRY(v_uint16); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +void test_hal_intrin_int16() +{ + 
DUMP_ENTRY(v_int16); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_addsub_wrap() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_dot_prod() + .test_logic() + .test_min_max() + .test_absdiff() + .test_abs() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() + .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() + ; +} + +//============= 32-bit integer ===================================================================== + +void test_hal_intrin_uint32() +{ + DUMP_ENTRY(v_uint32); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_mul_expand() + .test_cmp() + .test_shift<1>() + .test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_popcount() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_transpose() + ; +} + +void test_hal_intrin_int32() +{ + DUMP_ENTRY(v_int32); + TheTest() + .test_loadstore() + .test_interleave() + .test_expand() + .test_addsub() + .test_mul() + .test_abs() + .test_cmp() + .test_popcount() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_min_max() + .test_absdiff() + .test_reduce() + .test_mask() + .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() + .test_unpack() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + .test_float_cvt32() + .test_float_cvt64() + .test_transpose() + ; +} + +//============= 64-bit integer 
===================================================================== + +void test_hal_intrin_uint64() +{ + DUMP_ENTRY(v_uint64); + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +void test_hal_intrin_int64() +{ + DUMP_ENTRY(v_int64); + TheTest() + .test_loadstore() + .test_addsub() + .test_shift<1>().test_shift<8>() + .test_logic() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; +} + +//============= Floating point ===================================================================== +void test_hal_intrin_float32() +{ + DUMP_ENTRY(v_float32); + TheTest() + .test_loadstore() + .test_interleave() + .test_interleave_2channel() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_reduce() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt64() + .test_matmul() + .test_transpose() + .test_reduce_sum4() + .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() + .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() + .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() + ; +#endif +} + +void test_hal_intrin_float64() +{ + DUMP_ENTRY(v_float64); +#if CV_SIMD_64F + TheTest() + .test_loadstore() + .test_addsub() + .test_mul() + .test_div() + .test_cmp() + .test_sqrt_abs() + .test_min_max() + .test_float_absdiff() + .test_mask() + .test_unpack() + .test_float_math() + .test_float_cvt32() + .test_extract<0>().test_extract<1>() + .test_rotate<0>().test_rotate<1>() + ; + +#if CV_SIMD_WIDTH == 32 + TheTest() + .test_extract<2>().test_extract<3>() + .test_rotate<2>().test_rotate<3>() + ; +#endif //CV_SIMD256 + +#endif +} + +#if CV_FP16 +void 
test_hal_intrin_float16() +{ + DUMP_ENTRY(v_float16); +#if CV_SIMD_WIDTH > 16 + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +#endif +} +#endif + +/*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16 +void test_hal_intrin_float16() +{ + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +} +#endif*/ + +#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +//CV_CPU_OPTIMIZATION_NAMESPACE_END +//}}} // namespace diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index d67e53f506..4a8c347c68 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -1814,4 +1814,62 @@ BIGDATA_TEST(Mat, push_back_regression_4158) // memory usage: ~10.6 Gb } } + +TEST(Core_Merge, hang_12171) +{ + Mat src1(4, 24, CV_8UC1, Scalar::all(1)); + Mat src2(4, 24, CV_8UC1, Scalar::all(2)); + Rect src_roi(0, 0, 23, 4); + Mat src_channels[2] = { src1(src_roi), src2(src_roi) }; + Mat dst(4, 24, CV_8UC2, Scalar::all(5)); + Rect dst_roi(1, 0, 23, 4); + cv::merge(src_channels, 2, dst(dst_roi)); + EXPECT_EQ(5, dst.ptr()[0]); + EXPECT_EQ(5, dst.ptr()[1]); + EXPECT_EQ(1, dst.ptr()[2]); + EXPECT_EQ(2, dst.ptr()[3]); + EXPECT_EQ(5, dst.ptr(1)[0]); + EXPECT_EQ(5, dst.ptr(1)[1]); + EXPECT_EQ(1, dst.ptr(1)[2]); + EXPECT_EQ(2, dst.ptr(1)[3]); +} + +TEST(Core_Split, hang_12171) +{ + Mat src(4, 24, CV_8UC2, Scalar(1,2,3,4)); + Rect src_roi(0, 0, 23, 4); + Mat dst1(4, 24, CV_8UC1, Scalar::all(5)); + Mat dst2(4, 24, CV_8UC1, Scalar::all(10)); + Rect dst_roi(0, 0, 23, 4); + Mat dst[2] = { dst1(dst_roi), dst2(dst_roi) }; + cv::split(src(src_roi), dst); + EXPECT_EQ(1, dst1.ptr()[0]); + EXPECT_EQ(1, dst1.ptr()[1]); + EXPECT_EQ(2, dst2.ptr()[0]); + EXPECT_EQ(2, dst2.ptr()[1]); + EXPECT_EQ(1, dst1.ptr(1)[0]); + EXPECT_EQ(1, dst1.ptr(1)[1]); + EXPECT_EQ(2, dst2.ptr(1)[0]); + EXPECT_EQ(2, dst2.ptr(1)[1]); +} + +TEST(Core_Split, crash_12171) +{ + Mat src(4, 40, CV_8UC2, Scalar(1,2,3,4)); + Rect src_roi(0, 0, 39, 4); + Mat 
dst1(4, 40, CV_8UC1, Scalar::all(5)); + Mat dst2(4, 40, CV_8UC1, Scalar::all(10)); + Rect dst_roi(0, 0, 39, 4); + Mat dst[2] = { dst1(dst_roi), dst2(dst_roi) }; + cv::split(src(src_roi), dst); + EXPECT_EQ(1, dst1.ptr()[0]); + EXPECT_EQ(1, dst1.ptr()[1]); + EXPECT_EQ(2, dst2.ptr()[0]); + EXPECT_EQ(2, dst2.ptr()[1]); + EXPECT_EQ(1, dst1.ptr(1)[0]); + EXPECT_EQ(1, dst1.ptr(1)[1]); + EXPECT_EQ(2, dst2.ptr(1)[0]); + EXPECT_EQ(2, dst2.ptr(1)[1]); +} + }} // namespace diff --git a/modules/core/test/test_precomp.hpp b/modules/core/test/test_precomp.hpp index 9787586156..a82f5cc12c 100644 --- a/modules/core/test/test_precomp.hpp +++ b/modules/core/test/test_precomp.hpp @@ -11,6 +11,5 @@ #include "opencv2/core/cvdef.h" #include "opencv2/core/private.hpp" #include "opencv2/core/hal/hal.hpp" -#include "opencv2/core/hal/intrin.hpp" #endif diff --git a/modules/cudafilters/src/cuda/median_filter.cu b/modules/cudafilters/src/cuda/median_filter.cu index f8e02cb039..fe26c7be0e 100644 --- a/modules/cudafilters/src/cuda/median_filter.cu +++ b/modules/cudafilters/src/cuda/median_filter.cu @@ -246,7 +246,7 @@ namespace cv { namespace cuda { namespace device } __syncthreads(); - // Fot all remaining rows in the median filter, add the values to the the histogram + // For all remaining rows in the median filter, add the values to the histogram for (int j=threadIdx.x; j(CV_PI / 4), false); + Mat M = createAffineTransformMatrix(size, static_cast(CV_PI / 4), false); GpuMat_ d_src(src); GpuMat_ d_M; @@ -240,7 +240,7 @@ TEST(WarpPerspective, Rotation) const Size size = randomSize(100, 400); Mat src = randomMat(size, CV_32FC1, 0, 1); - Mat M = createAffineTransfomMatrix(size, static_cast(CV_PI / 4), true); + Mat M = createAffineTransformMatrix(size, static_cast(CV_PI / 4), true); GpuMat_ d_src(src); GpuMat_ d_M; diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 9ba180c7d1..b5416142c9 100644 --- 
a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -489,7 +489,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN static Ptr create(const LayerParams &params); }; - class CV_EXPORTS BatchNormLayer : public Layer + class CV_EXPORTS BatchNormLayer : public ActivationLayer { public: bool hasWeights, hasBias; diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index 16138cb99f..c6cef9f4f7 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -258,6 +258,17 @@ PERF_TEST_P_(DNNTestNetwork, FastNeuralStyle_eccv16) processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", "", Mat(cv::Size(320, 240), CV_32FC3)); } +PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN) +{ + if (backend == DNN_BACKEND_HALIDE || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + processNet("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", + "dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", "", + Mat(cv::Size(800, 600), CV_32FC3)); +} + const tuple testCases[] = { #ifdef HAVE_HALIDE tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU), diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5920edc85e..43ad3d6d42 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1408,7 +1408,7 @@ struct Net::Impl bool fused = ld.skip; Ptr layer = ld.layerInstance; - if (!layer->supportBackend(preferableBackend)) + if (!fused && !layer->supportBackend(preferableBackend)) { addInfEngineNetOutputs(ld); net = Ptr(); @@ -1471,6 +1471,8 @@ struct Net::Impl { node = layer->initInfEngine(ld.inputBlobsWrappers); } + else if (node.empty()) + continue; CV_Assert(!node.empty()); ld.backendNodes[preferableBackend] = node; @@ -1715,40 +1717,41 @@ struct Net::Impl if (preferableBackend != DNN_BACKEND_OPENCV) continue; // Go to the next layer. 
- // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh - if ( !IS_DNN_OPENCL_TARGET(preferableTarget) || - (IS_DNN_OPENCL_TARGET(preferableTarget) && - nextData && - ((nextData->type == "ReLU") || - (nextData->type == "ChannelsPReLU") || - (nextData->type == "ReLU6") || - (nextData->type == "TanH") || - (nextData->type == "Power"))) ) + while (nextData) { + // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh + if (IS_DNN_OPENCL_TARGET(preferableTarget) && + nextData->type != "ReLU" && + nextData->type != "ChannelsPReLU" && + nextData->type != "ReLU6" && + nextData->type != "TanH" && + nextData->type != "Power") + break; - Ptr nextActivLayer; + Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); + if (nextActivLayer.empty()) + break; - if( nextData ) - nextActivLayer = nextData->layerInstance.dynamicCast(); - - if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 - && currLayer->setActivation(nextActivLayer) ) + if (currLayer->setActivation(nextActivLayer)) { - LayerData *activData = nextData; printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); - activData->skip = true; + nextData->skip = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; - - if ( IS_DNN_OPENCL_TARGET(preferableTarget) ) + if (nextData->consumers.size() == 1) { - if ( !activData->consumers.empty() ) - { - nextData = &layers[activData->consumers[0].lid]; - lpNext = LayerPin(activData->consumers[0].lid, 0); - } + int nextLayerId = nextData->consumers[0].lid; + nextData = &layers[nextLayerId]; + lpNext = LayerPin(nextLayerId, 0); + } + else + { + nextData = 0; + break; } } + else + break; } // fuse convolution layer followed by eltwise + relu @@ -2050,10 +2053,10 @@ struct Net::Impl TickMeter tm; tm.start(); - if (preferableBackend == DNN_BACKEND_OPENCV || - !layer->supportBackend(preferableBackend)) + if( !ld.skip ) { - if( !ld.skip ) 
+ std::map >::iterator it = ld.backendNodes.find(preferableBackend); + if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) { if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) { @@ -2196,24 +2199,25 @@ struct Net::Impl } } else - tm.reset(); - } - else if (!ld.skip) - { - Ptr node = ld.backendNodes[preferableBackend]; - if (preferableBackend == DNN_BACKEND_HALIDE) { - forwardHalide(ld.outputBlobsWrappers, node); - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) - { - forwardInfEngine(node); - } - else - { - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + Ptr node = it->second; + CV_Assert(!node.empty()); + if (preferableBackend == DNN_BACKEND_HALIDE) + { + forwardHalide(ld.outputBlobsWrappers, node); + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) + { + forwardInfEngine(node); + } + else + { + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + } } } + else + tm.reset(); tm.stop(); layersTimings[ld.id] = tm.getTimeTicks(); diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 3b472328c8..1ced532fdc 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -268,6 +268,36 @@ public: } } + void forwardSlice(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE + { + for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) + { + int i = 0; + float w = weights_.at(cn); + float b = bias_.at(cn); +#if CV_SIMD128 + v_float32x4 wV = v_setall_f32(w), bV = v_setall_f32(b); + for( ; i <= len - 16; i += 16 ) + { + v_float32x4 x0 = v_load(srcptr + i); + v_float32x4 x1 = v_load(srcptr + i + 4); + v_float32x4 x2 = v_load(srcptr + i + 8); + v_float32x4 x3 = v_load(srcptr + i + 12); + x0 = v_muladd(x0, w, b); + x1 = v_muladd(x1, w, b); + x2 = v_muladd(x2, w, b); + 
x3 = v_muladd(x3, w, b); + v_store(dstptr + i, x0); + v_store(dstptr + i + 4, x1); + v_store(dstptr + i + 8, x2); + v_store(dstptr + i + 12, x3); + } +#endif + for( ; i < len; i++ ) + dstptr[i] = w * srcptr[i] + b; + } + } + virtual Ptr tryAttach(const Ptr& node) CV_OVERRIDE { switch (node->backendId) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index d08dec548b..08760ab49a 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -296,6 +296,9 @@ public: bool setActivation(const Ptr& layer) CV_OVERRIDE { + if (!activ.empty() && !layer.empty()) + return false; + activ = layer; if (activ.empty()) reluslope.clear(); diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 7473751707..42a6a6c715 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -196,7 +196,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized; + backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized && !_clip; } bool getMemoryShapes(const std::vector &inputs, diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 442bfa7aff..3a2c0ddb3f 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -452,8 +452,13 @@ public: bool setActivation(const Ptr& layer) CV_OVERRIDE { - activ = layer; - return !activ.empty(); + if (activ.empty() || layer.empty()) + { + activ = layer; + return !activ.empty(); + } + else + return false; } Ptr activ; diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index dfaa58c7ed..d17ca27383 100644 --- 
a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -135,8 +135,13 @@ public: virtual bool setActivation(const Ptr& layer) CV_OVERRIDE { - activ = layer; - return !activ.empty(); + if (activ.empty() || layer.empty()) + { + activ = layer; + return !activ.empty(); + } + else + return false; } class FullyConnected : public ParallelLoopBody diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index 9e4f0ac39c..6a2c6f1dd9 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -42,6 +42,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_inf_engine.hpp" #include #ifdef HAVE_OPENCL @@ -66,27 +67,25 @@ public: fuse_batch_norm = false; fuse_relu = false; relu_slope = 0.f; + zeroDev = false; } Mat scale, shift; bool fuse_batch_norm; - virtual bool tryFuse(Ptr& top) CV_OVERRIDE - { - if (!fuse_batch_norm) - { - top->getScaleShift(scale, shift); - fuse_batch_norm = !scale.empty() || !shift.empty(); - return fuse_batch_norm; - } - return false; - } - Ptr activ_relu; float relu_slope; bool fuse_relu; + bool zeroDev; // TODO: Isn't considered in Intel's Inference Engine backend. bool setActivation(const Ptr& layer) CV_OVERRIDE { + if (!layer.empty() && !fuse_relu && !fuse_batch_norm) + { + layer->getScaleShift(scale, shift); + fuse_batch_norm = !scale.empty() || !shift.empty(); + return fuse_batch_norm; + } + if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL) { activ_relu = layer.dynamicCast(); @@ -97,6 +96,23 @@ public: return fuse_relu; } + void finalize(const std::vector &inputs, std::vector &outputs) CV_OVERRIDE + { + int splitDim = (acrossChannels) ? 
1 : 2; + int i, newRows = 1; + for( i = 0; i < splitDim; i++ ) + newRows *= inputs[0]->size[i]; + zeroDev = inputs[0]->total() == newRows; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + return !zeroDev && (preferableTarget == DNN_TARGET_CPU || eps <= 1e-7f); + else + return backendId == DNN_BACKEND_OPENCV; + } + #ifdef HAVE_OPENCL bool fast_forward_ocl(std::vector &inputs, std::vector &outputs) { @@ -324,6 +340,22 @@ public: } } + virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "MVN"; + lp.precision = InferenceEngine::Precision::FP32; + std::shared_ptr ieLayer(new InferenceEngine::MVNLayer(lp)); + ieLayer->params["across_channels"] = acrossChannels ? "1" : "0"; + ieLayer->params["normalize_variance"] = normVariance ? "1" : "0"; + ieLayer->params["eps"] = format("%f", eps); + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 6cfa78c911..3b53805e1e 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -48,9 +48,8 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE && axis == 1; } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 
e4c723e3bf..2b0685826f 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -111,7 +111,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && sliceRanges.size() == 1; + backendId == DNN_BACKEND_INFERENCE_ENGINE && sliceRanges.size() == 1 && sliceRanges[0].size() == 4; } bool getMemoryShapes(const std::vector &inputs, diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 5f50289847..eefd321bb3 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -307,15 +307,17 @@ public: return Ptr(); } - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE + InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); + InferenceEngine::LayerParams lp; lp.name = name; lp.type = "SoftMax"; lp.precision = InferenceEngine::Precision::FP32; std::shared_ptr ieLayer(new InferenceEngine::SoftMaxLayer(lp)); - ieLayer->axis = axisRaw; + ieLayer->axis = clamp(axisRaw, input->dims.size()); return Ptr(new InfEngineBackendNode(ieLayer)); #endif // HAVE_INF_ENGINE return Ptr(); diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index adeb38574e..c60b8fcdbb 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -248,39 +248,38 @@ convolve_simd( int curr_y = or * STRIDE_Y; int curr_x = oc * STRIDE_X + lid; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - int saved_y = curr_y; -#endif + int in_addr = input_batch_offset + (curr_y - INPUT_PAD_H) * INPUT_WIDTH // y tile offset + curr_x - INPUT_PAD_W; // x tile offset + const int in_limit = (get_global_size(2) / ALIGNED_NUM_FILTERS) * 
TOTAL_INPUT_DEPTH_SIZE * INPUT_PITCH - 1; + Dtype in_buf[INVEC_SIZE]; for(int kd = 0; kd < INPUT_DEPTH; kd++) { +#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 + const bool cx_out_of_range = !(curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W); int in_offset = in_addr; __attribute__((opencl_unroll_hint(INVEC_SIZE))) - for (int reg = 0; reg < INVEC_SIZE; reg++) + for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) { - in_buf[reg] = inputs[in_offset]; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H && - curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W)) - { - in_buf[reg] = 0; - } -#endif - curr_y += 1; - in_offset += INPUT_WIDTH; + Dtype input = inputs[clamp(in_offset, 0, in_limit)]; + int cy = curr_y + reg; + in_buf[reg] = (cx_out_of_range || cy < INPUT_PAD_H || cy >= INPUT_HEIGHT + INPUT_PAD_H) ? 0 : input; } +#else + int in_offset = in_addr; + __attribute__((opencl_unroll_hint(INVEC_SIZE))) + for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) + { + in_buf[reg] = inputs[min(in_offset, in_limit)]; + } +#endif in_addr += INPUT_PITCH; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - curr_y = saved_y; -#endif - Dtype weight_buf[WEIGHT_PREF]; int w_idx=0; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fcca577094..66c03a777e 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -716,6 +716,8 @@ void TFImporter::populateNet(Net dstNet) // find all Const layers for params std::map value_id; + // A map with constant blobs which are shared between multiple layers. 
+ std::map sharedWeights; addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); @@ -805,51 +807,64 @@ void TFImporter::populateNet(Net dstNet) } } - const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id); - kernelFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); - int* kshape = layerParams.blobs[0].size.p; - const int outCh = kshape[0]; - const int inCh = kshape[1]; - const int height = kshape[2]; - const int width = kshape[3]; - if (type == "DepthwiseConv2dNative") + int kernelTensorInpId = -1; + const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId); + const String kernelTensorName = layer.input(kernelTensorInpId); + std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) { - CV_Assert(!locPredTransposed); - const int chMultiplier = kshape[0]; + kernelFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); - Mat copy = layerParams.blobs[0].clone(); - float* src = (float*)copy.data; - float* dst = (float*)layerParams.blobs[0].data; - for (int i = 0; i < chMultiplier; ++i) - for (int j = 0; j < inCh; ++j) - for (int s = 0; s < height * width; ++s) - { - int src_i = (i * inCh + j) * height * width + s; - int dst_i = (j * chMultiplier + i) * height* width + s; - dst[dst_i] = src[src_i]; - } - // TODO Use reshape instead - kshape[0] = inCh * chMultiplier; - kshape[1] = 1; - size_t* kstep = layerParams.blobs[0].step.p; - kstep[0] = kstep[1]; // fix steps too - } - layerParams.set("kernel_h", height); - layerParams.set("kernel_w", width); - layerParams.set("num_output", outCh); - - // Shuffle output channels from yxYX to xyXY. 
- if (locPredTransposed) - { - const int slice = height * width * inCh; - for (int i = 0; i < outCh; i += 2) + int* kshape = layerParams.blobs[0].size.p; + const int outCh = kshape[0]; + const int inCh = kshape[1]; + const int height = kshape[2]; + const int width = kshape[3]; + if (type == "DepthwiseConv2dNative") { - cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); - cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); - std::swap_ranges(src.begin(), src.end(), dst.begin()); + CV_Assert(!locPredTransposed); + const int chMultiplier = kshape[0]; + + Mat copy = layerParams.blobs[0].clone(); + float* src = (float*)copy.data; + float* dst = (float*)layerParams.blobs[0].data; + for (int i = 0; i < chMultiplier; ++i) + for (int j = 0; j < inCh; ++j) + for (int s = 0; s < height * width; ++s) + { + int src_i = (i * inCh + j) * height * width + s; + int dst_i = (j * chMultiplier + i) * height* width + s; + dst[dst_i] = src[src_i]; + } + // TODO Use reshape instead + kshape[0] = inCh * chMultiplier; + kshape[1] = 1; + size_t* kstep = layerParams.blobs[0].step.p; + kstep[0] = kstep[1]; // fix steps too } + + // Shuffle output channels from yxYX to xyXY. 
+ if (locPredTransposed) + { + const int slice = height * width * inCh; + for (int i = 0; i < outCh; i += 2) + { + cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); + cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } + } + sharedWeights[kernelTensorName] = layerParams.blobs[0]; } + else + { + layerParams.blobs[0] = sharedWeightsIt->second; + } + + layerParams.set("kernel_h", layerParams.blobs[0].size[2]); + layerParams.set("kernel_w", layerParams.blobs[0].size[3]); + layerParams.set("num_output", layerParams.blobs[0].size[0]); setStrides(layerParams, layer); setPadding(layerParams, layer); @@ -954,6 +969,13 @@ void TFImporter::populateNet(Net dstNet) { CV_Assert(layer.input_size() == 2); + // For the object detection networks, TensorFlow Object Detection API + // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) + // order. We can manage it at DetectionOutput layer parsing predictions + // or shuffle last Faster-RCNN's matmul weights. 
+ bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && + getLayerAttr(layer, "loc_pred_transposed").b(); + layerParams.set("bias_term", false); layerParams.blobs.resize(1); @@ -970,6 +992,17 @@ void TFImporter::populateNet(Net dstNet) blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); ExcludeLayer(net, weights_layer_index, 0, false); layers_to_ignore.insert(next_layers[0].first); + + if (locPredTransposed) + { + const int numWeights = layerParams.blobs[1].total(); + float* biasData = reinterpret_cast(layerParams.blobs[1].data); + CV_Assert(numWeights % 4 == 0); + for (int i = 0; i < numWeights; i += 2) + { + std::swap(biasData[i], biasData[i + 1]); + } + } } int kernel_blob_index = -1; @@ -983,6 +1016,16 @@ void TFImporter::populateNet(Net dstNet) } layerParams.set("num_output", layerParams.blobs[0].size[0]); + if (locPredTransposed) + { + CV_Assert(layerParams.blobs[0].dims == 2); + for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2) + { + cv::Mat src = layerParams.blobs[0].row(i); + cv::Mat dst = layerParams.blobs[0].row(i + 1); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } + } int id = dstNet.addLayer(name, "InnerProduct", layerParams); layer_id[name] = id; @@ -1010,6 +1053,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[permName] = permId; connect(layer_id, dstNet, inpId, permId, 0); inpId = Pin(permName); + inpLayout = DATA_LAYOUT_NCHW; } else if (newShape.total() == 4 && inpLayout == DATA_LAYOUT_NHWC) { @@ -1024,7 +1068,7 @@ void TFImporter::populateNet(Net dstNet) // one input only connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : DATA_LAYOUT_UNKNOWN; + data_layouts[name] = newShape.total() == 2 ? 
DATA_LAYOUT_PLANAR : inpLayout; } else if (type == "Flatten" || type == "Squeeze") { @@ -1696,41 +1740,6 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); data_layouts[name] = DATA_LAYOUT_UNKNOWN; } - else if (type == "DetectionOutput") - { - // op: "DetectionOutput" - // input_0: "locations" - // input_1: "classifications" - // input_2: "prior_boxes" - if (hasLayerAttr(layer, "num_classes")) - layerParams.set("num_classes", getLayerAttr(layer, "num_classes").i()); - if (hasLayerAttr(layer, "share_location")) - layerParams.set("share_location", getLayerAttr(layer, "share_location").b()); - if (hasLayerAttr(layer, "background_label_id")) - layerParams.set("background_label_id", getLayerAttr(layer, "background_label_id").i()); - if (hasLayerAttr(layer, "nms_threshold")) - layerParams.set("nms_threshold", getLayerAttr(layer, "nms_threshold").f()); - if (hasLayerAttr(layer, "top_k")) - layerParams.set("top_k", getLayerAttr(layer, "top_k").i()); - if (hasLayerAttr(layer, "code_type")) - layerParams.set("code_type", getLayerAttr(layer, "code_type").s()); - if (hasLayerAttr(layer, "keep_top_k")) - layerParams.set("keep_top_k", getLayerAttr(layer, "keep_top_k").i()); - if (hasLayerAttr(layer, "confidence_threshold")) - layerParams.set("confidence_threshold", getLayerAttr(layer, "confidence_threshold").f()); - if (hasLayerAttr(layer, "loc_pred_transposed")) - layerParams.set("loc_pred_transposed", getLayerAttr(layer, "loc_pred_transposed").b()); - if (hasLayerAttr(layer, "clip")) - layerParams.set("clip", getLayerAttr(layer, "clip").b()); - if (hasLayerAttr(layer, "variance_encoded_in_target")) - layerParams.set("variance_encoded_in_target", getLayerAttr(layer, "variance_encoded_in_target").b()); - - int id = dstNet.addLayer(name, "DetectionOutput", layerParams); - layer_id[name] = id; - for (int i = 0; i < 3; ++i) - connect(layer_id, dstNet, parsePin(layer.input(i)), id, i); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; - 
} else if (type == "Softmax") { if (hasLayerAttr(layer, "axis")) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 0bcbe562a3..63b43f1b72 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -165,12 +165,6 @@ TEST_P(Test_TensorFlow_layers, batch_norm) runTensorFlowNet("unfused_batch_norm"); runTensorFlowNet("fused_batch_norm_no_gamma"); runTensorFlowNet("unfused_batch_norm_no_gamma"); -} - -TEST_P(Test_TensorFlow_layers, mvn_batch_norm) -{ - if (backend == DNN_BACKEND_INFERENCE_ENGINE) - throw SkipTestException(""); runTensorFlowNet("mvn_batch_norm"); runTensorFlowNet("mvn_batch_norm_1x1"); } @@ -323,7 +317,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD) TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN) { checkBackend(); - if (backend == DNN_BACKEND_INFERENCE_ENGINE || + if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) || (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); @@ -343,6 +337,26 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN) normAssertDetections(ref, out, "", 0.3); } +TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN) +{ + checkBackend(); + std::string proto = findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pbtxt", false); + std::string model = findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pb", false); + + Net net = readNetFromTensorflow(model, proto); + Mat img = imread(findDataFile("dnn/dog416.png", false)); + Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy", false)); + Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + net.setInput(blob); + Mat out = net.forward(); + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.006 : default_l1; + normAssertDetections(ref, out, "", 0.4, scoreDiff, default_lInf); +} + TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) { checkBackend(); diff --git a/modules/features2d/doc/read_file_nondiff32.pl b/modules/features2d/doc/read_file_nondiff32.pl index 6f1b420ecb..2ada4c9ea2 100644 --- a/modules/features2d/doc/read_file_nondiff32.pl +++ b/modules/features2d/doc/read_file_nondiff32.pl @@ -131,7 +131,7 @@ my $success_structured; } close $in2 or die "Can't close $filein: $!"; } - #find next else and interprete it + #find next else and interpret it open(my $in3, "<", $filein) or die "Can't open $filein: $!"; $i3=1; $ifcount3=0; diff --git a/modules/features2d/doc/read_file_score32.pl b/modules/features2d/doc/read_file_score32.pl index c1adedac20..10cb77d080 100644 --- a/modules/features2d/doc/read_file_score32.pl +++ b/modules/features2d/doc/read_file_score32.pl @@ -119,7 +119,7 @@ my $is_a_corner; } close $in2 or die "Can't close $filein: $!"; } - #find next else and interprete it + #find next else and interpret it open(my $in3, "<", $filein) or die "Can't open $filein: $!"; $i3=1; $ifcount3=0; diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 23cad31e4e..8925996da9 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1861,7 +1861,7 @@ gradient term \f$G\f$ and the second gradient term \f$b\f$ gives: The algorithm sets the center of the neighborhood window at this new center \f$q\f$ and then iterates until the center stays within a set threshold. -@param image Input image. +@param image Input single-channel, 8-bit or float image. @param corners Initial coordinates of the input corners and refined coordinates provided for output. @param winSize Half of the side length of the search window. 
For example, if winSize=Size(5,5) , diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 73b74fa9df..02043ac929 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -2048,7 +2048,7 @@ public: svmType == NU_SVC ? "NU_SVC" : svmType == ONE_CLASS ? "ONE_CLASS" : svmType == EPS_SVR ? "EPS_SVR" : - svmType == NU_SVR ? "NU_SVR" : format("Uknown_%d", svmType); + svmType == NU_SVR ? "NU_SVR" : format("Unknown_%d", svmType); String kernel_type_str = kernelType == LINEAR ? "LINEAR" : kernelType == POLY ? "POLY" : diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp index 8fc18f467f..4bd9596c1d 100644 --- a/modules/objdetect/src/hog.cpp +++ b/modules/objdetect/src/hog.cpp @@ -255,8 +255,8 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, Mat_ _lut(1, 256); const float* const lut = &_lut(0,0); #if CV_SSE2 - const int indeces[] = { 0, 1, 2, 3 }; - __m128i idx = _mm_loadu_si128((const __m128i*)indeces); + const int indices[] = { 0, 1, 2, 3 }; + __m128i idx = _mm_loadu_si128((const __m128i*)indices); __m128i ifour = _mm_set1_epi32(4); float* const _data = &_lut(0, 0); @@ -273,8 +273,8 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, idx = _mm_add_epi32(idx, ifour); } #elif CV_NEON - const int indeces[] = { 0, 1, 2, 3 }; - uint32x4_t idx = *(uint32x4_t*)indeces; + const int indices[] = { 0, 1, 2, 3 }; + uint32x4_t idx = *(uint32x4_t*)indices; uint32x4_t ifour = vdupq_n_u32(4); float* const _data = &_lut(0, 0); diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index c3a5593d35..fdbfa66bad 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -7,7 +7,6 @@ #include "precomp.hpp" #include "opencv2/objdetect.hpp" -// #include "opencv2/calib3d.hpp" #include #include @@ -21,7 +20,6 @@ class QRDecode { public: void init(Mat src, double eps_vertical_ = 0.2, double eps_horizontal_ = 0.1); - void binarization(); bool 
localization(); bool transformation(); Mat getBinBarcode() { return bin_barcode; } @@ -35,9 +33,7 @@ protected: Point2f intersectionLines(Point2f a1, Point2f a2, Point2f b1, Point2f b2); vector getQuadrilateral(vector angle_list); bool testBypassRoute(vector hull, int start, int finish); - double getTriangleArea(Point2f a, Point2f b, Point2f c); - double getPolygonArea(vector points); - double getCosVectors(Point2f a, Point2f b, Point2f c); + inline double getCosVectors(Point2f a, Point2f b, Point2f c); Mat barcode, bin_barcode, straight_barcode; vector localization_points, transformation_points; @@ -63,13 +59,7 @@ void QRDecode::init(Mat src, double eps_vertical_, double eps_horizontal_) } eps_vertical = eps_vertical_; eps_horizontal = eps_horizontal_; -} - -void QRDecode::binarization() -{ - Mat filter_barcode; - GaussianBlur(barcode, filter_barcode, Size(3, 3), 0); - threshold(filter_barcode, bin_barcode, 0, 255, THRESH_BINARY + THRESH_OTSU); + adaptiveThreshold(barcode, bin_barcode, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY, 71, 2); } vector QRDecode::searchVerticalLines() @@ -139,7 +129,7 @@ vector QRDecode::separateHorizontalLines(vector list_lines) for (size_t pnt = 0; pnt < list_lines.size(); pnt++) { - int x = static_cast(list_lines[pnt][0] + list_lines[pnt][2] / 2); + int x = static_cast(list_lines[pnt][0] + list_lines[pnt][2] * 0.5); int y = static_cast(list_lines[pnt][1]); // --------------- Search horizontal up-lines --------------- // @@ -203,7 +193,7 @@ vector QRDecode::separateHorizontalLines(vector list_lines) { point2f_result.push_back( Point2f(static_cast(result[i][1]), - static_cast(result[i][0] + result[i][2] / 2))); + static_cast(result[i][0] + result[i][2] * 0.5))); } return point2f_result; } @@ -345,16 +335,23 @@ bool QRDecode::computeTransformationPoints() } } } + if (down_left_edge_point == Point2f(0, 0) || - up_right_edge_point == Point2f(0, 0)) { return false; } + up_right_edge_point == Point2f(0, 0) || + new_non_zero_elem[0].size() 
== 0) { return false; } double max_area = -1; up_left_edge_point = new_non_zero_elem[0][0]; + for (size_t i = 0; i < new_non_zero_elem[0].size(); i++) { - double temp_area = getTriangleArea(new_non_zero_elem[0][i], - down_left_edge_point, - up_right_edge_point); + vector list_edge_points; + list_edge_points.push_back(new_non_zero_elem[0][i]); + list_edge_points.push_back(down_left_edge_point); + list_edge_points.push_back(up_right_edge_point); + + double temp_area = fabs(contourArea(list_edge_points)); + if (max_area < temp_area) { up_left_edge_point = new_non_zero_elem[0][i]; @@ -375,6 +372,7 @@ bool QRDecode::computeTransformationPoints() } } + for (size_t i = 0; i < new_non_zero_elem[2].size(); i++) { double temp_norm_delta = norm(up_left_edge_point - new_non_zero_elem[2][i]) @@ -485,7 +483,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) hull[i] = Point2f(x, y); } - const double experimental_area = getPolygonArea(hull); + const double experimental_area = fabs(contourArea(hull)); vector result_hull_point(angle_size); double min_norm; @@ -539,7 +537,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) double temp_norm = getCosVectors(hull[index_hull], intrsc_line_hull, angle_closest_pnt); if (min_norm > temp_norm && norm(hull[index_hull] - hull[next_index_hull]) > - norm(angle_list[1] - angle_list[2]) / 10) + norm(angle_list[1] - angle_list[2]) * 0.1) { min_norm = temp_norm; result_side_begin[0] = hull[index_hull]; @@ -577,7 +575,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) double temp_norm = getCosVectors(hull[index_hull], intrsc_line_hull, angle_closest_pnt); if (min_norm > temp_norm && norm(hull[index_hull] - hull[next_index_hull]) > - norm(angle_list[0] - angle_list[1]) / 20) + norm(angle_list[0] - angle_list[1]) * 0.05) { min_norm = temp_norm; result_side_begin[1] = hull[index_hull]; @@ -611,7 +609,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) if (next_index_hull == hull_size) { next_index_hull = 0; } if 
(next_index_hull == -1) { next_index_hull = hull_size - 1; } - if (norm(hull[index_hull] - hull[next_index_hull]) < standart_norm / 10.0) + if (norm(hull[index_hull] - hull[next_index_hull]) < standart_norm * 0.1) { index_hull = next_index_hull; continue; } extra_index_hull = finish_line[1]; @@ -623,7 +621,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) if (extra_next_index_hull == hull_size) { extra_next_index_hull = 0; } if (extra_next_index_hull == -1) { extra_next_index_hull = hull_size - 1; } - if (norm(hull[extra_index_hull] - hull[extra_next_index_hull]) < standart_norm / 10.0) + if (norm(hull[extra_index_hull] - hull[extra_next_index_hull]) < standart_norm * 0.1) { extra_index_hull = extra_next_index_hull; continue; } test_result_angle_list[0] @@ -639,7 +637,7 @@ vector QRDecode::getQuadrilateral(vector angle_list) = intersectionLines(hull[index_hull], hull[next_index_hull], result_side_begin[0], result_side_end[0]); - test_diff_area = fabs(getPolygonArea(test_result_angle_list) - experimental_area); + test_diff_area = fabs(fabs(contourArea(test_result_angle_list)) - experimental_area); if (min_diff_area > test_diff_area) { min_diff_area = test_diff_area; @@ -656,53 +654,22 @@ vector QRDecode::getQuadrilateral(vector angle_list) index_hull = next_index_hull; } while(index_hull != unstable_pnt); + + if (norm(result_angle_list[0] - angle_list[1]) > 2) { result_angle_list[0] = angle_list[1]; } + if (norm(result_angle_list[1] - angle_list[0]) > 2) { result_angle_list[1] = angle_list[0]; } + if (norm(result_angle_list[3] - angle_list[2]) > 2) { result_angle_list[3] = angle_list[2]; } + return result_angle_list; } -// b -// / | -// / | -// / | -// / S | -// / | -// a ----- c - -double QRDecode::getTriangleArea(Point2f a, Point2f b, Point2f c) -{ - double norm_sides[] = { norm(a - b), norm(b - c), norm(c - a) }; - double half_perimeter = (norm_sides[0] + norm_sides[1] + norm_sides[2]) / 2.0; - double triangle_area = sqrt(half_perimeter * - 
(half_perimeter - norm_sides[0]) * - (half_perimeter - norm_sides[1]) * - (half_perimeter - norm_sides[2])); - return triangle_area; -} - -double QRDecode::getPolygonArea(vector points) -{ - CV_Assert(points.size() >= 3); - if (points.size() == 3) - { return getTriangleArea(points[0], points[1], points[2]); } - else - { - double result_area = 0.0; - for (size_t i = 1; i < points.size() - 1; i++) - { - result_area += getTriangleArea(points[0], points[i], points[i + 1]); - } - return result_area; - } -} - // / | b // / | // / | // a/ | c -double QRDecode::getCosVectors(Point2f a, Point2f b, Point2f c) +inline double QRDecode::getCosVectors(Point2f a, Point2f b, Point2f c) { - return ((a - b).x * (c - b).x + (a - b).y * (c - b).y) - / (norm(a - b) * norm(c - b)); + return ((a - b).x * (c - b).x + (a - b).y * (c - b).y) / (norm(a - b) * norm(c - b)); } bool QRDecode::transformation() @@ -764,7 +731,6 @@ bool QRCodeDetector::detect(InputArray in, OutputArray points) const CV_Assert(inarr.type() == CV_8UC1); QRDecode qrdec; qrdec.init(inarr, p->epsX, p->epsY); - qrdec.binarization(); if (!qrdec.localization()) { return false; } if (!qrdec.transformation()) { return false; } vector pnts2f = qrdec.getTransformationPoints(); diff --git a/modules/photo/src/contrast_preserve.hpp b/modules/photo/src/contrast_preserve.hpp index ec8274e883..1afd4bc3e3 100644 --- a/modules/photo/src/contrast_preserve.hpp +++ b/modules/photo/src/contrast_preserve.hpp @@ -159,12 +159,12 @@ void Decolor::gradvector(const Mat &img, vector &grad) const for(int i=0;i(i, j); + grad[i*width + j] = d_trans.at(i, j); const int offset = width * height; for(int i=0;i(i, j); + grad[offset + i * width + j] = d1_trans.at(i, j); } void Decolor::colorGrad(const Mat &img, vector &Cg) const @@ -204,14 +204,19 @@ void Decolor::add_to_vector_poly(vector < vector > &polyGrad, const vec idx1++; } -void Decolor::weak_order(const Mat &img, vector &alf) const +void Decolor::weak_order(const Mat &im, vector &alf) const { - 
const int h = img.size().height; - const int w = img.size().width; + Mat img; + const int h = im.size().height; + const int w = im.size().width; if((h + w) > 800) { const double sizefactor = double(800)/(h+w); - resize(img, img, Size(cvRound(h*sizefactor), cvRound(w*sizefactor))); + resize(im, img, Size(cvRound(w*sizefactor), cvRound(h*sizefactor))); + } + else + { + img = im; } Mat curIm = Mat(img.size(),CV_32FC1); @@ -246,16 +251,20 @@ void Decolor::weak_order(const Mat &img, vector &alf) const alf[i] -= tmp1[i] * tmp2[i] * tmp3[i]; } -void Decolor::grad_system(const Mat &img, vector < vector < double > > &polyGrad, +void Decolor::grad_system(const Mat &im, vector < vector < double > > &polyGrad, vector < double > &Cg, vector & comb) const { - int h = img.size().height; - int w = img.size().width; - + Mat img; + int h = im.size().height; + int w = im.size().width; if((h + w) > 800) { const double sizefactor = double(800)/(h+w); - resize(img, img, Size(cvRound(h*sizefactor), cvRound(w*sizefactor))); + resize(im, img, Size(cvRound(w*sizefactor), cvRound(h*sizefactor))); + } + else + { + img = im; } h = img.size().height; diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp index 4acebea5e1..25c0f2ab1e 100644 --- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp @@ -137,6 +137,21 @@ private: Ptr surf; }; + +/** @brief SIFT features finder. + +@sa detail::FeaturesFinder, SIFT +*/ +class CV_EXPORTS SiftFeaturesFinder : public FeaturesFinder +{ +public: + SiftFeaturesFinder(); + +private: + void find(InputArray image, ImageFeatures &features) CV_OVERRIDE; + Ptr sift; +}; + /** @brief ORB features finder. 
: @sa detail::FeaturesFinder, ORB diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 6b9d75cdd8..3d82acf484 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -51,6 +51,7 @@ using namespace cv::cuda; #ifdef HAVE_OPENCV_XFEATURES2D #include "opencv2/xfeatures2d.hpp" using xfeatures2d::SURF; +using xfeatures2d::SIFT; #endif #ifdef HAVE_OPENCV_CUDAIMGPROC @@ -475,6 +476,35 @@ void SurfFeaturesFinder::find(InputArray image, ImageFeatures &features) } } +SiftFeaturesFinder::SiftFeaturesFinder() +{ +#ifdef HAVE_OPENCV_XFEATURES2D + Ptr sift_ = SIFT::create(); + if( !sift_ ) + CV_Error( Error::StsNotImplemented, "OpenCV was built without SIFT support" ); + sift = sift_; +#else + CV_Error( Error::StsNotImplemented, "OpenCV was built without SIFT support" ); +#endif +} + +void SiftFeaturesFinder::find(InputArray image, ImageFeatures &features) +{ + UMat gray_image; + CV_Assert((image.type() == CV_8UC3) || (image.type() == CV_8UC1)); + if(image.type() == CV_8UC3) + { + cvtColor(image, gray_image, COLOR_BGR2GRAY); + } + else + { + gray_image = image.getUMat(); + } + UMat descriptors; + sift->detectAndCompute(gray_image, Mat(), features.keypoints, descriptors); + features.descriptors = descriptors.reshape(1, (int)features.keypoints.size()); +} + OrbFeaturesFinder::OrbFeaturesFinder(Size _grid_size, int n_features, float scaleFactor, int nlevels) { grid_size = _grid_size; diff --git a/modules/ts/include/opencv2/ts/ts_gtest.h b/modules/ts/include/opencv2/ts/ts_gtest.h index 2b1299c3bf..b687a5722e 100644 --- a/modules/ts/include/opencv2/ts/ts_gtest.h +++ b/modules/ts/include/opencv2/ts/ts_gtest.h @@ -9013,7 +9013,7 @@ class NativeArray { // Implements Boolean test assertions such as EXPECT_TRUE. expression can be // either a boolean expression or an AssertionResult. text is a textual -// represenation of expression as it was passed into the EXPECT_TRUE. 
+// representation of expression as it was passed into the EXPECT_TRUE. #define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar_ = \ diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index 8758b21dd9..2c9570e67f 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -613,10 +613,12 @@ int GStreamerCapture::getCaptureDomain() { return CAP_GSTREAMER; } */ bool GStreamerCapture::open(int id) { + gst_initializer::init(); + if (!is_gst_element_exists("v4l2src")) return false; std::ostringstream desc; - desc << "v4l2src device-name=/dev/video" << id + desc << "v4l2src device=/dev/video" << id << " ! " << COLOR_ELEM << " ! appsink"; return open(desc.str()); diff --git a/modules/videoio/src/cap_mjpeg_decoder.cpp b/modules/videoio/src/cap_mjpeg_decoder.cpp index f8ba6857f3..02400fd9f0 100644 --- a/modules/videoio/src/cap_mjpeg_decoder.cpp +++ b/modules/videoio/src/cap_mjpeg_decoder.cpp @@ -146,6 +146,9 @@ bool MotionJpegCapture::grabFrame() } else { + if (m_frame_iterator == m_mjpeg_frames.end()) + return false; + ++m_frame_iterator; } } diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index 1b7ae8a19e..d816dbcbf0 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -431,6 +431,7 @@ static int autosetup_capture_mode_v4l2(CvCaptureCAM_V4L* capture) { V4L2_PIX_FMT_BGR24, V4L2_PIX_FMT_RGB24, V4L2_PIX_FMT_YVU420, + V4L2_PIX_FMT_YUV420, V4L2_PIX_FMT_YUV411P, V4L2_PIX_FMT_YUYV, V4L2_PIX_FMT_UYVY, @@ -532,6 +533,7 @@ static int v4l2_set_fps(CvCaptureCAM_V4L* capture) { static int v4l2_num_channels(__u32 palette) { switch(palette) { case V4L2_PIX_FMT_YVU420: + case V4L2_PIX_FMT_YUV420: case V4L2_PIX_FMT_MJPEG: case V4L2_PIX_FMT_JPEG: case V4L2_PIX_FMT_Y16: @@ -562,6 +564,7 @@ static void v4l2_create_frame(CvCaptureCAM_V4L *capture) { size = 
CvSize(capture->buffers[capture->bufferIndex].length, 1); break; case V4L2_PIX_FMT_YVU420: + case V4L2_PIX_FMT_YUV420: size.height = size.height * 3 / 2; // "1.5" channels break; case V4L2_PIX_FMT_Y16: @@ -1021,10 +1024,10 @@ move_411_block(int yTL, int yTR, int yBL, int yBR, int u, int v, /* Converts from planar YUV420P to RGB24. */ static inline void -yuv420p_to_rgb24(int width, int height, uchar* src, uchar* dst) +yuv420p_to_rgb24(int width, int height, uchar* src, uchar* dst, bool isYUV) { cvtColor(Mat(height * 3 / 2, width, CV_8U, src), Mat(height, width, CV_8UC3, dst), - COLOR_YUV2BGR_YV12); + isYUV ? COLOR_YUV2BGR_IYUV : COLOR_YUV2BGR_YV12); } // Consider a YUV411P image of 8x2 pixels. @@ -1490,10 +1493,12 @@ static IplImage* icvRetrieveFrameCAM_V4L( CvCaptureCAM_V4L* capture, int) { break; case V4L2_PIX_FMT_YVU420: + case V4L2_PIX_FMT_YUV420: yuv420p_to_rgb24(capture->form.fmt.pix.width, capture->form.fmt.pix.height, (unsigned char*)(capture->buffers[capture->bufferIndex].start), - (unsigned char*)capture->frame.imageData); + (unsigned char*)capture->frame.imageData, + capture->palette == V4L2_PIX_FMT_YUV420); break; case V4L2_PIX_FMT_YUV411P: diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 6ac1dea60b..15aff36c39 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -70,9 +70,7 @@ endif() ocv_install_example_src("." CMakeLists.txt) if(INSTALL_C_EXAMPLES) - install(DIRECTORY data - DESTINATION "${OPENCV_SAMPLES_SRC_INSTALL_PATH}/data" - COMPONENT samples_data) + install(DIRECTORY data DESTINATION "${OPENCV_SAMPLES_SRC_INSTALL_PATH}" COMPONENT samples_data) endif() else() diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp index 91641d7a28..2ff4e7b16b 100644 --- a/samples/cpp/stitching_detailed.cpp +++ b/samples/cpp/stitching_detailed.cpp @@ -82,7 +82,7 @@ static void printUsage() "\nMotion Estimation Flags:\n" " --work_megapix \n" " Resolution for image registration step. 
The default is 0.6 Mpx.\n" - " --features (surf|orb)\n" + " --features (surf|orb|sift)\n" " Type of features used for images matching. The default is surf.\n" " --matcher (homography|affine)\n" " Matcher used for pairwise image matching.\n" @@ -430,6 +430,9 @@ int main(int argc, char* argv[]) { finder = makePtr(); } + else if (features_type == "sift") { + finder = makePtr(); + } else { cout << "Unknown 2D features type: '" << features_type << "'.\n"; diff --git a/samples/cpp/train_HOG.cpp b/samples/cpp/train_HOG.cpp index 1c6c81481c..3a1527d8f4 100644 --- a/samples/cpp/train_HOG.cpp +++ b/samples/cpp/train_HOG.cpp @@ -204,7 +204,7 @@ int main( int argc, char** argv ) const char* keys = { "{help h| | show help message}" - "{pd | | path of directory contains possitive images}" + "{pd | | path of directory contains positive images}" "{nd | | path of directory contains negative images}" "{td | | path of directory contains test images}" "{tv | | test video file name}" diff --git a/samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp b/samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp index c194e82f24..aa6107c120 100644 --- a/samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp +++ b/samples/cpp/tutorial_code/ml/introduction_to_pca/introduction_to_pca.cpp @@ -1,6 +1,6 @@ /** * @file introduction_to_pca.cpp - * @brief This program demonstrates how to use OpenCV PCA to extract the orienation of an object + * @brief This program demonstrates how to use OpenCV PCA to extract the orientation of an object * @author OpenCV team */ diff --git a/samples/cpp/warpPerspective_demo.cpp b/samples/cpp/warpPerspective_demo.cpp index 35bf87dfd9..591e03d59b 100644 --- a/samples/cpp/warpPerspective_demo.cpp +++ b/samples/cpp/warpPerspective_demo.cpp @@ -26,7 +26,7 @@ static void help(char** argv) "\tESC, q - quit the program\n" "\tr - change order of points to rotate transformation\n" "\tc - delete selected points\n" - "\ti - 
change order of points to invers transformation \n" + "\ti - change order of points to inverse transformation \n" "\nUse your mouse to select a point and move it to see transformation changes" << endl; } diff --git a/samples/dnn/CMakeLists.txt b/samples/dnn/CMakeLists.txt index 0df76517a5..4af6d40928 100644 --- a/samples/dnn/CMakeLists.txt +++ b/samples/dnn/CMakeLists.txt @@ -13,32 +13,6 @@ if(NOT BUILD_EXAMPLES OR NOT OCV_DEPENDENCIES_FOUND) return() endif() -function(download_net name commit hash) - set(DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR "${CMAKE_CURRENT_LIST_DIR}/face_detector") - if(COMMAND ocv_download) - ocv_download(FILENAME ${name} - HASH ${hash} - URL - "$ENV{OPENCV_DNN_MODELS_URL}" - "${OPENCV_DNN_MODELS_URL}" - "https://raw.githubusercontent.com/opencv/opencv_3rdparty/${commit}/" - DESTINATION_DIR ${DNN_FACE_DETECTOR_MODEL_DOWNLOAD_DIR} - ID DNN_FACE_DETECTOR - RELATIVE_URL - STATUS res) - endif() -endfunction() - -# Model branch name: dnn_samples_face_detector_20180205_fp16 -download_net("res10_300x300_ssd_iter_140000_fp16.caffemodel" - "19512576c112aa2c7b6328cb0e8d589a4a90a26d" - "f737f886e33835410c69e3ccfe0720a1") - -# Model branch name: dnn_samples_face_detector_20180220_uint8 -download_net("opencv_face_detector_uint8.pb" - "7b425df276ba2161b8edaab0f0756f4a735d61b9" - "56acf81f55d9b9e96c3347bc65409b9e") - project(dnn_samples) ocv_include_modules_recurse(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS}) file(GLOB_RECURSE dnn_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) diff --git a/samples/dnn/custom_layers.hpp b/samples/dnn/custom_layers.hpp index 918cc8ae46..a18bb9a5cf 100644 --- a/samples/dnn/custom_layers.hpp +++ b/samples/dnn/custom_layers.hpp @@ -198,7 +198,7 @@ private: //! [ResizeBilinearLayer] // -// The folowing code is used only to generate tutorials documentation. +// The following code is used only to generate tutorials documentation. // //! 
[A custom layer interface] diff --git a/samples/dnn/face_detector/download_weights.py b/samples/dnn/face_detector/download_weights.py new file mode 100755 index 0000000000..f872190d02 --- /dev/null +++ b/samples/dnn/face_detector/download_weights.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +from __future__ import print_function +import hashlib +import time +import sys +import xml.etree.ElementTree as ET +if sys.version_info[0] < 3: + from urllib2 import urlopen +else: + from urllib.request import urlopen + +class HashMismatchException(Exception): + def __init__(self, expected, actual): + Exception.__init__(self) + self.expected = expected + self.actual = actual + def __str__(self): + return 'Hash mismatch: {} vs {}'.format(self.expected, self.actual) + +class MetalinkDownloader(object): + BUFSIZE = 10*1024*1024 + NS = {'ml': 'urn:ietf:params:xml:ns:metalink'} + tick = 0 + + def download(self, metalink_file): + status = True + for file_elem in ET.parse(metalink_file).getroot().findall('ml:file', self.NS): + url = file_elem.find('ml:url', self.NS).text + fname = file_elem.attrib['name'] + hash_sum = file_elem.find('ml:hash', self.NS).text + print('*** {}'.format(fname)) + try: + self.verify(hash_sum, fname) + except Exception as ex: + print(' {}'.format(ex)) + try: + print(' {}'.format(url)) + with open(fname, 'wb') as file_stream: + self.buffered_read(urlopen(url), file_stream.write) + self.verify(hash_sum, fname) + except Exception as ex: + print(' {}'.format(ex)) + print(' FAILURE') + status = False + continue + print(' SUCCESS') + return status + + def print_progress(self, msg, timeout = 0): + if time.time() - self.tick > timeout: + print(msg, end='') + sys.stdout.flush() + self.tick = time.time() + + def buffered_read(self, in_stream, processing): + self.print_progress(' >') + while True: + buf = in_stream.read(self.BUFSIZE) + if not buf: + break + processing(buf) + self.print_progress('>', 5) + print(' done') + + def verify(self, hash_sum, fname): + sha = 
hashlib.sha1() + with open(fname, 'rb') as file_stream: + self.buffered_read(file_stream, sha.update) + if hash_sum != sha.hexdigest(): + raise HashMismatchException(hash_sum, sha.hexdigest()) + +if __name__ == '__main__': + sys.exit(0 if MetalinkDownloader().download('weights.meta4') else 1) diff --git a/samples/dnn/face_detector/weights.meta4 b/samples/dnn/face_detector/weights.meta4 new file mode 100644 index 0000000000..35d303085b --- /dev/null +++ b/samples/dnn/face_detector/weights.meta4 @@ -0,0 +1,13 @@ + + + + OpenCV face detector FP16 weights + 31fc22bfdd907567a04bb45b7cfad29966caddc1 + https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel + + + OpenCV face detector UINT8 weights + 4f2fdf6f231d759d7bbdb94353c5a68690f3d2ae + https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180220_uint8/opencv_face_detector_uint8.pb + + diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py new file mode 100644 index 0000000000..61e3bbcaee --- /dev/null +++ b/samples/dnn/tf_text_graph_common.py @@ -0,0 +1,25 @@ +import tensorflow as tf +from tensorflow.core.framework.node_def_pb2 import NodeDef +from google.protobuf import text_format + +def tensorMsg(values): + if all([isinstance(v, float) for v in values]): + dtype = 'DT_FLOAT' + field = 'float_val' + elif all([isinstance(v, int) for v in values]): + dtype = 'DT_INT32' + field = 'int_val' + else: + raise Exception('Wrong values types') + + msg = 'tensor { dtype: ' + dtype + ' tensor_shape { dim { size: %d } }' % len(values) + for value in values: + msg += '%s: %s ' % (field, str(value)) + return msg + '}' + +def addConstNode(name, values, graph_def): + node = NodeDef() + node.name = name + node.op = 'Const' + text_format.Merge(tensorMsg(values), node.attr["value"]) + graph_def.node.extend([node]) diff --git a/samples/dnn/tf_text_graph_faster_rcnn.py 
b/samples/dnn/tf_text_graph_faster_rcnn.py index 7ad5de283a..9aea38424a 100644 --- a/samples/dnn/tf_text_graph_faster_rcnn.py +++ b/samples/dnn/tf_text_graph_faster_rcnn.py @@ -6,6 +6,8 @@ from tensorflow.core.framework.node_def_pb2 import NodeDef from tensorflow.tools.graph_transforms import TransformGraph from google.protobuf import text_format +from tf_text_graph_common import tensorMsg, addConstNode + parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' 'SSD model from TensorFlow Object Detection API. ' 'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.') @@ -93,21 +95,6 @@ while True: if node.op == 'CropAndResize': break -def tensorMsg(values): - if all([isinstance(v, float) for v in values]): - dtype = 'DT_FLOAT' - field = 'float_val' - elif all([isinstance(v, int) for v in values]): - dtype = 'DT_INT32' - field = 'int_val' - else: - raise Exception('Wrong values types') - - msg = 'tensor { dtype: ' + dtype + ' tensor_shape { dim { size: %d } }' % len(values) - for value in values: - msg += '%s: %s ' % (field, str(value)) - return msg + '}' - def addSlice(inp, out, begins, sizes): beginsNode = NodeDef() beginsNode.name = out + '/begins' @@ -151,17 +138,25 @@ def addSoftMax(inp, out): softmax.input.append(inp) graph_def.node.extend([softmax]) +def addFlatten(inp, out): + flatten = NodeDef() + flatten.name = out + flatten.op = 'Flatten' + flatten.input.append(inp) + graph_def.node.extend([flatten]) + addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd', 'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2]) addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1', 'FirstStageBoxPredictor/ClassPredictor/softmax') # Compare with Reshape_4 -flatten = NodeDef() -flatten.name = 'FirstStageBoxPredictor/BoxEncodingPredictor/flatten' # Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd -flatten.op = 'Flatten' 
-flatten.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd') -graph_def.node.extend([flatten]) +addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax', + 'FirstStageBoxPredictor/ClassPredictor/softmax/flatten') + +# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd +addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd', + 'FirstStageBoxPredictor/BoxEncodingPredictor/flatten') proposals = NodeDef() proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized) @@ -194,7 +189,7 @@ detectionOut.name = 'detection_out' detectionOut.op = 'DetectionOutput' detectionOut.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/flatten') -detectionOut.input.append('FirstStageBoxPredictor/ClassPredictor/softmax') +detectionOut.input.append('FirstStageBoxPredictor/ClassPredictor/softmax/flatten') detectionOut.input.append('proposals') text_format.Merge('i: 2', detectionOut.attr['num_classes']) @@ -204,11 +199,21 @@ text_format.Merge('f: 0.7', detectionOut.attr['nms_threshold']) text_format.Merge('i: 6000', detectionOut.attr['top_k']) text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type']) text_format.Merge('i: 100', detectionOut.attr['keep_top_k']) -text_format.Merge('b: true', detectionOut.attr['clip']) -text_format.Merge('b: true', detectionOut.attr['loc_pred_transposed']) +text_format.Merge('b: false', detectionOut.attr['clip']) graph_def.node.extend([detectionOut]) +addConstNode('clip_by_value/lower', [0.0], graph_def) +addConstNode('clip_by_value/upper', [1.0], graph_def) + +clipByValueNode = NodeDef() +clipByValueNode.name = 'detection_out/clip_by_value' +clipByValueNode.op = 'ClipByValue' +clipByValueNode.input.append('detection_out') +clipByValueNode.input.append('clip_by_value/lower') +clipByValueNode.input.append('clip_by_value/upper') +graph_def.node.extend([clipByValueNode]) + # Save as text. 
for node in reversed(topNodes): graph_def.node.extend([node]) @@ -225,17 +230,13 @@ addReshape('SecondStageBoxPredictor/Reshape_1/slice', # Replace Flatten subgraph onto a single node. for i in reversed(range(len(graph_def.node))): if graph_def.node[i].op == 'CropAndResize': - graph_def.node[i].input.insert(1, 'detection_out') + graph_def.node[i].input.insert(1, 'detection_out/clip_by_value') if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape': - shapeNode = NodeDef() - shapeNode.name = 'SecondStageBoxPredictor/Reshape/shape2' - shapeNode.op = 'Const' - text_format.Merge(tensorMsg([1, -1, 4]), shapeNode.attr["value"]) - graph_def.node.extend([shapeNode]) + addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def) graph_def.node[i].input.pop() - graph_def.node[i].input.append(shapeNode.name) + graph_def.node[i].input.append('SecondStageBoxPredictor/Reshape/shape2') if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape', 'SecondStageBoxPredictor/Flatten/flatten/strided_slice', @@ -246,12 +247,15 @@ for node in graph_def.node: if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape': node.op = 'Flatten' node.input.pop() - break + + if node.name in ['FirstStageBoxPredictor/BoxEncodingPredictor/Conv2D', + 'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']: + text_format.Merge('b: true', node.attr["loc_pred_transposed"]) ################################################################################ ### Postprocessing ################################################################################ -addSlice('detection_out', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4]) +addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4]) variance = NodeDef() variance.name = 'proposals/variance' @@ -268,12 +272,13 @@ text_format.Merge('i: 2', varianceEncoder.attr["axis"]) graph_def.node.extend([varianceEncoder]) addReshape('detection_out/slice', 
'detection_out/slice/reshape', [1, 1, -1]) +addFlatten('variance_encoded', 'variance_encoded/flatten') detectionOut = NodeDef() detectionOut.name = 'detection_out_final' detectionOut.op = 'DetectionOutput' -detectionOut.input.append('variance_encoded') +detectionOut.input.append('variance_encoded/flatten') detectionOut.input.append('SecondStageBoxPredictor/Reshape_1/Reshape') detectionOut.input.append('detection_out/slice/reshape') @@ -283,7 +288,6 @@ text_format.Merge('i: %d' % (args.num_classes + 1), detectionOut.attr['backgroun text_format.Merge('f: 0.6', detectionOut.attr['nms_threshold']) text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type']) text_format.Merge('i: 100', detectionOut.attr['keep_top_k']) -text_format.Merge('b: true', detectionOut.attr['loc_pred_transposed']) text_format.Merge('b: true', detectionOut.attr['clip']) text_format.Merge('b: true', detectionOut.attr['variance_encoded_in_target']) graph_def.node.extend([detectionOut]) diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index 1bf4079113..573a6d8941 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -15,6 +15,7 @@ from math import sqrt from tensorflow.core.framework.node_def_pb2 import NodeDef from tensorflow.tools.graph_transforms import TransformGraph from google.protobuf import text_format +from tf_text_graph_common import tensorMsg, addConstNode parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' 'SSD model from TensorFlow Object Detection API. 
' @@ -29,6 +30,11 @@ parser.add_argument('--aspect_ratios', default=[1.0, 2.0, 0.5, 3.0, 0.333], type help='Hyper-parameter of ssd_anchor_generator from config file.') parser.add_argument('--image_width', default=300, type=int, help='Training images width.') parser.add_argument('--image_height', default=300, type=int, help='Training images height.') +parser.add_argument('--not_reduce_boxes_in_lowest_layer', default=False, action='store_true', + help='A boolean to indicate whether the fixed 3 boxes per ' + 'location is used in the lowest anchors generation layer.') +parser.add_argument('--box_predictor', default='convolutional', type=str, + choices=['convolutional', 'weight_shared_convolutional']) args = parser.parse_args() # Nodes that should be kept. @@ -160,28 +166,6 @@ graph_def.node[1].input.append(weights) # Create SSD postprocessing head ############################################### # Concatenate predictions of classes, predictions of bounding boxes and proposals. -def tensorMsg(values): - if all([isinstance(v, float) for v in values]): - dtype = 'DT_FLOAT' - field = 'float_val' - elif all([isinstance(v, int) for v in values]): - dtype = 'DT_INT32' - field = 'int_val' - else: - raise Exception('Wrong values types') - - msg = 'tensor { dtype: ' + dtype + ' tensor_shape { dim { size: %d } }' % len(values) - for value in values: - msg += '%s: %s ' % (field, str(value)) - return msg + '}' - -def addConstNode(name, values): - node = NodeDef() - node.name = name - node.op = 'Const' - text_format.Merge(tensorMsg(values), node.attr["value"]) - graph_def.node.extend([node]) - def addConcatNode(name, inputs, axisNodeName): concat = NodeDef() concat.name = name @@ -194,12 +178,18 @@ def addConcatNode(name, inputs, axisNodeName): -addConstNode('concat/axis_flatten', [-1]) -addConstNode('PriorBox/concat/axis', [-2]) +addConstNode('concat/axis_flatten', [-1], graph_def) +addConstNode('PriorBox/concat/axis', [-2], graph_def) -for label in ['ClassPredictor', 'BoxEncodingPredictor']: +for label in ['ClassPredictor', 'BoxEncodingPredictor' if args.box_predictor == 'convolutional' else
'BoxPredictor']: concatInputs = [] for i in range(args.num_layers): # Flatten predictions flatten = NodeDef() - inpName = 'BoxPredictor_%d/%s/BiasAdd' % (i, label) + if args.box_predictor == 'convolutional': + inpName = 'BoxPredictor_%d/%s/BiasAdd' % (i, label) + else: + if i == 0: + inpName = 'WeightSharedConvolutionalBoxPredictor/%s/BiasAdd' % label + else: + inpName = 'WeightSharedConvolutionalBoxPredictor_%d/%s/BiasAdd' % (i, label) flatten.input.append(inpName) flatten.name = inpName + '/Flatten' flatten.op = 'Flatten' @@ -210,7 +200,9 @@ for label in ['ClassPredictor', 'BoxEncodingPredictor']: idx = 0 for node in graph_def.node: - if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx): + if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx) or \ + node.name == ('WeightSharedConvolutionalBoxPredictor_%d/BoxPredictor/Conv2D' % idx) or \ + node.name == 'WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D': text_format.Merge('b: true', node.attr["loc_pred_transposed"]) idx += 1 assert(idx == args.num_layers) @@ -224,13 +216,19 @@ for i in range(args.num_layers): priorBox = NodeDef() priorBox.name = 'PriorBox_%d' % i priorBox.op = 'PriorBox' - priorBox.input.append('BoxPredictor_%d/BoxEncodingPredictor/BiasAdd' % i) + if args.box_predictor == 'convolutional': + priorBox.input.append('BoxPredictor_%d/BoxEncodingPredictor/BiasAdd' % i) + else: + if i == 0: + priorBox.input.append('WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D') + else: + priorBox.input.append('WeightSharedConvolutionalBoxPredictor_%d/BoxPredictor/BiasAdd' % i) priorBox.input.append(graph_def.node[0].name) # image_tensor text_format.Merge('b: false', priorBox.attr["flip"]) text_format.Merge('b: false', priorBox.attr["clip"]) - if i == 0: + if i == 0 and not args.not_reduce_boxes_in_lowest_layer: widths = [0.1, args.min_scale * sqrt(2.0), args.min_scale * sqrt(0.5)] heights = [0.1, args.min_scale / sqrt(2.0), args.min_scale / sqrt(0.5)] else: @@ -261,7
+259,10 @@ detectionOut = NodeDef() detectionOut.name = 'detection_out' detectionOut.op = 'DetectionOutput' -detectionOut.input.append('BoxEncodingPredictor/concat') +if args.box_predictor == 'convolutional': + detectionOut.input.append('BoxEncodingPredictor/concat') +else: + detectionOut.input.append('BoxPredictor/concat') detectionOut.input.append(sigmoid.name) detectionOut.input.append('PriorBox/concat') diff --git a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Common/StandardStyles.xaml b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Common/StandardStyles.xaml index 4def039e59..c8f8500db2 100644 --- a/samples/winrt/OcvImageProcessing/OcvImageProcessing/Common/StandardStyles.xaml +++ b/samples/winrt/OcvImageProcessing/OcvImageProcessing/Common/StandardStyles.xaml @@ -1091,7 +1091,7 @@ Style x:Key="SkipBackAppBarButtonStyle" TargetType="ButtonBase" BasedOn="{Static