From 54279523a3c9c543b37b31b7a76212806ef03088 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 6 Sep 2018 18:56:55 +0300 Subject: [PATCH] Merge pull request #12437 from vpisarev:avx2_fixes * trying to fix the custom AVX2 builder test failures (false alarms) * fixed compile error with CPU_BASELINE=AVX2 on x86; raised tolerance thresholds in a couple of tests * fixed compile error with CPU_BASELINE=AVX2 on x86; raised tolerance thresholds in a couple of tests * fixed compile error with CPU_BASELINE=AVX2 on x86; raised tolerance thresholds in a couple of tests * seemingly disabled false alarm warning in surf.cpp; increased tolerance thresholds in the tests for SolvePnP and in DNN/ENet --- cmake/OpenCVCompilerOptions.cmake | 4 ++-- modules/calib3d/perf/perf_pnp.cpp | 4 ++-- modules/calib3d/test/test_affine3.cpp | 7 +++--- .../include/opencv2/core/hal/intrin_avx.hpp | 22 +++++++++++++++++-- modules/core/perf/opencl/perf_arithm.cpp | 2 +- modules/core/perf/perf_split.cpp | 4 ---- modules/dnn/test/test_caffe_importer.cpp | 2 +- modules/dnn/test/test_torch_importer.cpp | 4 ++-- modules/photo/test/test_hdr.cpp | 4 ---- 9 files changed, 31 insertions(+), 22 deletions(-) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 30e4a00a3f..082debf195 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -125,8 +125,8 @@ if(CV_GCC OR CV_CLANG) ) add_extra_compiler_option(-Wimplicit-fallthrough=3) endif() - if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 7.2.0) - add_extra_compiler_option(-Wno-strict-overflow) # Issue is fixed in GCC 7.2.1 + if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + add_extra_compiler_option(-Wno-strict-overflow) # Issue appears when compiling surf.cpp from opencv_contrib/modules/xfeatures2d endif() endif() add_extra_compiler_option(-fdiagnostics-show-option) diff --git a/modules/calib3d/perf/perf_pnp.cpp b/modules/calib3d/perf/perf_pnp.cpp index d3d6076252..7c7254a0df 100644 --- a/modules/calib3d/perf/perf_pnp.cpp +++ b/modules/calib3d/perf/perf_pnp.cpp @@ -52,8 +52,8 @@ PERF_TEST_P(PointsNum_Algo, solvePnP, cv::solvePnP(points3d, points2d, intrinsics, distortion, rvec, tvec, false, algo); } - SANITY_CHECK(rvec, 1e-6); - SANITY_CHECK(tvec, 1e-6); + SANITY_CHECK(rvec, 1e-4); + SANITY_CHECK(tvec, 1e-4); } PERF_TEST_P(PointsNum_Algo, solvePnPSmallPoints, diff --git a/modules/calib3d/test/test_affine3.cpp b/modules/calib3d/test/test_affine3.cpp index b0efecaa9f..a69978b1e5 100644 --- a/modules/calib3d/test/test_affine3.cpp +++ b/modules/calib3d/test/test_affine3.cpp @@ -47,16 +47,15 @@ namespace opencv_test { namespace { TEST(Calib3d_Affine3f, accuracy) { + const double eps = 1e-5; cv::Vec3d rvec(0.2, 0.5, 0.3); cv::Affine3d affine(rvec); cv::Mat expected; cv::Rodrigues(rvec, expected); - - ASSERT_EQ(0, cvtest::norm(cv::Mat(affine.matrix, false).colRange(0, 3).rowRange(0, 3) != expected, cv::NORM_L2)); - ASSERT_EQ(0, cvtest::norm(cv::Mat(affine.linear()) != expected, cv::NORM_L2)); - + ASSERT_LE(cvtest::norm(cv::Mat(affine.matrix, false).colRange(0, 3).rowRange(0, 3), expected, cv::NORM_L2), eps); + ASSERT_LE(cvtest::norm(cv::Mat(affine.linear()), expected, cv::NORM_L2), eps); cv::Matx33d R = cv::Matx33d::eye(); diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 36c7c0f1a1..c21b46a58f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -234,7 +234,15 @@ struct v_uint64x4 { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); } v_uint64x4() : val(_mm256_setzero_si256()) {} uint64 get0() const - { return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); } + { + #if defined __x86_64__ || defined _M_X64 + return (uint64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (unsigned)a | ((uint64)(unsigned)b << 32); + #endif + } }; struct v_int64x4 @@ -247,7 +255,17 @@ struct v_int64x4 v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3) { val = _mm256_setr_epi64x(v0, v1, v2, v3); } v_int64x4() : val(_mm256_setzero_si256()) {} - int64 get0() const { return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); } + + int64 get0() const + { + #if defined __x86_64__ || defined _M_X64 + return (int64)_mm_cvtsi128_si64(_mm256_castsi256_si128(val)); + #else + int a = _mm_cvtsi128_si32(_mm256_castsi256_si128(val)); + int b = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_srli_epi64(val, 32))); + return (int64)((unsigned)a | ((uint64)(unsigned)b << 32)); + #endif + } }; struct v_float64x4 diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 7556041c94..9f5f6e9e77 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -117,7 +117,7 @@ OCL_PERF_TEST_P(LogFixture, Log, ::testing::Combine( OCL_TEST_CYCLE() cv::log(src, dst); if (CV_MAT_DEPTH(type) >= CV_32F) - SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE); + SANITY_CHECK(dst, 2e-4, ERROR_RELATIVE); else SANITY_CHECK(dst, 1); } diff --git a/modules/core/perf/perf_split.cpp b/modules/core/perf/perf_split.cpp index d1d66a10bb..2cbc0b289c 100644 --- a/modules/core/perf/perf_split.cpp +++ b/modules/core/perf/perf_split.cpp @@ -27,11 +27,7 @@ PERF_TEST_P( Size_Depth_Channels, split, int runs = (sz.width <= 640) ? 8 : 1; TEST_CYCLE_MULTIRUN(runs) split(m, (vector&)mv); -#if defined (__aarch64__) SANITY_CHECK(mv, 2e-5); -#else - SANITY_CHECK(mv, 1e-12); -#endif } } // namespace diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 33f2fa1f22..85ff7ace21 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -391,7 +391,7 @@ TEST_P(Test_Caffe_nets, Colorization) Mat out = net.forward(); // Reference output values are in range [-29.1, 69.5] - const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.21 : 4e-4; + const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.25 : 4e-4; const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5.3 : 3e-3; normAssert(out, ref, "", l1, lInf); } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 8c7866f69d..bd5f11249d 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -313,14 +313,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy) // Due to numerical instability in Pooling-Unpooling layers (indexes jittering) // thresholds for ENet must be changed. Accuracy of results was checked on // Cityscapes dataset and difference in mIOU with Torch is 10E-4% - normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44); + normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.5); const int N = 3; for (int i = 0; i < N; i++) { net.setInput(inputBlob, ""); Mat out = net.forward(); - normAssert(ref, out, "", 0.00044, target == DNN_TARGET_CPU ? 0.453 : 0.44); + normAssert(ref, out, "", 0.00044, /*target == DNN_TARGET_CPU ? 0.453 : */0.5); } } diff --git a/modules/photo/test/test_hdr.cpp b/modules/photo/test/test_hdr.cpp index c4bf536278..fd4797fac7 100644 --- a/modules/photo/test/test_hdr.cpp +++ b/modules/photo/test/test_hdr.cpp @@ -213,11 +213,7 @@ TEST(Photo_MergeRobertson, regression) loadImage(test_path + "merge/robertson.hdr", expected); merge->process(images, result, times); -#if defined(__aarch64__) || defined(__PPC64__) const float eps = 6.f; -#else - const float eps = 5.f; -#endif checkEqual(expected, result, eps, "MergeRobertson"); }