core(test): intrinsic tests for all dispatched CPU optimizations
- tests for both SIMD128 / SIMD256 - different dispatched + baseline(SIMD128) intrinsics
This commit is contained in:
parent
67f79aabdd
commit
3f302cabb8
@ -761,24 +761,24 @@ macro(ocv_compiler_optimization_fill_cpu_config)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(ocv_add_dispatched_file filename)
|
||||
macro(__ocv_add_dispatched_file filename target_src_var src_directory dst_directory precomp_hpp optimizations_var)
|
||||
if(NOT OPENCV_INITIAL_PASS)
|
||||
set(__codestr "
|
||||
#include \"${CMAKE_CURRENT_LIST_DIR}/src/precomp.hpp\"
|
||||
#include \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\"
|
||||
#include \"${src_directory}/${precomp_hpp}\"
|
||||
#include \"${src_directory}/${filename}.simd.hpp\"
|
||||
")
|
||||
|
||||
set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${CMAKE_CURRENT_LIST_DIR}/src/${filename}.simd.hpp\"")
|
||||
set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${src_directory}/${filename}.simd.hpp\"")
|
||||
set(__dispatch_modes "BASELINE")
|
||||
|
||||
set(__optimizations "${ARGN}")
|
||||
set(__optimizations "${${optimizations_var}}")
|
||||
if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
|
||||
set(__optimizations "")
|
||||
endif()
|
||||
|
||||
foreach(OPT ${__optimizations})
|
||||
string(TOLOWER "${OPT}" OPT_LOWER)
|
||||
set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp")
|
||||
set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.${OPT_LOWER}.cpp")
|
||||
if(EXISTS "${__file}")
|
||||
file(READ "${__file}" __content)
|
||||
else()
|
||||
@ -791,7 +791,11 @@ macro(ocv_add_dispatched_file filename)
|
||||
endif()
|
||||
|
||||
if(";${CPU_DISPATCH};" MATCHES "${OPT}" OR __CPU_DISPATCH_INCLUDE_ALL)
|
||||
list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}")
|
||||
if(EXISTS "${src_directory}/${filename}.${OPT_LOWER}.cpp")
|
||||
message(STATUS "Using overrided ${OPT} source: ${src_directory}/${filename}.${OPT_LOWER}.cpp")
|
||||
else()
|
||||
list(APPEND ${target_src_var} "${__file}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(__declarations_str "${__declarations_str}
|
||||
@ -803,9 +807,11 @@ macro(ocv_add_dispatched_file filename)
|
||||
|
||||
set(__declarations_str "${__declarations_str}
|
||||
#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
|
||||
|
||||
#undef CV_CPU_SIMD_FILENAME
|
||||
")
|
||||
|
||||
set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp")
|
||||
set(__file "${CMAKE_CURRENT_BINARY_DIR}/${dst_directory}${filename}.simd_declarations.hpp")
|
||||
if(EXISTS "${__file}")
|
||||
file(READ "${__file}" __content)
|
||||
endif()
|
||||
@ -817,6 +823,17 @@ macro(ocv_add_dispatched_file filename)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# ocv_add_dispatched_file(<filename> [TEST] [<OPT>...])
#
# Public entry point that forwards to __ocv_add_dispatched_file().
#
#   <filename>  base name of the "<filename>.simd.hpp" file to dispatch
#   TEST        optional marker; when it is the first extra argument the file is
#               looked up under "${CMAKE_CURRENT_LIST_DIR}/test", wrappers go to
#               the "test/" sub-directory, "test_precomp.hpp" is used and the
#               generated sources are appended to
#               OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED
#   <OPT>...    optimization names (e.g. SSE2 AVX2)
#
# Without TEST the file is taken from "${CMAKE_CURRENT_LIST_DIR}/src", uses
# "precomp.hpp" and lands in OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED.
#
# This is a macro, so __optimizations is intentionally left set in the
# caller's scope (the helper reads it by name).
macro(ocv_add_dispatched_file filename)
  set(__optimizations "${ARGN}")

  # Detect the TEST marker from the captured argument list instead of
  # ${ARGV1}: inside a macro ${ARGV1} is only substituted when a second
  # argument was really passed; otherwise it silently resolves against any
  # ARGV1 variable of the calling scope (e.g. an enclosing function).
  set(__ocv_dispatched_first_arg "")
  list(LENGTH __optimizations __ocv_dispatched_argc)
  if(__ocv_dispatched_argc GREATER 0)
    list(GET __optimizations 0 __ocv_dispatched_first_arg)
  endif()

  # The leading space keeps if() from dereferencing a variable named "TEST".
  if(" ${__ocv_dispatched_first_arg}" STREQUAL " TEST")
    list(REMOVE_AT __optimizations 0)
    __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/test" "test/" "test_precomp.hpp" __optimizations)
  else()
    __ocv_add_dispatched_file("${filename}" "OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED" "${CMAKE_CURRENT_LIST_DIR}/src" "" "precomp.hpp" __optimizations)
  endif()

  # Do not leak the temporaries introduced above into the caller's scope.
  unset(__ocv_dispatched_first_arg)
  unset(__ocv_dispatched_argc)
endmacro()
|
||||
|
||||
|
||||
# Workaround to support code which always requires all code paths
|
||||
macro(ocv_add_dispatched_file_force_all)
|
||||
set(__CPU_DISPATCH_INCLUDE_ALL 1)
|
||||
|
||||
@ -1202,6 +1202,9 @@ function(ocv_add_accuracy_tests)
|
||||
set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs})
|
||||
endif()
|
||||
|
||||
if(OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED)
|
||||
list(APPEND OPENCV_TEST_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_TEST_SOURCES_DISPATCHED})
|
||||
endif()
|
||||
ocv_compiler_optimization_process_sources(OPENCV_TEST_${the_module}_SOURCES OPENCV_TEST_${the_module}_DEPS ${the_target})
|
||||
|
||||
if(NOT BUILD_opencv_world)
|
||||
@ -1211,6 +1214,9 @@ function(ocv_add_accuracy_tests)
|
||||
source_group("Src" FILES "${${the_target}_pch}")
|
||||
ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
|
||||
ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
|
||||
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/test")
|
||||
ocv_target_include_directories(${the_target} "${CMAKE_CURRENT_BINARY_DIR}/test")
|
||||
endif()
|
||||
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS})
|
||||
add_dependencies(opencv_tests ${the_target})
|
||||
|
||||
|
||||
@ -3,6 +3,10 @@ set(the_description "The Core Functionality")
|
||||
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
|
||||
ocv_add_dispatched_file(stat SSE4_2 AVX2)
|
||||
|
||||
# dispatching for accuracy tests
|
||||
ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)
|
||||
ocv_add_dispatched_file_force_all(test_intrin256 TEST AVX2)
|
||||
|
||||
ocv_add_module(core
|
||||
OPTIONAL opencv_cudev
|
||||
WRAP java python js)
|
||||
|
||||
@ -204,20 +204,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#define CV_SIMD512_64F 0
|
||||
#endif
|
||||
|
||||
#if CV_SIMD512
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD512_64F
|
||||
#define CV_SIMD_WIDTH 64
|
||||
#elif CV_SIMD256
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD256_64F
|
||||
#define CV_SIMD_WIDTH 32
|
||||
#else
|
||||
#define CV_SIMD CV_SIMD128
|
||||
#define CV_SIMD_64F CV_SIMD128_64F
|
||||
#define CV_SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
||||
@ -309,7 +295,15 @@ template<typename _Tp> struct V_RegTraits
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CV_SIMD256
|
||||
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD512_64F
|
||||
#define CV_SIMD_WIDTH 64
|
||||
// TODO typedef v_uint8 / v_int32 / etc types here
|
||||
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD256_64F
|
||||
#define CV_SIMD_WIDTH 32
|
||||
typedef v_uint8x32 v_uint8;
|
||||
typedef v_int8x32 v_int8;
|
||||
typedef v_uint16x16 v_uint16;
|
||||
@ -329,7 +323,10 @@ template<typename _Tp> struct V_RegTraits
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
|
||||
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
|
||||
inline void vx_cleanup() { v256_cleanup(); }
|
||||
#elif CV_SIMD128 || CV_SIMD128_CPP
|
||||
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
|
||||
#define CV_SIMD CV_SIMD128
|
||||
#define CV_SIMD_64F CV_SIMD128_64F
|
||||
#define CV_SIMD_WIDTH 16
|
||||
typedef v_uint8x16 v_uint8;
|
||||
typedef v_int8x16 v_int8;
|
||||
typedef v_uint16x8 v_uint16;
|
||||
@ -380,6 +377,14 @@ inline unsigned int trailingZeros32(unsigned int value) {
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD_64F
|
||||
#define CV_SIMD_64F 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD
|
||||
#define CV_SIMD 0
|
||||
#endif
|
||||
|
||||
} // cv::
|
||||
|
||||
//! @endcond
|
||||
|
||||
@ -1,5 +0,0 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin.simd.hpp"
|
||||
@ -2,101 +2,100 @@
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin.simd.hpp"
|
||||
|
||||
#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp"
|
||||
#define CV_CPU_DISPATCH_MODE FP16
|
||||
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
#include "test_intrin128.simd.hpp"
|
||||
#include "test_intrin128.simd_declarations.hpp"
|
||||
|
||||
#undef CV_CPU_DISPATCH_MODES_ALL
|
||||
|
||||
#include "opencv2/core/cv_cpu_dispatch.h"
|
||||
#include "test_intrin256.simd.hpp"
|
||||
#include "test_intrin256.simd_declarations.hpp"
|
||||
|
||||
#define CV_CPU_DISPATCH_MODE AVX2
|
||||
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;
|
||||
|
||||
TEST(hal_intrin, uint8x16)
|
||||
{ test_hal_intrin_uint8(); }
|
||||
#define CV_CPU_CALL_BASELINE_(fn, args) CV_CPU_CALL_BASELINE(fn, args)
|
||||
|
||||
TEST(hal_intrin, int8x16)
|
||||
{ test_hal_intrin_int8(); }
|
||||
// Body of a SIMD128 test case: hands `impl` (invoked with an empty argument
// list) to the CV_CPU_CALL_<isa>_ macro and then throws SkipTestException.
// NOTE(review): presumably CV_CPU_CALL_<isa>_ leaves the enclosing function
// when the optimization is usable, so the throw is only reached otherwise --
// confirm against the CV_CPU_CALL_* definitions.
#define DISPATCH_SIMD128(impl, isa) \
    do \
    { \
        CV_CPU_CALL_ ## isa ## _(impl, ()); \
        throw SkipTestException("SIMD128 (" #isa ") is not available or disabled"); \
    } while(0)
|
||||
|
||||
TEST(hal_intrin, uint16x8)
|
||||
{ test_hal_intrin_uint16(); }
|
||||
// Body of a SIMD256 test case: hands `impl` (invoked with an empty argument
// list) to the CV_CPU_CALL_<isa>_ macro and then throws SkipTestException.
// NOTE(review): presumably CV_CPU_CALL_<isa>_ leaves the enclosing function
// when the optimization is usable, so the throw is only reached otherwise --
// confirm against the CV_CPU_CALL_* definitions.
#define DISPATCH_SIMD256(impl, isa) \
    do \
    { \
        CV_CPU_CALL_ ## isa ## _(impl, ()); \
        throw SkipTestException("SIMD256 (" #isa ") is not available or disabled"); \
    } while(0)
|
||||
|
||||
TEST(hal_intrin, int16x8)
|
||||
{ test_hal_intrin_int16(); }
|
||||
// Declares ten test cases in suite hal_intrin<width>, one per lane type, each
// named "<type>_<isa>". Every case forwards the matching test_hal_intrin_*
// function to DISPATCH_SIMD<width> together with `isa`.
//   width - pasted onto "hal_intrin" and "DISPATCH_SIMD" (128 or 256)
//   isa   - pasted onto the test name and passed on to the dispatcher
#define DEFINE_SIMD_TESTS(width, isa) \
    TEST(hal_intrin ## width, uint8x16_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_uint8, isa); } \
    TEST(hal_intrin ## width, int8x16_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_int8, isa); } \
    TEST(hal_intrin ## width, uint16x8_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_uint16, isa); } \
    TEST(hal_intrin ## width, int16x8_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_int16, isa); } \
    TEST(hal_intrin ## width, int32x4_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_int32, isa); } \
    TEST(hal_intrin ## width, uint32x4_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_uint32, isa); } \
    TEST(hal_intrin ## width, uint64x2_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_uint64, isa); } \
    TEST(hal_intrin ## width, int64x2_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_int64, isa); } \
    TEST(hal_intrin ## width, float32x4_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_float32, isa); } \
    TEST(hal_intrin ## width, float64x2_ ## isa) \
    { DISPATCH_SIMD ## width(test_hal_intrin_float64, isa); }
|
||||
|
||||
TEST(hal_intrin, int32x4)
|
||||
{ test_hal_intrin_int32(); }
|
||||
namespace intrin128 {
|
||||
|
||||
TEST(hal_intrin, uint32x4)
|
||||
{ test_hal_intrin_uint32(); }
|
||||
DEFINE_SIMD_TESTS(128, BASELINE)
|
||||
|
||||
TEST(hal_intrin, uint64x2)
|
||||
{ test_hal_intrin_uint64(); }
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_SSE2 || defined CV_CPU_BASELINE_COMPILE_SSE2
|
||||
DEFINE_SIMD_TESTS(128, SSE2)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_SSE3 || defined CV_CPU_BASELINE_COMPILE_SSE3
|
||||
DEFINE_SIMD_TESTS(128, SSE3)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_SSSE3 || defined CV_CPU_BASELINE_COMPILE_SSSE3
|
||||
DEFINE_SIMD_TESTS(128, SSSE3)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_SSE4_1 || defined CV_CPU_BASELINE_COMPILE_SSE4_1
|
||||
DEFINE_SIMD_TESTS(128, SSE4_1)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_SSE4_2 || defined CV_CPU_BASELINE_COMPILE_SSE4_2
|
||||
DEFINE_SIMD_TESTS(128, SSE4_2)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_AVX || defined CV_CPU_BASELINE_COMPILE_AVX
|
||||
DEFINE_SIMD_TESTS(128, AVX)
|
||||
#endif
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2
|
||||
DEFINE_SIMD_TESTS(128, AVX2)
|
||||
#endif
|
||||
|
||||
TEST(hal_intrin, int64x2)
|
||||
{ test_hal_intrin_int64(); }
|
||||
|
||||
TEST(hal_intrin, float32x4)
|
||||
{ test_hal_intrin_float32(); }
|
||||
|
||||
TEST(hal_intrin, float64x2)
|
||||
{ test_hal_intrin_float64(); }
|
||||
|
||||
TEST(hal_intrin, float16x8)
|
||||
TEST(hal_intrin128, float16x8_FP16)
|
||||
{
|
||||
CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
|
||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||
}
|
||||
|
||||
#define DISPATCH_SIMD_MODES AVX2
|
||||
#define DISPATCH_SIMD_NAME "SIMD256"
|
||||
#define DISPATCH_SIMD(fun) \
|
||||
do { \
|
||||
CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \
|
||||
throw SkipTestException( \
|
||||
"Unsupported hardware: " \
|
||||
DISPATCH_SIMD_NAME \
|
||||
" is not available" \
|
||||
); \
|
||||
} while(0)
|
||||
} // namespace intrin128
|
||||
|
||||
TEST(hal_intrin256, uint8x32)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint8); }
|
||||
|
||||
TEST(hal_intrin256, int8x32)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int8); }
|
||||
namespace intrin256 {
|
||||
|
||||
TEST(hal_intrin256, uint16x16)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint16); }
|
||||
|
||||
TEST(hal_intrin256, int16x16)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int16); }
|
||||
// Not available due to missing C++ backend for SIMD256
|
||||
//DEFINE_SIMD_TESTS(256, BASELINE)
|
||||
|
||||
TEST(hal_intrin256, uint32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint32); }
|
||||
//#if defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||
//DEFINE_SIMD_TESTS(256, AVX)
|
||||
//#endif
|
||||
|
||||
TEST(hal_intrin256, int32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int32); }
|
||||
#if defined CV_CPU_DISPATCH_COMPILE_AVX2 || defined CV_CPU_BASELINE_COMPILE_AVX2
|
||||
DEFINE_SIMD_TESTS(256, AVX2)
|
||||
#endif
|
||||
|
||||
TEST(hal_intrin256, uint64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_uint64); }
|
||||
|
||||
TEST(hal_intrin256, int64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_int64); }
|
||||
|
||||
TEST(hal_intrin256, float32x8)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_float32); }
|
||||
|
||||
TEST(hal_intrin256, float64x4)
|
||||
{ DISPATCH_SIMD(test_hal_intrin_float64); }
|
||||
|
||||
TEST(hal_intrin256, float16x16)
|
||||
TEST(hal_intrin256, float16x16_FP16)
|
||||
{
|
||||
if (!CV_CPU_HAS_SUPPORT_FP16)
|
||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||
DISPATCH_SIMD(test_hal_intrin_float16);
|
||||
//CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
|
||||
CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ());
|
||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||
}
|
||||
|
||||
|
||||
} // namespace intrin256
|
||||
|
||||
}} // namespace
|
||||
@ -1,19 +0,0 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
void test_hal_intrin_float16()
|
||||
{
|
||||
TheTest<v_float16>()
|
||||
.test_loadstore_fp16()
|
||||
.test_float_cvt_fp16()
|
||||
;
|
||||
}
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
}} // namespace
|
||||
@ -1,296 +0,0 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
void test_hal_intrin_uint8();
|
||||
void test_hal_intrin_int8();
|
||||
void test_hal_intrin_uint16();
|
||||
void test_hal_intrin_int16();
|
||||
void test_hal_intrin_uint32();
|
||||
void test_hal_intrin_int32();
|
||||
void test_hal_intrin_uint64();
|
||||
void test_hal_intrin_int64();
|
||||
void test_hal_intrin_float32();
|
||||
void test_hal_intrin_float64();
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
//============= 8-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint8()
|
||||
{
|
||||
TheTest<v_uint8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
|
||||
#if CV_SIMD256
|
||||
TheTest<v_uint8>()
|
||||
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
|
||||
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
|
||||
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
|
||||
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_hal_intrin_int8()
|
||||
{
|
||||
TheTest<v_int8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 16-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint16()
|
||||
{
|
||||
TheTest<v_uint16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int16()
|
||||
{
|
||||
TheTest<v_int16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_dot_prod()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 32-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint32()
|
||||
{
|
||||
TheTest<v_uint32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int32()
|
||||
{
|
||||
TheTest<v_int32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_abs()
|
||||
.test_cmp()
|
||||
.test_popcount()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_float_cvt32()
|
||||
.test_float_cvt64()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 64-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint64()
|
||||
{
|
||||
TheTest<v_uint64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int64()
|
||||
{
|
||||
TheTest<v_int64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= Floating point =====================================================================
|
||||
void test_hal_intrin_float32()
|
||||
{
|
||||
TheTest<v_float32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_interleave_2channel()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt64()
|
||||
.test_matmul()
|
||||
.test_transpose()
|
||||
.test_reduce_sum4()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
|
||||
#if CV_SIMD256
|
||||
TheTest<v_float32>()
|
||||
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
|
||||
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runs the TheTest<v_float64> checks below in the listed order. The whole body
// is compiled only when CV_SIMD_64F is non-zero; extract/rotate with arguments
// 2 and 3 are added only when CV_SIMD256 is non-zero as well.
void test_hal_intrin_float64()
{
#if CV_SIMD_64F
    typedef TheTest<v_float64> Suite;

    {
        Suite suite = Suite();
        suite.test_loadstore();
        suite.test_addsub();
        suite.test_mul();
        suite.test_div();
        suite.test_cmp();
        suite.test_sqrt_abs();
        suite.test_min_max();
        suite.test_float_absdiff();
        suite.test_mask();
        suite.test_unpack();
        suite.test_float_math();
        suite.test_float_cvt32();
        suite.test_extract<0>();
        suite.test_extract<1>();
        suite.test_rotate<0>();
        suite.test_rotate<1>();
    }

#if CV_SIMD256
    {
        // Extra cases on a fresh TheTest object, as in the chained original.
        Suite wide = Suite();
        wide.test_extract<2>();
        wide.test_extract<3>();
        wide.test_rotate<2>();
        wide.test_rotate<3>();
    }
#endif //CV_SIMD256

#endif
}
|
||||
|
||||
#if CV_FP16 && CV_SIMD_WIDTH > 16
|
||||
void test_hal_intrin_float16()
|
||||
{
|
||||
TheTest<v_float16>()
|
||||
.test_loadstore_fp16()
|
||||
.test_float_cvt_fp16()
|
||||
;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
|
||||
}} //namespace
|
||||
22
modules/core/test/test_intrin128.simd.hpp
Normal file
22
modules/core/test/test_intrin128.simd.hpp
Normal file
@ -0,0 +1,22 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
#define CV__SIMD_FORCE_WIDTH 128
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#undef CV__SIMD_FORCE_WIDTH
|
||||
|
||||
#if CV_SIMD_WIDTH != 16
|
||||
#error "Invalid build configuration"
|
||||
#endif
|
||||
|
||||
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
namespace opencv_test { namespace hal { namespace intrin128 {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
}}} //namespace
|
||||
23
modules/core/test/test_intrin256.simd.hpp
Normal file
23
modules/core/test/test_intrin256.simd.hpp
Normal file
@ -0,0 +1,23 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#if !defined CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY && \
|
||||
!defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS // TODO? C++ fallback implementation for SIMD256
|
||||
|
||||
#define CV__SIMD_FORCE_WIDTH 256
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#undef CV__SIMD_FORCE_WIDTH
|
||||
|
||||
#if CV_SIMD_WIDTH != 32
|
||||
#error "Invalid build configuration"
|
||||
#endif
|
||||
|
||||
#endif // !CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY && !CV_DISABLE_OPTIMIZATION && CV_ENABLE_INTRINSICS
|
||||
|
||||
namespace opencv_test { namespace hal { namespace intrin256 {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
#include "test_intrin_utils.hpp"
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
}}} //namespace
|
||||
@ -1,10 +1,22 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
namespace opencv_test { namespace hal {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
// This file is not standalone.
|
||||
// It is included with these active namespaces:
|
||||
//namespace opencv_test { namespace hal { namespace intrinXXX {
|
||||
//CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
void test_hal_intrin_uint8();
|
||||
void test_hal_intrin_int8();
|
||||
void test_hal_intrin_uint16();
|
||||
void test_hal_intrin_int16();
|
||||
void test_hal_intrin_uint32();
|
||||
void test_hal_intrin_int32();
|
||||
void test_hal_intrin_uint64();
|
||||
void test_hal_intrin_int64();
|
||||
void test_hal_intrin_float32();
|
||||
void test_hal_intrin_float64();
|
||||
|
||||
void test_hal_intrin_float16();
|
||||
|
||||
@ -258,6 +270,7 @@ template<typename R> struct TheTest
|
||||
v_store(out.u.d, r_low);
|
||||
for (int i = 0; i < R::nlanes/2; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
|
||||
}
|
||||
|
||||
@ -266,6 +279,7 @@ template<typename R> struct TheTest
|
||||
v_store(out.u.d, r_low_align8byte);
|
||||
for (int i = 0; i < R::nlanes/2; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((LaneType)data.u[i + R::nlanes/2], (LaneType)out.u[i]);
|
||||
}
|
||||
|
||||
@ -296,6 +310,7 @@ template<typename R> struct TheTest
|
||||
resV.fill((LaneType)8);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((LaneType)0, resZ[i]);
|
||||
EXPECT_EQ((LaneType)8, resV[i]);
|
||||
}
|
||||
@ -342,6 +357,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(data1, Data<R>(a));
|
||||
EXPECT_EQ(data2, Data<R>(b));
|
||||
EXPECT_EQ(data3, Data<R>(c));
|
||||
@ -374,6 +390,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(data1, Data<R>(a));
|
||||
EXPECT_EQ(data2, Data<R>(b));
|
||||
}
|
||||
@ -397,6 +414,7 @@ template<typename R> struct TheTest
|
||||
const int n = Rx2::nlanes;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i], resB[i]);
|
||||
EXPECT_EQ(dataA[i], resC[i]);
|
||||
EXPECT_EQ(dataA[i + n], resD[i]);
|
||||
@ -412,7 +430,10 @@ template<typename R> struct TheTest
|
||||
Data<Rx4> out = vx_load_expand_q(data.d);
|
||||
const int n = Rx4::nlanes;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(data[i], out[i]);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -426,6 +447,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = a + b, resD = a - b;
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] + dataB[i]), resC[i]);
|
||||
EXPECT_EQ(saturate_cast<LaneType>(dataA[i] - dataB[i]), resD[i]);
|
||||
}
|
||||
@ -443,6 +465,7 @@ template<typename R> struct TheTest
|
||||
resD = v_sub_wrap(a, b);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]);
|
||||
EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]);
|
||||
}
|
||||
@ -458,6 +481,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = a * b;
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] * dataB[i], resC[i]);
|
||||
}
|
||||
|
||||
@ -473,6 +497,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = a / b;
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] / dataB[i], resC[i]);
|
||||
}
|
||||
|
||||
@ -492,6 +517,7 @@ template<typename R> struct TheTest
|
||||
const int n = R::nlanes / 2;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]);
|
||||
EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]);
|
||||
}
|
||||
@ -511,6 +537,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < Ru::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]);
|
||||
}
|
||||
|
||||
@ -529,6 +556,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resB[i]);
|
||||
EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resC[i]);
|
||||
EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resD[i]);
|
||||
@ -553,6 +581,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
|
||||
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
|
||||
EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0);
|
||||
@ -583,6 +612,7 @@ template<typename R> struct TheTest
|
||||
const int n = R::nlanes / 2;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]);
|
||||
EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]);
|
||||
}
|
||||
@ -597,6 +627,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = a & b, resD = a | b, resE = a ^ b, resF = ~a;
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] & dataB[i], resC[i]);
|
||||
EXPECT_EQ(dataA[i] | dataB[i], resD[i]);
|
||||
EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]);
|
||||
@ -615,6 +646,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
|
||||
EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]);
|
||||
EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]);
|
||||
@ -632,6 +664,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = v_min(a, b), resD = v_max(a, b);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]);
|
||||
EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]);
|
||||
}
|
||||
@ -672,6 +705,7 @@ template<typename R> struct TheTest
|
||||
const u_type mask = std::numeric_limits<LaneType>::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0;
|
||||
for (int i = 0; i < Ru::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
u_type uA = dataA[i] ^ mask;
|
||||
u_type uB = dataB[i] ^ mask;
|
||||
EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]);
|
||||
@ -691,6 +725,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = v_absdiff(a, b);
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]);
|
||||
}
|
||||
return *this;
|
||||
@ -744,6 +779,7 @@ template<typename R> struct TheTest
|
||||
Data<R> resF = f;
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
int_type m2 = dataB.as_int(i);
|
||||
EXPECT_EQ((dataD.as_int(i) & m2) | (dataE.as_int(i) & ~m2), resF.as_int(i));
|
||||
}
|
||||
@ -776,6 +812,7 @@ template<typename R> struct TheTest
|
||||
const w_type add = (w_type)1 << (s - 1);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
|
||||
@ -816,6 +853,7 @@ template<typename R> struct TheTest
|
||||
const w_type add = (w_type)1 << (s - 1);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
|
||||
EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
|
||||
@ -845,6 +883,7 @@ template<typename R> struct TheTest
|
||||
const int n = R::nlanes/2;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(dataA[i], resC[i*2]);
|
||||
EXPECT_EQ(dataB[i], resC[i*2+1]);
|
||||
EXPECT_EQ(dataA[i+n], resD[i*2]);
|
||||
@ -876,6 +915,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
if (i + s >= R::nlanes)
|
||||
EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]);
|
||||
else
|
||||
@ -901,6 +941,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
if (i + s >= R::nlanes)
|
||||
{
|
||||
EXPECT_EQ((LaneType)0, resC[i]);
|
||||
@ -940,6 +981,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ(cvRound(data1[i]), resB[i]);
|
||||
EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]);
|
||||
EXPECT_EQ(cvFloor(data1[i]), resD[i]);
|
||||
@ -964,6 +1006,7 @@ template<typename R> struct TheTest
|
||||
int n = std::min<int>(Rt::nlanes, R::nlanes);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
|
||||
}
|
||||
return *this;
|
||||
@ -983,10 +1026,12 @@ template<typename R> struct TheTest
|
||||
int n = std::min<int>(Rt::nlanes, R::nlanes);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
|
||||
}
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]);
|
||||
}
|
||||
#endif
|
||||
@ -1006,6 +1051,7 @@ template<typename R> struct TheTest
|
||||
{
|
||||
for (int j = i; j < i + 4; ++j)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
|
||||
LaneType val = dataV[i] * dataA[j]
|
||||
+ dataV[i + 1] * dataB[j]
|
||||
+ dataV[i + 2] * dataC[j]
|
||||
@ -1019,6 +1065,7 @@ template<typename R> struct TheTest
|
||||
{
|
||||
for (int j = i; j < i + 4; ++j)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
|
||||
LaneType val = dataV[i] * dataA[j]
|
||||
+ dataV[i + 1] * dataB[j]
|
||||
+ dataV[i + 2] * dataC[j]
|
||||
@ -1045,6 +1092,7 @@ template<typename R> struct TheTest
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d j=%d", i, j));
|
||||
EXPECT_EQ(dataA[i + j], res[j][i]);
|
||||
EXPECT_EQ(dataB[i + j], res[j][i + 1]);
|
||||
EXPECT_EQ(dataC[i + j], res[j][i + 2]);
|
||||
@ -1066,6 +1114,7 @@ template<typename R> struct TheTest
|
||||
|
||||
for (int i = 0; i < R::nlanes; i += 4)
|
||||
{
|
||||
SCOPED_TRACE(cv::format("i=%d", i));
|
||||
EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]);
|
||||
EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]);
|
||||
EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]);
|
||||
@ -1121,7 +1170,304 @@ template<typename R> struct TheTest
|
||||
|
||||
};
|
||||
|
||||
|
||||
// DUMP_ENTRY(type): diagnostic banner used at the top of each
// test_hal_intrin_* entry point below.
//
// When enabled it prints "SIMD<bits>: <CV__TRACE_FUNCTION>", where <bits> is
// 8 * sizeof(v_uint8). The 'type' argument is accepted but not used by the
// expansion. The expansion already ends with ';', so a call site written as
// 'DUMP_ENTRY(x);' produces one extra (harmless) empty statement.
//
// Flip the '#if 1' to '#if 0' to silence the banner: the fallback below keeps
// every DUMP_ENTRY(...) call site compiling as a no-op.
#if 1
#define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*(int)sizeof(v_uint8), CV__TRACE_FUNCTION);
#else
#define DUMP_ENTRY(type)
#endif
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
}} // namespace
|
||||
//============= 8-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint8()
|
||||
{
|
||||
DUMP_ENTRY(v_uint8);
|
||||
TheTest<v_uint8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
|
||||
#if CV_SIMD_WIDTH == 32
|
||||
TheTest<v_uint8>()
|
||||
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
|
||||
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
|
||||
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
|
||||
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_hal_intrin_int8()
|
||||
{
|
||||
DUMP_ENTRY(v_int8);
|
||||
TheTest<v_int8>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_expand_q()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_cmp()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 16-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint16()
|
||||
{
|
||||
DUMP_ENTRY(v_uint16);
|
||||
TheTest<v_uint16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int16()
|
||||
{
|
||||
DUMP_ENTRY(v_int16);
|
||||
TheTest<v_int16>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_addsub_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_dot_prod()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_abs()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 32-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint32()
|
||||
{
|
||||
DUMP_ENTRY(v_uint32);
|
||||
TheTest<v_uint32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int32()
|
||||
{
|
||||
DUMP_ENTRY(v_int32);
|
||||
TheTest<v_int32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_expand()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_abs()
|
||||
.test_cmp()
|
||||
.test_popcount()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
.test_float_cvt32()
|
||||
.test_float_cvt64()
|
||||
.test_transpose()
|
||||
;
|
||||
}
|
||||
|
||||
//============= 64-bit integer =====================================================================
|
||||
|
||||
void test_hal_intrin_uint64()
|
||||
{
|
||||
DUMP_ENTRY(v_uint64);
|
||||
TheTest<v_uint64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
void test_hal_intrin_int64()
|
||||
{
|
||||
DUMP_ENTRY(v_int64);
|
||||
TheTest<v_int64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_shift<1>().test_shift<8>()
|
||||
.test_logic()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
}
|
||||
|
||||
//============= Floating point =====================================================================
|
||||
void test_hal_intrin_float32()
|
||||
{
|
||||
DUMP_ENTRY(v_float32);
|
||||
TheTest<v_float32>()
|
||||
.test_loadstore()
|
||||
.test_interleave()
|
||||
.test_interleave_2channel()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_reduce()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt64()
|
||||
.test_matmul()
|
||||
.test_transpose()
|
||||
.test_reduce_sum4()
|
||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
|
||||
#if CV_SIMD_WIDTH == 32
|
||||
TheTest<v_float32>()
|
||||
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
|
||||
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_hal_intrin_float64()
|
||||
{
|
||||
DUMP_ENTRY(v_float64);
|
||||
#if CV_SIMD_64F
|
||||
TheTest<v_float64>()
|
||||
.test_loadstore()
|
||||
.test_addsub()
|
||||
.test_mul()
|
||||
.test_div()
|
||||
.test_cmp()
|
||||
.test_sqrt_abs()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
.test_float_cvt32()
|
||||
.test_extract<0>().test_extract<1>()
|
||||
.test_rotate<0>().test_rotate<1>()
|
||||
;
|
||||
|
||||
#if CV_SIMD_WIDTH == 32
|
||||
TheTest<v_float64>()
|
||||
.test_extract<2>().test_extract<3>()
|
||||
.test_rotate<2>().test_rotate<3>()
|
||||
;
|
||||
#endif //CV_SIMD256
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV_FP16
// Entry point for the v_float16 checks; only defined when CV_FP16 is non-zero.
// The DUMP_ENTRY banner is always printed, while the TheTest<v_float16> chain
// is compiled only when CV_SIMD_WIDTH is greater than 16.
void test_hal_intrin_float16()
{
    DUMP_ENTRY(v_float16);

#if CV_SIMD_WIDTH > 16
    TheTest<v_float16>()
        .test_loadstore_fp16().test_float_cvt_fp16();
#endif // CV_SIMD_WIDTH > 16
}
#endif // CV_FP16
|
||||
|
||||
/*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
|
||||
void test_hal_intrin_float16()
|
||||
{
|
||||
TheTest<v_float16>()
|
||||
.test_loadstore_fp16()
|
||||
.test_float_cvt_fp16()
|
||||
;
|
||||
}
|
||||
#endif*/
|
||||
|
||||
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
//CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
//}}} // namespace
|
||||
|
||||
@ -11,6 +11,5 @@
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/hal/hal.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
#endif
|
||||
|
||||
Loading…
Reference in New Issue
Block a user