Merge pull request #15422 from mipsopen-fwu:msa-dev

* Added MSA implementations for mips platforms. Intrinsics for MSA and build scripts for MIPS platforms are added.

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* Removed some unused code in mips.toolchain.cmake.

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* Added comments for mips toolchain configuration and disabled compiling warnings for libpng.

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* Fixed the build error of unsupported opcode 'pause' when mips isa_rev is less than 2.

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* 1. Removed FP16 related item in MSA option defines in OpenCVCompilerOptimizations.cmake.
2. Use CV_CPU_COMPILE_MSA instead of __mips_msa for MSA feature check in cv_cpu_dispatch.h.
3. Removed hasSIMD128() in intrin_msa.hpp.
4. Define CPU_MSA as 150.
Signed-off-by: Fei Wu <fwu@wavecomp.com>

* 1. Removed unnecessary CV_SIMD128_64F guarding in intrin_msa.hpp.
2. Removed unnecessary CV_MSA related code block in dotProd_8u().

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* 1. Defined CPU_MSA_FLAGS_ON as "-mmsa".
2. Removed CV_SIMD128_64F guardings in intrin_msa.hpp.

Signed-off-by: Fei Wu <fwu@wavecomp.com>

* Removed unused msa_mlal_u16() and msa_mlal_s16 from msa_macros.h.

Signed-off-by: Fei Wu <fwu@wavecomp.com>
This commit is contained in:
mipsopen-fwu
2019-09-21 00:52:48 +08:00
committed by Alexander Alekhin
parent 33e9fe9312
commit b1ea91d8bd
20 changed files with 4400 additions and 5 deletions
@@ -152,6 +152,11 @@
# define CV_VSX3 1
#endif
#ifdef CV_CPU_COMPILE_MSA
# include "hal/msa_macros.h"
# define CV_MSA 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -319,3 +324,7 @@ struct VZeroUpperGuard {
#ifndef CV_VSX3
# define CV_VSX3 0
#endif
#ifndef CV_MSA
# define CV_MSA 0
#endif
@@ -420,6 +420,27 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA
# define CV_TRY_MSA 1
# define CV_CPU_FORCE_MSA 1
# define CV_CPU_HAS_SUPPORT_MSA 1
# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA
# define CV_TRY_MSA 1
# define CV_CPU_FORCE_MSA 0
# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA))
# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
#else
# define CV_TRY_MSA 0
# define CV_CPU_FORCE_MSA 0
# define CV_CPU_HAS_SUPPORT_MSA 0
# define CV_CPU_CALL_MSA(fn, args)
# define CV_CPU_CALL_MSA_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_FORCE_VSX 1
@@ -258,6 +258,8 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_NEON 100
#define CV_CPU_MSA 150
#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
@@ -308,6 +310,8 @@ enum CpuFeatures {
CPU_NEON = 100,
CPU_MSA = 150,
CPU_VSX = 200,
CPU_VSX3 = 201,
@@ -165,9 +165,10 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
# undef CV_NEON
# undef CV_VSX
# undef CV_FP16
# undef CV_MSA
#endif
#if CV_SSE2 || CV_NEON || CV_VSX
#if CV_SSE2 || CV_NEON || CV_VSX || CV_MSA
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
@@ -185,6 +186,10 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#include "opencv2/core/hal/intrin_vsx.hpp"
#elif CV_MSA
#include "opencv2/core/hal/intrin_msa.hpp"
#else
#define CV_SIMD128_CPP 1
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff