Add Loongson Advanced SIMD Extension support: -DCPU_BASELINE=LASX

* Add Loongson Advanced SIMD Extension support: -DCPU_BASELINE=LASX
* Add resize.lasx.cpp for Loongson SIMD acceleration
* Add imgwarp.lasx.cpp for Loongson SIMD acceleration
* Add LASX acceleration support for dnn/conv
* Add CV_PAUSE(v) for LoongArch
* Set LASX by default on LoongArch64
* LoongArch: tune test threshold for Core/HAL.mat_decomp/15

Co-authored-by: shengwenxue <shengwenxue@loongson.cn>
This commit is contained in:
wxsheng
2022-09-10 14:39:43 +08:00
committed by GitHub
parent 866191478f
commit 4154bd0667
24 changed files with 5071 additions and 6 deletions
@@ -172,6 +172,11 @@
# define CV_MSA 1
#endif
// Loongson Advanced SIMD Extension (LASX): when CV_CPU_COMPILE_LASX is defined
// (presumably by the build configuration for this translation unit), include
// the LASX intrinsics header and mark LASX as available via CV_LASX.
#ifdef CV_CPU_COMPILE_LASX
# include <lasxintrin.h>
# define CV_LASX 1
#endif
#ifdef __EMSCRIPTEN__
# define CV_WASM_SIMD 1
# include <wasm_simd128.h>
@@ -370,3 +375,7 @@ struct VZeroUpperGuard {
#ifndef CV_RVV
# define CV_RVV 0
#endif
// Ensure CV_LASX is always defined: it stays as-is when already set and
// becomes 0 otherwise, so it can be tested with a plain `#if CV_LASX`.
#if !defined(CV_LASX)
#  define CV_LASX 0
#endif
@@ -525,5 +525,26 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...) CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// LASX (Loongson Advanced SIMD Extension) dispatch helpers.
//
// Three configurations are distinguished (optimizations must not be disabled
// and intrinsics must be enabled for the first two):
//   1. CV_CPU_COMPILE_LASX          - LASX is compiled in unconditionally:
//                                     support is the constant 1 and
//                                     CV_CPU_CALL_LASX returns through cpu_baseline.
//   2. CV_CPU_DISPATCH_COMPILE_LASX - LASX code lives in opt_LASX and is only
//                                     called when cv::checkHardwareSupport(CV_CPU_LASX)
//                                     reports support at run time.
//   3. otherwise                    - LASX is unavailable; the call macros
//                                     expand to nothing.
#if !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_ENABLE_INTRINSICS) && defined(CV_CPU_COMPILE_LASX)
#  define CV_CPU_HAS_SUPPORT_LASX 1
#  define CV_CPU_FORCE_LASX 1
#  define CV_TRY_LASX 1
#  define CV_CPU_CALL_LASX_(fn, args) return (opt_LASX::fn args)
#  define CV_CPU_CALL_LASX(fn, args) return (cpu_baseline::fn args)
#elif !defined(CV_DISABLE_OPTIMIZATION) && defined(CV_ENABLE_INTRINSICS) && defined(CV_CPU_DISPATCH_COMPILE_LASX)
#  define CV_CPU_HAS_SUPPORT_LASX (cv::checkHardwareSupport(CV_CPU_LASX))
#  define CV_CPU_FORCE_LASX 0
#  define CV_TRY_LASX 1
#  define CV_CPU_CALL_LASX_(fn, args) if (CV_CPU_HAS_SUPPORT_LASX) return (opt_LASX::fn args)
#  define CV_CPU_CALL_LASX(fn, args) if (CV_CPU_HAS_SUPPORT_LASX) return (opt_LASX::fn args)
#else
#  define CV_CPU_HAS_SUPPORT_LASX 0
#  define CV_CPU_FORCE_LASX 0
#  define CV_TRY_LASX 0
#  define CV_CPU_CALL_LASX_(fn, args)
#  define CV_CPU_CALL_LASX(fn, args)
#endif
// Dispatch-chain link: try the LASX call first, then continue with the next
// entry named by `mode`.
#define __CV_CPU_DISPATCH_CHAIN_LASX(fn, args, mode, ...) \
    CV_CPU_CALL_LASX(fn, args); \
    __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
@@ -279,6 +279,8 @@ namespace cv {
#define CV_CPU_RVV 210
// Loongson Advanced SIMD Extension (LASX); same value as CpuFeatures::CPU_LASX.
#define CV_CPU_LASX 230
// CPU features groups
#define CV_CPU_AVX512_SKX 256
#define CV_CPU_AVX512_COMMON 257
@@ -336,6 +338,8 @@ enum CpuFeatures {
CPU_RVV = 210,
CPU_LASX = 230, //!< Loongson Advanced SIMD Extension (LASX)
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
@@ -231,8 +231,16 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP) && !defined(CV_RVV_SCALABLE)
#include "opencv2/core/hal/intrin_rvv.hpp"
#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP) && CV_RVV_SCALABLE
#include "opencv2/core/hal/intrin_rvv_scalable.hpp"
#elif CV_LASX
// LASX build: define CV_FORCE_SIMD128_CPP (unless it is already defined) and
// use intrin_cpp.hpp for the 128-bit intrinsics layer.
// NOTE(review): presumably because this chain has no native 128-bit backend
// for LoongArch and LASX itself is handled separately as 256-bit — confirm.
#if !defined(CV_FORCE_SIMD128_CPP)
#define CV_FORCE_SIMD128_CPP 1
#endif
#include "opencv2/core/hal/intrin_cpp.hpp"
#else
#include "opencv2/core/hal/intrin_cpp.hpp"
@@ -267,6 +275,14 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
// LASX: set CV__SIMD_FORWARD to 256 before including intrin_forward.hpp, then
// include the LASX intrinsics implementation. The include order is significant.
// NOTE(review): CV__SIMD_FORWARD is presumably consumed by intrin_forward.hpp
// to select the 256-bit forward declarations — confirm.
#if CV_LASX
#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_lasx.hpp"
#endif
//! @cond IGNORED
namespace cv {
File diff suppressed because it is too large Load Diff
+2
View File
@@ -59,6 +59,8 @@ DECLARE_CV_PAUSE
// https://github.com/riscv/riscv-isa-manual/issues/43
// # define CV_PAUSE(v) do { for (int __delay = (v); __delay > 0; --__delay) { asm volatile("pause"); } } while (0)
# define CV_PAUSE(v) do { for (int __delay = (v); __delay > 0; --__delay) { asm volatile("nop"); } } while (0)
# elif defined __GNUC__ && defined __loongarch__
// LoongArch with a GCC-compatible compiler: spin by issuing one 'nop' per
// requested delay unit (v iterations in total).
# define CV_PAUSE(v) do { for (int __delay = (v); __delay > 0; --__delay) { asm volatile("nop"); } } while (0)
# else
# warning "Can't detect 'pause' (CPU-yield) instruction on the target platform. Specify CV_PAUSE() definition via compiler flags."
# define CV_PAUSE(...) do { /* no-op: works, but not effective */ } while (0)
+6
View File
@@ -434,6 +434,8 @@ struct HWFeatures
g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL";
g_hwFeatureNames[CPU_RVV] = "RVV";
g_hwFeatureNames[CPU_LASX] = "LASX";
}
void initialize(void)
@@ -689,6 +691,10 @@ struct HWFeatures
have[CV_CPU_RVV] = true;
#endif
// LASX is reported as available purely from a compile-time check of
// __loongarch_asx; no runtime probing is performed in this block.
// NOTE(review): __loongarch_asx is presumably predefined by the compiler when
// LASX code generation is enabled — confirm against the toolchain docs.
#if defined __loongarch_asx
have[CV_CPU_LASX] = true;
#endif
bool skip_baseline_check = false;
#ifndef NO_GETENV
if (getenv("OPENCV_SKIP_CPU_BASELINE_CHECK"))
+4
View File
@@ -136,7 +136,11 @@ TEST_P(HAL, mat_decomp)
int size = (hcase / 2) % 4;
size = size == 0 ? 3 : size == 1 ? 4 : size == 2 ? 6 : 15;
int nfunc = (hcase / 8);
// Comparison tolerance: 1e-5 for CV_32F in every build; for other depths LASX
// builds use a looser 2e-10 instead of the default 1e-10.
#if CV_LASX
double eps = depth == CV_32F ? 1e-5 : 2e-10;
#else
double eps = depth == CV_32F ? 1e-5 : 1e-10;
#endif
if( size == 3 )
return; // TODO ???