diff --git a/cmake/checks/cpu_rvv.cpp b/cmake/checks/cpu_rvv.cpp
index 684b2ecbeb..b9f19c17fd 100644
--- a/cmake/checks/cpu_rvv.cpp
+++ b/cmake/checks/cpu_rvv.cpp
@@ -9,6 +9,9 @@
 int test()
 {
     const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+    // Also exercise a vreinterpret intrinsic: intrin_rvv.hpp now uses them in place of C-style vector casts.
+    uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
+    vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
     vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
     return (int)vfmv_f_s_f32m1_f32(val);
 }
diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp
index dca54a27d1..3e7ce51f6b 100644
--- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp
@@ -1920,20 +1920,29 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_
 #define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
 inline bool v_check_all(const _Tpvec& a) \
 { \
-    v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \
+    v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \
     return (v.val[0] | v.val[1]) == 0; \
 } \
 inline bool v_check_any(const _Tpvec& a) \
 { \
-    v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \
+    v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \
     return (v.val[0] | v.val[1]) != 0; \
 }
 
 OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16)
 OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8)
 OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4)
-OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
-
+// u64 is spelled out instead of using the macro, which would expand to vreinterpret_v_u64m1_u64m1; the lanes are already u64, so no reinterpret is needed.
+inline bool v_check_all(const v_uint64x2& a)
+{
+    v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(vnot_v_u64m1(a, 2), 63, 2));
+    return (v.val[0] | v.val[1]) == 0;
+}
+inline bool v_check_any(const v_uint64x2& a)
+{
+    v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(a, 63, 2));
+    return (v.val[0] | v.val[1]) != 0;
+}
 
 inline bool v_check_all(const v_int8x16& a)
 { return v_check_all(v_reinterpret_as_u8(a)); }
@@ -2035,15 +2044,15 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs)
 
 // use reinterpret instead of c-style casting.
 #ifndef __clang__
-#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \
+#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
 inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
 { \
-    return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \
+    return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
 }
 
-OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16)
-OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8)
-OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4)
+OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16)
+OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8)
+OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4)
 #else
 #define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
 inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
@@ -2806,12 +2815,15 @@ OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
 
 //////////// Pack triplets ////////////
 
-// use reinterpret instead of c-style casting.
-#ifndef __clang__
 inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
 {
-    uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
-    return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
+    const uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
+    const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
+    return v_reinterpret_as_s8(v_uint8x16(
+        vrgather_vv_u8m1(
+            v_reinterpret_as_u8(vec),
+            v_reinterpret_as_u8(flags),
+            16)));
 }
 inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
 {
@@ -2820,8 +2832,13 @@ inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
 
 inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
 {
-    uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
-    return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
+    const uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
+    const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
+    return v_reinterpret_as_s16(v_uint8x16(
+        vrgather_vv_u8m1(
+            v_reinterpret_as_u8(vec),
+            v_reinterpret_as_u8(flags),
+            16)));
 }
 inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
 {
@@ -2832,34 +2849,6 @@ inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
 inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
 inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
 
-#else
-
-inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
-{
-    uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
-    return v_int8x16(vreinterpret_i8m1(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16)));
-}
-inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
-{
-    return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec)));
-}
-
-inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
-{
-    uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
-    return v_int16x8(v_reinterpret_as_s16(v_uint8x16(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16))));
-}
-inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
-{
-    return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
-}
-
-inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
-inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
-inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
-
-#endif
-
 ////// FP16 support ///////
 
 #if CV_FP16