Merge pull request #22938 from Xxfore:4.x

Use reinterpret intrinsics instead of C-style casting for GCC

Co-authored-by: Xu Zhang <xu.zhang@hexintek.com>
Co-authored-by: Maksim Shabunin <maksim.shabunin@gmail.com>
This commit is contained in:
Xxfore 2023-01-11 22:11:16 +08:00 committed by GitHub
parent ad8f8b0c8c
commit ef0fcb9238
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 43 deletions

View File

@ -9,6 +9,9 @@
int test() int test()
{ {
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4); vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
return (int)vfmv_f_s_f32m1_f32(val); return (int)vfmv_f_s_f32m1_f32(val);
} }

View File

@ -1920,20 +1920,29 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \ #define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
inline bool v_check_all(const _Tpvec& a) \ inline bool v_check_all(const _Tpvec& a) \
{ \ { \
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \ v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \
return (v.val[0] | v.val[1]) == 0; \ return (v.val[0] | v.val[1]) == 0; \
} \ } \
inline bool v_check_any(const _Tpvec& a) \ inline bool v_check_any(const _Tpvec& a) \
{ \ { \
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \ v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \
return (v.val[0] | v.val[1]) != 0; \ return (v.val[0] | v.val[1]) != 0; \
} }
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16) OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8) OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4) OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4)
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2) //OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
inline bool v_check_all(const v_uint64x2& a)
{
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(vnot_v_u64m1(a, 2), 63, 2));
return (v.val[0] | v.val[1]) == 0;
}
inline bool v_check_any(const v_uint64x2& a)
{
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(a, 63, 2));
return (v.val[0] | v.val[1]) != 0;
}
inline bool v_check_all(const v_int8x16& a) inline bool v_check_all(const v_int8x16& a)
{ return v_check_all(v_reinterpret_as_u8(a)); } { return v_check_all(v_reinterpret_as_u8(a)); }
@ -2035,15 +2044,15 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs)
// use reinterpret instead of c-style casting. // use reinterpret instead of c-style casting.
#ifndef __clang__ #ifndef __clang__
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \ #define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \ { \
return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \ return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
} }
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16) OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8) OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4) OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4)
#else #else
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \ #define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
@ -2806,12 +2815,15 @@ OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
//////////// Pack triplets //////////// //////////// Pack triplets ////////////
// use reinterpret instead of c-style casting.
#ifndef __clang__
inline v_int8x16 v_pack_triplets(const v_int8x16& vec) inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
{ {
uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A}; const uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
return v_reinterpret_as_s8(v_uint8x16(
vrgather_vv_u8m1(
v_reinterpret_as_u8(vec),
v_reinterpret_as_u8(flags),
16)));
} }
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
{ {
@ -2820,8 +2832,13 @@ inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
inline v_int16x8 v_pack_triplets(const v_int16x8& vec) inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
{ {
uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A}; const uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
return v_reinterpret_as_s16(v_uint8x16(
vrgather_vv_u8m1(
v_reinterpret_as_u8(vec),
v_reinterpret_as_u8(flags),
16)));
} }
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{ {
@ -2832,34 +2849,6 @@ inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
#else
// Rearranges the 16 bytes of `vec` with a register gather driven by a fixed
// byte index table.
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
{
    // Two 64-bit words that are reinterpreted as 16 one-byte gather indices.
    // NOTE(review): read low byte first, the table skips source bytes 3, 7
    // and 11 and ends with three 0xFF entries - confirm the byte order and the
    // result produced for the 0xFF (out-of-range) indices against the RVV spec.
    uint64 gather_idx[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
    return v_int8x16(
        vreinterpret_i8m1(
            vrgather_vv_u8m1(
                v_reinterpret_as_u8(vec),
                vreinterpret_u8m1(vle64_v_u64m1(gather_idx, 2)),
                16)));
}
// Unsigned 8-bit overload: forwards to the signed 8-bit implementation and
// reinterprets the result back to unsigned.
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
{
    const v_int8x16 packed = v_pack_triplets(v_reinterpret_as_s8(vec));
    return v_reinterpret_as_u8(packed);
}
// Rearranges `vec` at byte granularity with a register gather driven by a
// fixed byte index table, then reinterprets the result as 16-bit lanes.
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
{
    // Two 64-bit words that are reinterpreted as 16 one-byte gather indices.
    // NOTE(review): read low byte first, the table skips source bytes 6 and 7
    // and ends with two 0xFF entries - confirm the byte order and the result
    // produced for the 0xFF (out-of-range) indices against the RVV spec.
    uint64 gather_idx[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
    return v_int16x8(
        v_reinterpret_as_s16(
            v_uint8x16(
                vrgather_vv_u8m1(
                    v_reinterpret_as_u8(vec),
                    vreinterpret_u8m1(vle64_v_u64m1(gather_idx, 2)),
                    16))));
}
// Unsigned 16-bit overload: forwards to the signed 16-bit implementation and
// reinterprets the result back to unsigned.
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{
    const v_int16x8 packed = v_pack_triplets(v_reinterpret_as_s16(vec));
    return v_reinterpret_as_u16(packed);
}
// v_int32x4 overload: returns the argument unchanged.
inline v_int32x4 v_pack_triplets(const v_int32x4& vec)
{
    return vec;
}
// v_uint32x4 overload: returns the argument unchanged.
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec)
{
    return vec;
}
// v_float32x4 overload: returns the argument unchanged.
inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
{
    return vec;
}
#endif
////// FP16 support /////// ////// FP16 support ///////
#if CV_FP16 #if CV_FP16