Merge pull request #22938 from Xxfore:4.x
Use reinterpret instead of C-style casting for GCC

Co-authored-by: Xu Zhang <xu.zhang@hexintek.com>
Co-authored-by: Maksim Shabunin <maksim.shabunin@gmail.com>
This commit is contained in:
parent
ad8f8b0c8c
commit
ef0fcb9238
@ -9,6 +9,9 @@
|
||||
int test()
|
||||
{
|
||||
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
|
||||
vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
|
||||
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
|
||||
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
|
||||
return (int)vfmv_f_s_f32m1_f32(val);
|
||||
}
|
||||
|
||||
@ -1920,20 +1920,29 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_
|
||||
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
|
||||
inline bool v_check_all(const _Tpvec& a) \
|
||||
{ \
|
||||
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \
|
||||
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \
|
||||
return (v.val[0] | v.val[1]) == 0; \
|
||||
} \
|
||||
inline bool v_check_any(const _Tpvec& a) \
|
||||
{ \
|
||||
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \
|
||||
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \
|
||||
return (v.val[0] | v.val[1]) != 0; \
|
||||
}
|
||||
|
||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16)
|
||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8)
|
||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4)
|
||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
|
||||
|
||||
//OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
|
||||
inline bool v_check_all(const v_uint64x2& a)
|
||||
{
|
||||
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(vnot_v_u64m1(a, 2), 63, 2));
|
||||
return (v.val[0] | v.val[1]) == 0;
|
||||
}
|
||||
inline bool v_check_any(const v_uint64x2& a)
|
||||
{
|
||||
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(a, 63, 2));
|
||||
return (v.val[0] | v.val[1]) != 0;
|
||||
}
|
||||
|
||||
inline bool v_check_all(const v_int8x16& a)
|
||||
{ return v_check_all(v_reinterpret_as_u8(a)); }
|
||||
@ -2035,15 +2044,15 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs)
|
||||
|
||||
// use reinterpret instead of c-style casting.
|
||||
#ifndef __clang__
|
||||
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \
|
||||
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
|
||||
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \
|
||||
return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
|
||||
}
|
||||
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16)
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8)
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4)
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16)
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8)
|
||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4)
|
||||
#else
|
||||
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
|
||||
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||
@ -2806,12 +2815,15 @@ OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
|
||||
|
||||
//////////// Pack triplets ////////////
|
||||
|
||||
// use reinterpret instead of c-style casting.
|
||||
#ifndef __clang__
|
||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||
{
|
||||
uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
||||
return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
|
||||
const uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
||||
const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
|
||||
return v_reinterpret_as_s8(v_uint8x16(
|
||||
vrgather_vv_u8m1(
|
||||
v_reinterpret_as_u8(vec),
|
||||
v_reinterpret_as_u8(flags),
|
||||
16)));
|
||||
}
|
||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
||||
{
|
||||
@ -2820,8 +2832,13 @@ inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
||||
|
||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||
{
|
||||
uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
||||
return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
|
||||
const uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
||||
const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
|
||||
return v_reinterpret_as_s16(v_uint8x16(
|
||||
vrgather_vv_u8m1(
|
||||
v_reinterpret_as_u8(vec),
|
||||
v_reinterpret_as_u8(flags),
|
||||
16)));
|
||||
}
|
||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
||||
{
|
||||
@ -2832,34 +2849,6 @@ inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
|
||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
||||
|
||||
#else
|
||||
|
||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||
{
|
||||
uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
||||
return v_int8x16(vreinterpret_i8m1(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16)));
|
||||
}
|
||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
||||
{
|
||||
return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec)));
|
||||
}
|
||||
|
||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||
{
|
||||
uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
||||
return v_int16x8(v_reinterpret_as_s16(v_uint8x16(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16))));
|
||||
}
|
||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
||||
{
|
||||
return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
|
||||
}
|
||||
|
||||
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
|
||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
||||
|
||||
#endif
|
||||
|
||||
////// FP16 support ///////
|
||||
|
||||
#if CV_FP16
|
||||
|
||||
Loading…
Reference in New Issue
Block a user