Merge pull request #22938 from Xxfore:4.x
Use reinterpret instead of C-style casting for GCC

Co-authored-by: Xu Zhang <xu.zhang@hexintek.com>
Co-authored-by: Maksim Shabunin <maksim.shabunin@gmail.com>
This commit is contained in:
parent ad8f8b0c8c
commit ef0fcb9238
@ -9,6 +9,9 @@
|
|||||||
int test()
|
int test()
|
||||||
{
|
{
|
||||||
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
||||||
|
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
|
||||||
|
vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
|
||||||
|
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
|
||||||
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
|
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
|
||||||
return (int)vfmv_f_s_f32m1_f32(val);
|
return (int)vfmv_f_s_f32m1_f32(val);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1920,20 +1920,29 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_
|
|||||||
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
|
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
|
||||||
inline bool v_check_all(const _Tpvec& a) \
|
inline bool v_check_all(const _Tpvec& a) \
|
||||||
{ \
|
{ \
|
||||||
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \
|
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \
|
||||||
return (v.val[0] | v.val[1]) == 0; \
|
return (v.val[0] | v.val[1]) == 0; \
|
||||||
} \
|
} \
|
||||||
inline bool v_check_any(const _Tpvec& a) \
|
inline bool v_check_any(const _Tpvec& a) \
|
||||||
{ \
|
{ \
|
||||||
v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \
|
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \
|
||||||
return (v.val[0] | v.val[1]) != 0; \
|
return (v.val[0] | v.val[1]) != 0; \
|
||||||
}
|
}
|
||||||
|
|
||||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16)
|
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16)
|
||||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8)
|
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8)
|
||||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4)
|
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4)
|
||||||
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
|
//OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2)
|
||||||
|
inline bool v_check_all(const v_uint64x2& a)
|
||||||
|
{
|
||||||
|
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(vnot_v_u64m1(a, 2), 63, 2));
|
||||||
|
return (v.val[0] | v.val[1]) == 0;
|
||||||
|
}
|
||||||
|
inline bool v_check_any(const v_uint64x2& a)
|
||||||
|
{
|
||||||
|
v_uint64x2 v = v_uint64x2(vsrl_vx_u64m1(a, 63, 2));
|
||||||
|
return (v.val[0] | v.val[1]) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
inline bool v_check_all(const v_int8x16& a)
|
inline bool v_check_all(const v_int8x16& a)
|
||||||
{ return v_check_all(v_reinterpret_as_u8(a)); }
|
{ return v_check_all(v_reinterpret_as_u8(a)); }
|
||||||
@ -2035,15 +2044,15 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs)
|
|||||||
|
|
||||||
// use reinterpret instead of c-style casting.
|
// use reinterpret instead of c-style casting.
|
||||||
#ifndef __clang__
|
#ifndef __clang__
|
||||||
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \
|
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
|
||||||
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ \
|
{ \
|
||||||
return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \
|
return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16)
|
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16)
|
||||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8)
|
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8)
|
||||||
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4)
|
OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4)
|
||||||
#else
|
#else
|
||||||
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
|
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
|
||||||
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
|
||||||
@ -2806,12 +2815,15 @@ OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
|
|||||||
|
|
||||||
//////////// Pack triplets ////////////
|
//////////// Pack triplets ////////////
|
||||||
|
|
||||||
// use reinterpret instead of c-style casting.
|
|
||||||
#ifndef __clang__
|
|
||||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
||||||
{
|
{
|
||||||
uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
const uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
||||||
return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
|
const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
|
||||||
|
return v_reinterpret_as_s8(v_uint8x16(
|
||||||
|
vrgather_vv_u8m1(
|
||||||
|
v_reinterpret_as_u8(vec),
|
||||||
|
v_reinterpret_as_u8(flags),
|
||||||
|
16)));
|
||||||
}
|
}
|
||||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
||||||
{
|
{
|
||||||
@ -2820,8 +2832,13 @@ inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
|||||||
|
|
||||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
||||||
{
|
{
|
||||||
uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
const uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
||||||
return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16));
|
const v_uint64x2 flags(vle64_v_u64m1(ptr, 2));
|
||||||
|
return v_reinterpret_as_s16(v_uint8x16(
|
||||||
|
vrgather_vv_u8m1(
|
||||||
|
v_reinterpret_as_u8(vec),
|
||||||
|
v_reinterpret_as_u8(flags),
|
||||||
|
16)));
|
||||||
}
|
}
|
||||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
||||||
{
|
{
|
||||||
@ -2832,34 +2849,6 @@ inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
|
|||||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
||||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
|
|
||||||
{
|
|
||||||
uint64 ptr[2] = {0x0908060504020100, 0xFFFFFF0F0E0D0C0A};
|
|
||||||
return v_int8x16(vreinterpret_i8m1(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16)));
|
|
||||||
}
|
|
||||||
inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec)
|
|
||||||
{
|
|
||||||
return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
|
|
||||||
{
|
|
||||||
uint64 ptr[2] = {0x0908050403020100, 0xFFFF0F0E0D0C0B0A};
|
|
||||||
return v_int16x8(v_reinterpret_as_s16(v_uint8x16(vrgather_vv_u8m1(v_reinterpret_as_u8(vec), vreinterpret_u8m1(vle64_v_u64m1(ptr, 2)), 16))));
|
|
||||||
}
|
|
||||||
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
|
|
||||||
{
|
|
||||||
return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
|
|
||||||
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
|
|
||||||
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
////// FP16 support ///////
|
////// FP16 support ///////
|
||||||
|
|
||||||
#if CV_FP16
|
#if CV_FP16
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user