build: fix v_reduce_sum4 (requires SSE3)
This commit is contained in:
@@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
|
||||
// Reduces four float vectors simultaneously: lane 0 of the result holds the
// sum of the lanes of `a`, lane 1 that of `b`, lane 2 that of `c` and lane 3
// that of `d`.
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // Horizontal add is available: the first pass produces adjacent-pair sums
    // (a0+a1, a2+a3, b0+b1, b2+b3) and (c0+c1, c2+c3, d0+d1, d2+d3); the
    // second pass collapses those into one total per input vector.
    __m128 pair_sums_ab = _mm_hadd_ps(a.val, b.val);
    __m128 pair_sums_cd = _mm_hadd_ps(c.val, d.val);
    return v_float32x4(_mm_hadd_ps(pair_sums_ab, pair_sums_cd));
#else
    // Fallback without horizontal add: interleave and add, transpose-style.
    // Interleaving a with c gives (a0, c0, a1, c1) and (a2, c2, a3, c3), so
    // their sum is (a0+a2, c0+c2, a1+a3, c1+c3); likewise for b with d.
    __m128 ac_low = _mm_unpacklo_ps(a.val, c.val);
    __m128 ac_high = _mm_unpackhi_ps(a.val, c.val);
    __m128 partial_ac = _mm_add_ps(ac_low, ac_high);

    __m128 bd_low = _mm_unpacklo_ps(b.val, d.val);
    __m128 bd_high = _mm_unpackhi_ps(b.val, d.val);
    __m128 partial_bd = _mm_add_ps(bd_low, bd_high);

    // Interleaving the partial sums lines up the even-lane halves
    // (a, b, c, d order) against the odd-lane halves; adding them finishes
    // the reduction.
    __m128 even_halves = _mm_unpacklo_ps(partial_ac, partial_bd);
    __m128 odd_halves = _mm_unpackhi_ps(partial_ac, partial_bd);
    return v_float32x4(_mm_add_ps(even_halves, odd_halves));
#endif
}
|
||||
|
||||
// NOTE(review): the macro's definition is outside this view; presumably this
// instantiates a 4-lane max reduction for v_uint32x4 (scalar type `unsigned`)
// built on std::max -- confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
|
||||
|
||||
Reference in New Issue
Block a user