build: fix v_reduce_sum4 (requires SSE3)

This commit is contained in:
Alexander Alekhin
2017-06-13 22:24:46 +00:00
parent f49f056d29
commit e23b59da5c
2 changed files with 24 additions and 0 deletions
@@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
__m128 ab = _mm_hadd_ps(a.val, b.val);
__m128 cd = _mm_hadd_ps(c.val, d.val);
return v_float32x4(_mm_hadd_ps(ab, cd));
#else
__m128 ac = _mm_add_ps(_mm_unpacklo_ps(a.val, c.val), _mm_unpackhi_ps(a.val, c.val));
__m128 bd = _mm_add_ps(_mm_unpacklo_ps(b.val, d.val), _mm_unpackhi_ps(b.val, d.val));
return v_float32x4(_mm_add_ps(_mm_unpacklo_ps(ac, bd), _mm_unpackhi_ps(ac, bd)));
#endif
}
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)