Merge pull request #20011 from Developer-Ecosystem-Engineering:3.4
Improve performance on Arm64 * Improve performance on Apple silicon This patch will - Enable dot product intrinsics for macOS arm64 builds - Enable for macOS arm64 builds - Improve HAL primitives - reduction (sum, min, max, sad) - signmask - mul_expand - check_any / check_all Results on an M1 MacBook Pro * Updates to #20011 based on feedback - Removes Apple Silicon specific workarounds - Makes #ifdef sections smaller for v_mul_expand cases - Moves dot product optimization to compiler optimization check - Adds 4x4 matrix transpose optimization * Remove dotprod and fix v_transpose Based on the latest, we've removed dotprod entirely and will revisit in a future PR. Added explicit casts with v_transpose4x4(). This should resolve all opens with this PR * Remove commented out lines Remove two extraneous comments
This commit is contained in:
committed by
GitHub
parent
d3be58b6d7
commit
814550d2a6
@@ -577,6 +577,25 @@ template<typename R> struct TheTest
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
 * Verifies v_mul_hi() against a scalar reference.
 *
 * Two registers of type R are multiplied with v_mul_hi() and each result lane
 * is compared with the product of the corresponding input lanes shifted right
 * by 16 bits and cast back to R::lane_type.
 *
 * NOTE(review): dataA uses the default Data<R> contents and dataB is built
 * from the constant 32767 (presumably filling every lane) - confirm against
 * the Data<R> constructors.
 *
 * @return *this, so test calls can be chained.
 */
TheTest & test_mul_hi()
{
    Data<R> dataA, dataB(32767);
    R a = dataA, b = dataB;

    R c = v_mul_hi(a, b);

    Data<R> resC = c;
    // v_mul_hi() yields a full-width R (not a widened half-lane-count register),
    // so every lane has to be checked rather than only the lower half.
    const int n = R::nlanes;
    for (int i = 0; i < n; ++i)
    {
        SCOPED_TRACE(cv::format("i=%d", i));
        EXPECT_EQ((typename R::lane_type)((dataA[i] * dataB[i]) >> 16), resC[i]);
    }

    return *this;
}
|
||||
|
||||
TheTest & test_abs()
|
||||
{
|
||||
typedef typename V_RegTraits<R>::u_reg Ru;
|
||||
@@ -1663,6 +1682,7 @@ void test_hal_intrin_uint16()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_mul_hi()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
@@ -1697,6 +1717,7 @@ void test_hal_intrin_int16()
|
||||
.test_arithm_wrap()
|
||||
.test_mul()
|
||||
.test_mul_expand()
|
||||
.test_mul_hi()
|
||||
.test_cmp()
|
||||
.test_shift<1>()
|
||||
.test_shift<8>()
|
||||
|
||||
Reference in New Issue
Block a user