From c813ad5533594470deebe16f2a5572192f2c9443 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 14 Oct 2018 01:49:14 +0000 Subject: [PATCH] core(ocl): replace ambiguous 'depth' to 'DEPTH_dst' - always pass DEPTH_dst value to core/arithm kernel --- modules/core/src/arithm.cpp | 26 +++++++++++++++----------- modules/core/src/convert_scale.cpp | 4 ++-- modules/core/src/mathfuncs.cpp | 25 +++++++++++++------------ modules/core/src/matmul.cpp | 4 ++-- modules/core/src/opencl/arithm.cl | 29 ++++++++++++++++++++++++++--- 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 999176bebb..0626607e2f 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -105,14 +105,18 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, int scalarcn = kercn == 3 ? 4 : kercn; int rowsPerWI = d.isIntel() ? 4 : 1; - sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", + const int dstDepth = srcdepth; + const int dstType = CV_MAKETYPE(dstDepth, kercn); + const int dstType1 = CV_MAKETYPE(dstDepth, 1); + const int scalarType = CV_MAKETYPE(srcdepth, scalarcn); + + sprintf(opts, "-D %s%s -D %s%s -D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop], - bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "", - bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)), - bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + bitwise ? ocl::memopTypeToStr(dstType) : ocl::typeToStr(dstType), + dstDepth, + bitwise ? ocl::memopTypeToStr(dstType1) : ocl::typeToStr(dstType1), + bitwise ? ocl::memopTypeToStr(scalarType) : ocl::typeToStr(scalarType), kercn, rowsPerWI); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); @@ -501,12 +505,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, char cvtstr[4][32], opts[1024]; sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s " - "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " + "-D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s", (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), - ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), + ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ddepth, ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), ocl::typeToStr(wdepth), wdepth, @@ -1152,12 +1156,12 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; char cvt[40]; - String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" + String opts = format("-D %s -D srcT1=%s -D dstT=%s -D DEPTH_dst=%d -D workT=srcT1 -D cn=%d" " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s" " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s", haveScalar ? "UNARY_OP" : "BINARY_OP", ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)), - ocl::typeToStr(CV_8UC(kercn)), kercn, + ocl::typeToStr(CV_8UC(kercn)), CV_8U, kercn, ocl::convertTypeStr(depth1, CV_8U, kercn, cvt), operationMap[op], ocl::typeToStr(depth1), ocl::typeToStr(depth1), ocl::typeToStr(CV_8U), diff --git a/modules/core/src/convert_scale.cpp b/modules/core/src/convert_scale.cpp index 64a98328cf..a63ea5be72 100644 --- a/modules/core/src/convert_scale.cpp +++ b/modules/core/src/convert_scale.cpp @@ -376,10 +376,10 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha int rowsPerWI = d.isIntel() ? 4 : 1; char cvt[2][50]; int wdepth = std::max(depth, CV_32F); - String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" + String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s" " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s" " -D workT1=%s -D rowsPerWI=%d%s", - ocl::typeToStr(CV_8UC(kercn)), + ocl::typeToStr(CV_8UC(kercn)), CV_8U, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index e8067b5128..b1fb96ad2d 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -71,8 +71,8 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in int rowsPerWI = d.isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", - oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI, + format("-D %s -D %s -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", + oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, rowsPerWI, double_support ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -238,9 +238,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), - depth, rowsPerWI, angleInDegrees ? "AD" : "AR", + format("-D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D OP_CTP_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth, + rowsPerWI, angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -474,9 +474,10 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle, return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI, - depth, angleInDegrees ? "AD" : "AR", + format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D BINARY_OP -D OP_PTC_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth, + rowsPerWI, + angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -1169,8 +1170,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", - ocl::typeToStr(depth), depth, rowsPerWI, op, + format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", + ocl::typeToStr(depth), depth, rowsPerWI, op, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -1560,8 +1561,8 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value ) { int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d", - rowsPerWI)); + format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d", + CV_32F, rowsPerWI)); if (k.empty()) return false; diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 7cd89c6222..e289716dea 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2375,10 +2375,10 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp char cvt[2][50]; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s" + format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D workT=%s -D convertToWT1=%s" " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s" " -D wdepth=%d%s -D rowsPerWI=%d", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]), diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 0b81d76183..b037a07d09 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -71,7 +71,30 @@ #pragma OPENCL FP_FAST_FMA ON #endif -#if depth <= 5 +#if !defined(DEPTH_dst) +#error "Kernel configuration error: DEPTH_dst value is required" +#elif !(DEPTH_dst >= 0 && DEPTH_dst <= 7) +#error "Kernel configuration error: invalid DEPTH_dst value" +#endif +#if defined(depth) +#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead" +#endif + + +#if DEPTH_dst < 5 /* CV_32F */ +#define CV_DST_TYPE_IS_INTEGER +#else +#define CV_DST_TYPE_IS_FP +#endif + +#if DEPTH_dst != 6 /* CV_64F */ +#define CV_DST_TYPE_FIT_32F 1 +#else +#define CV_DST_TYPE_FIT_32F 0 +#endif + + +#if CV_DST_TYPE_FIT_32F #define CV_PI M_PI_F #else #define CV_PI M_PI @@ -283,7 +306,7 @@ #define PROCESS_ELEM storedst(pown(srcelem1, srcelem2)) #elif defined OP_SQRT -#if depth <= 5 +#if CV_DST_TYPE_FIT_32F #define PROCESS_ELEM storedst(native_sqrt(srcelem1)) #else #define PROCESS_ELEM storedst(sqrt(srcelem1)) @@ -324,7 +347,7 @@ #endif #elif defined OP_CTP_AD || defined OP_CTP_AR -#if depth <= 5 +#if CV_DST_TYPE_FIT_32F #define CV_EPSILON FLT_EPSILON #else #define CV_EPSILON DBL_EPSILON