From 6cf4371eb4adbe9df164f724ffad46713e2dbdf5 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Fri, 3 Mar 2017 18:24:12 +0100 Subject: [PATCH 1/3] make cuda::absdiff support multi-channel scalars I took the subScalar.cu code and changed the inner operation --- modules/cudaarithm/src/cuda/absdiff_scalar.cu | 109 ++++++++++++++---- 1 file changed, 88 insertions(+), 21 deletions(-) diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu index 3ffd0661b7..225298b291 100644 --- a/modules/cudaarithm/src/cuda/absdiff_scalar.cu +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -56,14 +56,14 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G namespace { - template struct AbsDiffScalarOp : unary_function + template struct AbsDiffScalarOp : unary_function { - S val; + ScalarType val; - __device__ __forceinline__ T operator ()(T a) const + __device__ __forceinline__ DstType operator ()(SrcType a) const { - abs_func f; - return saturate_cast(f(a - val)); + abs_func f; + return saturate_cast(f(saturate_cast(a) - val)); } }; @@ -77,34 +77,101 @@ namespace }; }; - template - void absDiffScalarImpl(const GpuMat& src, double value, GpuMat& dst, Stream& stream) + template + void absDiffScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream) { - AbsDiffScalarOp op; - op.val = static_cast(value); + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AbsDiffScalarOp op; + op.val = VecTraits::make(value_.val); gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); } } void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) { - typedef void (*func_t)(const GpuMat& src, double val, GpuMat& dst, Stream& stream); - static const func_t funcs[] = + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream); + static const func_t funcs[7][7][4] = { - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl + { + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + } }; - const int depth = src.depth(); + const int sdepth = src.depth(); + const int ddepth = dst.depth(); + const int cn = src.channels(); - CV_DbgAssert( depth <= CV_64F ); + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 ); - funcs[depth](src, val[0], dst, stream); + const func_t func = funcs[sdepth][ddepth][cn - 1]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src, val, dst, stream); } #endif From 2375e587056b2f44962c1afce4e1aa5f41ba6857 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Mon, 6 Mar 2017 15:08:59 +0100 Subject: [PATCH 2/3] assert SrcDepth == DstDepth simplify code by requesting srcDepth == dstDepth --- modules/cudaarithm/src/cuda/absdiff_scalar.cu | 73 ++++--------------- 1 file changed, 15 insertions(+), 58 deletions(-) diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu index 225298b291..be3046b3cb 100644 --- a/modules/cudaarithm/src/cuda/absdiff_scalar.cu +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -56,14 +56,14 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G namespace { - template struct AbsDiffScalarOp : unary_function + template struct AbsDiffScalarOp : unary_function { ScalarType val; - __device__ __forceinline__ DstType operator ()(SrcType a) const + __device__ __forceinline__ SrcType operator ()(SrcType a) const { abs_func f; - return saturate_cast(f(saturate_cast(a) - val)); + return saturate_cast(f(saturate_cast(a) - val)); } }; @@ -77,14 +77,14 @@ namespace }; }; - template + template void absDiffScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream) { typedef typename MakeVec::cn>::type ScalarType; cv::Scalar_ value_ = value; - AbsDiffScalarOp op; + AbsDiffScalarOp op; op.val = VecTraits::make(value_.val); gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); } @@ -93,70 +93,28 @@ namespace void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) { typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream); - static const func_t funcs[7][7][4] = + static const func_t funcs[7][4] = { { - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, - {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl } }; @@ -164,10 +122,9 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G const int ddepth = dst.depth(); const int cn = src.channels(); - CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 ); - - const func_t func = funcs[sdepth][ddepth][cn - 1]; + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 && src.type() == dst.type()); + const func_t func = funcs[sdepth][cn - 1]; if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); From bfd800342bfb1900bc2b1cdb0e94fbbff88311f2 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Tue, 7 Mar 2017 09:49:49 +0100 Subject: [PATCH 3/3] use "double" from scalar only for CV_64F input Restore "DstType" in AbsDiffScalarOp template arguments, even if eventually it will be equal to SrcType use "double" from scalar only for CV_64F input --- modules/cudaarithm/src/cuda/absdiff_scalar.cu | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu index be3046b3cb..01770a5f23 100644 --- a/modules/cudaarithm/src/cuda/absdiff_scalar.cu +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -56,14 +56,14 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G namespace { - template struct AbsDiffScalarOp : unary_function + template struct AbsDiffScalarOp : unary_function { ScalarType val; - __device__ __forceinline__ SrcType operator ()(SrcType a) const + __device__ __forceinline__ DstType operator ()(SrcType a) const { abs_func f; - return saturate_cast(f(saturate_cast(a) - val)); + return saturate_cast(f(saturate_cast(a) - val)); } }; @@ -84,7 +84,7 @@ namespace cv::Scalar_ value_ = value; - AbsDiffScalarOp op; + AbsDiffScalarOp op; op.val = VecTraits::make(value_.val); gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); } @@ -96,22 +96,22 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G static const func_t funcs[7][4] = { { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { - absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl }, { absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl