diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 0626d8938a..0cb8129b58 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -47,37 +47,33 @@ #include "opencv2/gpu/device/saturate_cast.hpp" #include "internal_shared.hpp" -using namespace cv::gpu; -using namespace cv::gpu::device; - -namespace cv { namespace gpu { namespace mathfunc +namespace cv { namespace gpu { namespace device { - ////////////////////////////////////////////////////////////////////////////////////// // Compare - template struct NotEqual : binary_function + template struct NotEqual : binary_function { - __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2) const + __device__ __forceinline__ uchar operator()(T src1, T src2) const { return static_cast(static_cast(src1 != src2) * 255); } }; - template + template inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream) { - NotEqual op; - transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), dst, op, stream); + NotEqual op; + transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), dst, op, stream); } void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream) { - compare_ne(src1, src2, dst, stream); + compare_ne(src1, src2, dst, stream); } void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream) { - compare_ne(src1, src2, dst, stream); + compare_ne(src1, src2, dst, stream); } @@ -354,6 +350,35 @@ namespace cv { namespace gpu { namespace mathfunc ////////////////////////////////////////////////////////////////////////// // min/max + + namespace detail + { + template struct MinMaxTraits : DefaultTransformFunctorTraits + { + }; + template struct MinMaxTraits<2, F> : DefaultTransformFunctorTraits + { + enum { smart_shift = 4 }; + }; + template struct MinMaxTraits<4, F> : DefaultTransformFunctorTraits + { + enum { smart_block_dim_y = 4 }; + enum { smart_shift = 4 }; + }; + } + + template struct TransformFunctorTraits< minimum > : detail::MinMaxTraits< sizeof(T), minimum > + { + }; + template struct TransformFunctorTraits< maximum > : detail::MinMaxTraits< sizeof(T), maximum > + { + }; + template struct TransformFunctorTraits< binder2nd< minimum > > : detail::MinMaxTraits< sizeof(T), binder2nd< minimum > > + { + }; + template struct TransformFunctorTraits< binder2nd< maximum > > : detail::MinMaxTraits< sizeof(T), binder2nd< maximum > > + { + }; template void min_gpu(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, cudaStream_t stream) @@ -413,7 +438,39 @@ namespace cv { namespace gpu { namespace mathfunc ////////////////////////////////////////////////////////////////////////// - // threshold + // threshold + + namespace detail + { + template struct ThresholdTraits : DefaultTransformFunctorTraits + { + }; + template struct ThresholdTraits<2, F> : DefaultTransformFunctorTraits + { + enum { smart_shift = 4 }; + }; + template struct ThresholdTraits<4, F> : DefaultTransformFunctorTraits + { + enum { smart_block_dim_y = 4 }; + enum { smart_shift = 4 }; + }; + } + + template struct TransformFunctorTraits< thresh_binary_func > : detail::ThresholdTraits< sizeof(T), thresh_binary_func > + { + }; + template struct TransformFunctorTraits< thresh_binary_inv_func > : detail::ThresholdTraits< sizeof(T), thresh_binary_inv_func > + { + }; + template struct TransformFunctorTraits< thresh_trunc_func > : detail::ThresholdTraits< sizeof(T), thresh_trunc_func > + { + }; + template struct TransformFunctorTraits< thresh_to_zero_func > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_func > + { + }; + template struct TransformFunctorTraits< thresh_to_zero_inv_func > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_inv_func > + { + }; template