renamed cuda namespace to cudev

2013-04-04 15:36:22 +04:00
parent 1bb141c465
commit 910ef57109
130 changed files with 592 additions and 594 deletions
@@ -51,7 +51,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda {
+namespace cv { namespace gpu { namespace cudev {
    namespace video_decoding
    {
        __constant__ uint constAlpha = ((uint)0xff << 24);
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/datamov_utils.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace bf_knnmatch
    {
@@ -1249,7 +1249,7 @@ namespace cv { namespace gpu { namespace cuda
        //template void match2Hamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream);
        template void match2Hamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream);
    } // namespace bf_knnmatch
-}}} // namespace cv { namespace gpu { namespace cuda {
+}}} // namespace cv { namespace gpu { namespace cudev {


 #endif /* CUDA_DISABLER */
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace bf_match
    {
@@ -768,7 +768,7 @@ namespace cv { namespace gpu { namespace cuda
        //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream);
    } // namespace bf_match
-}}} // namespace cv { namespace gpu { namespace cuda {
+}}} // namespace cv { namespace gpu { namespace cudev {


 #endif /* CUDA_DISABLER */
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace bf_radius_match
    {
@@ -457,7 +457,7 @@ namespace cv { namespace gpu { namespace cuda
        //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream);
        template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream);
    } // namespace bf_radius_match
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/limits.hpp"

-namespace cv { namespace gpu { namespace cuda {
+namespace cv { namespace gpu { namespace cudev {
    namespace bgfg_gmg
    {
        __constant__ int   c_width;
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace mog
    {
@@ -55,7 +55,7 @@ typedef unsigned short ushort;
 //////////////////////////////////////////////////////////////////////////////////
 /// Bilateral filtering

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -163,7 +163,7 @@ namespace cv { namespace gpu { namespace cuda


 #define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
-    template void cv::gpu::cuda::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
+    template void cv::gpu::cudev::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);

 OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
 //OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace blend
    {
@@ -115,7 +115,7 @@ namespace cv { namespace gpu { namespace cuda
                cvCudaSafeCall(cudaDeviceSynchronize());
        }
    } // namespace blend
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace cuda
            cvCudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
            cvCudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
            cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            cv::gpu::cuda::transform(src, dst, TransformOp(), WithOutMask(), stream);
+            cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
        }
    } // namespace transform_points

@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace cuda
            cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
            cvCudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
            cvCudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-            cv::gpu::cuda::transform(src, dst, ProjectOp(), WithOutMask(), stream);
+            cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
        }
    } // namespace project_points

@@ -187,7 +187,7 @@ namespace cv { namespace gpu { namespace cuda
            cvCudaSafeCall( cudaDeviceSynchronize() );
        }
    } // namespace solvepnp_ransac
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -51,7 +51,7 @@
 #include "opencv2/core/cuda/utility.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace canny
 {
@@ -77,7 +77,7 @@ namespace canny
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
    {
@@ -475,7 +475,7 @@ namespace canny
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
    {
@@ -50,7 +50,7 @@
 #include <iostream>
 #include <stdio.h>

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace ccl
    {
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace clahe
 {
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/color.hpp"
 #include "cvt_color_internal.h"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
    {
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace cuda
        traits::functor_type functor = traits::create_functor(); \
        typedef typename traits::functor_type::argument_type src_t; \
        typedef typename traits::functor_type::result_type   dst_t; \
-        cv::gpu::cuda::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
+        cv::gpu::cudev::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
    }

 #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
@@ -456,6 +456,6 @@ namespace cv { namespace gpu { namespace cuda
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
    #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace column_filter
 {
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -126,6 +126,6 @@ namespace cv { namespace gpu { namespace cuda
        template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
        template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/color.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct Bayer2BGR;

@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/limits.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace disp_bilateral_filter
    {
@@ -218,6 +218,6 @@ namespace cv { namespace gpu { namespace cuda
        template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
        template void disp_bilateral_filter<short>(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
    } // namespace bilateral_filter
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -51,7 +51,7 @@
 #include "opencv2/core/cuda/simd_functions.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace arithm
 {
@@ -193,7 +193,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -216,21 +216,21 @@ namespace arithm
 {
    void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
    }

    void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
    }

    template <typename T, typename D>
    void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
    }

    template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -308,7 +308,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -323,9 +323,9 @@ namespace arithm
        AddScalar<T, S, D> op(static_cast<S>(val));

        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -428,7 +428,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -451,21 +451,21 @@ namespace arithm
 {
    void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
    }

    void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
    }

    template <typename T, typename D>
    void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
    }

    template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -536,9 +536,9 @@ namespace arithm
        AddScalar<T, S, D> op(-static_cast<S>(val));

        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -657,7 +657,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -676,12 +676,12 @@ namespace arithm
 {
    void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
    }

    void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
    }

    template <typename T, typename S, typename D>
@@ -690,12 +690,12 @@ namespace arithm
        if (scale == 1)
        {
            Mul<T, D> op;
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
        else
        {
            MulScale<T, S, D> op(static_cast<S>(scale));
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
    }

@@ -774,7 +774,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -787,7 +787,7 @@ namespace arithm
    void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
    {
        MulScalar<T, S, D> op(static_cast<S>(val));
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
@@ -925,7 +925,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -944,12 +944,12 @@ namespace arithm
 {
    void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
    }

    void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
    }

    template <typename T, typename S, typename D>
@@ -958,12 +958,12 @@ namespace arithm
        if (scale == 1)
        {
            Div<T, D> op;
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
        else
        {
            DivScale<T, S, D> op(static_cast<S>(scale));
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
        }
    }

@@ -1033,7 +1033,7 @@ namespace arithm
    void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
    {
        MulScalar<T, S, D> op(static_cast<S>(1.0 / val));
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
@@ -1111,7 +1111,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
    {
@@ -1124,7 +1124,7 @@ namespace arithm
    void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
    {
        DivInv<T, S, D> op(static_cast<S>(val));
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void divInv<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
@@ -1240,7 +1240,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -1263,18 +1263,18 @@ namespace arithm
 {
    void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
    }

    void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
    }

    template <typename T>
    void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
    }

    template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1305,7 +1305,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1319,7 +1319,7 @@ namespace arithm
    {
        AbsDiffScalar<T, S> op(static_cast<S>(val));

-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
    }

    template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
@@ -1334,7 +1334,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // absMat

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1346,7 +1346,7 @@ namespace arithm
    template <typename T>
    void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
    }

    template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -1375,7 +1375,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1387,7 +1387,7 @@ namespace arithm
    template <typename T>
    void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
    }

    template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -1402,7 +1402,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // sqrtMat

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1414,7 +1414,7 @@ namespace arithm
    template <typename T>
    void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
    }

    template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -1429,7 +1429,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // logMat

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1441,7 +1441,7 @@ namespace arithm
    template <typename T>
    void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
    }

    template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -1471,7 +1471,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1483,7 +1483,7 @@ namespace arithm
    template <typename T>
    void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
    }

    template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -1554,7 +1554,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -1580,26 +1580,26 @@ namespace arithm
 {
    void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
    }
    void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
    }
    void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
    }
    void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
    }

    template <template <typename> class Op, typename T>
    void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
        Cmp<Op<T>, T> op;
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
    }

    template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
@@ -1716,7 +1716,7 @@ namespace arithm
 #undef TYPE_VEC
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
    {
@@ -1735,7 +1735,7 @@ namespace arithm
        src_t val1 = VecTraits<src_t>::make(sval);

        CmpScalar<Op<T>, T, cn> op(val1);
-        cuda::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
    }

    template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
@@ -1875,7 +1875,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////////////////
 // bitMat

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1899,33 +1899,33 @@ namespace arithm
    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
    }

    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
    }

    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
    }

    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
    {
        if (mask.data)
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
        else
-            cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
+            cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
    }

    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
@@ -1948,7 +1948,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////////////////
 // bitScalar

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -1967,17 +1967,17 @@ namespace arithm
 {
    template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cuda::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
    }

    template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cuda::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
    }

    template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cuda::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
    }

    template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2026,7 +2026,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -2053,17 +2053,17 @@ namespace arithm
 {
    void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
    }

    void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
    }

    template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
    }

    template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2076,7 +2076,7 @@ namespace arithm

    template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cuda::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
    }

    template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2118,7 +2118,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
@@ -2145,17 +2145,17 @@ namespace arithm
 {
    void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
    }

    void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
    {
-        cuda::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
+        cudev::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
    }

    template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
    }

    template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2168,7 +2168,7 @@ namespace arithm

    template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cuda::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
    }

    template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
@@ -2183,7 +2183,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // threshold

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -2212,7 +2212,7 @@ namespace arithm
    void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream)
    {
        Op<T> op(thresh, maxVal);
-        cuda::transform(src, dst, op, WithOutMask(), stream);
+        cudev::transform(src, dst, op, WithOutMask(), stream);
    }

    template <typename T>
@@ -2297,7 +2297,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
@@ -2309,7 +2309,7 @@ namespace arithm
    template<typename T>
    void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
    {
-        cuda::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
    }

    template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
@@ -2372,7 +2372,7 @@ namespace arithm
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
    {
@@ -2393,7 +2393,7 @@ namespace arithm
    {
        AddWeighted<T1, T2, D> op(alpha, beta, gamma);

-        cuda::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
    }

    template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace fast
    {
@@ -51,7 +51,7 @@
 #include "fgd_bgfg_common.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace bgfg
 {
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace gfft
    {
@@ -47,7 +47,7 @@
 #include <thrust/functional.h>
 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda { namespace globmotion {
+namespace cv { namespace gpu { namespace cudev { namespace globmotion {

 __constant__ float cml[9];
 __constant__ float cmr[9];
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/transform.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace hist
 {
@@ -127,7 +127,7 @@ namespace hist
    };
 }

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
    {
@@ -146,7 +146,7 @@ namespace hist

        const float scale = 255.0f / (src.cols * src.rows);

-        cuda::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
+        cudev::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
    }
 }

@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    // Other values are not supported
    #define CELL_WIDTH 8
@@ -808,7 +808,7 @@ namespace cv { namespace gpu { namespace cuda
        void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
        void resize_8UC4(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
    } // namespace hog
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -51,7 +51,7 @@
 #include "opencv2/core/cuda/limits.hpp"
 #include "opencv2/core/cuda/dynamic_smem.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace hough
    {
@@ -1424,7 +1424,7 @@ namespace cv { namespace gpu { namespace cuda
            cvCudaSafeCall( cudaDeviceSynchronize() );

            thrust::device_ptr<int> sizesPtr(sizes);
-            thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cuda::bind2nd(cuda::minimum<int>(), maxSize));
+            thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
        }

        void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"
 #include "internal_shared.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -1002,7 +1002,7 @@ namespace cv { namespace gpu { namespace cuda
        template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
        template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda {
+}}} // namespace cv { namespace gpu { namespace cudev {


 #endif /* CUDA_DISABLER */
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace lbp
    {
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"

-namespace cv { namespace gpu { namespace cuda {
+namespace cv { namespace gpu { namespace cudev {

 namespace lbp {

@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace match_template
    {
@@ -910,7 +910,7 @@ namespace cv { namespace gpu { namespace cuda
                cvCudaSafeCall( cudaDeviceSynchronize() );
        }
    } //namespace match_template
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace mathfunc
    {
@@ -212,6 +212,6 @@ namespace cv { namespace gpu { namespace cuda
            callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
        }
    } // namespace mathfunc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -53,7 +53,7 @@
 #include "opencv2/core/cuda/type_traits.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace detail
 {
@@ -205,7 +205,7 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE);
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
        }

        template <typename R>
@@ -225,7 +225,7 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
        }

        template <typename R>
@@ -245,7 +245,7 @@ namespace detail
        template <int BLOCK_SIZE, typename R>
        static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
        {
-            return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
        }

        template <typename R>
@@ -340,7 +340,7 @@ namespace sum
            {
                sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);

-                cuda::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+                cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));

                if (tid == 0)
                {
@@ -383,7 +383,7 @@ namespace sum
            }
        }

-        cuda::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
+        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));

        GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
    }
@@ -642,7 +642,7 @@ namespace minMax

                const minimum<R> minOp;
                const maximum<R> maxOp;
-                cuda::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+                cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));

                if (tid == 0)
                {
@@ -690,7 +690,7 @@ namespace minMax
            }
        }

-        cuda::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
+        cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));

        GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
    }
@@ -994,7 +994,7 @@ namespace countNonZero
            }
        }

-        cuda::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+        cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());

    #if __CUDA_ARCH__ >= 200
        if (tid == 0)
@@ -1019,7 +1019,7 @@ namespace countNonZero
        {
            mycount = tid < gridDim.x * gridDim.y ? count[tid] : 0;

-            cuda::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
+            cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());

            if (tid == 0)
            {
@@ -1217,7 +1217,7 @@ namespace reduce
        volatile S* srow = smem + threadIdx.y * 16;

        myVal = srow[threadIdx.x];
-        cuda::reduce<16>(srow, myVal, threadIdx.x, op);
+        cudev::reduce<16>(srow, myVal, threadIdx.x, op);

        if (threadIdx.x == 0)
            srow[0] = myVal;
@@ -1301,7 +1301,7 @@ namespace reduce
        for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
            myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));

-        cuda::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
+        cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));

        if (threadIdx.x == 0)
            dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
@@ -57,7 +57,7 @@ typedef unsigned short ushort;
 //////////////////////////////////////////////////////////////////////////////////
 //// Non Local Means Denosing

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -179,7 +179,7 @@ namespace cv { namespace gpu { namespace cuda
 //////////////////////////////////////////////////////////////////////////////////
 //// Non Local Means Denosing (fast approximate version)

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace cuda
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem)
            {
-                return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE);
+                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&> tie(float& val1, float& val2)
@@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace cuda
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
-                return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&> tie(float& val1, float2& val2)
@@ -228,7 +228,7 @@ namespace cv { namespace gpu { namespace cuda
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
-                return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&> tie(float& val1, float3& val2)
@@ -247,7 +247,7 @@ namespace cv { namespace gpu { namespace cuda
            template <int BLOCK_SIZE>
            static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
            {
-                return cv::gpu::cuda::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE);
+                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE);
            }

            static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&, float&> tie(float& val1, float4& val2)
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/reduce.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace optflowbm
 {
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace optical_flow
    {
@@ -55,7 +55,7 @@
 #define BORDER_SIZE 5
 #define MAX_KSIZE_HALF 100

-namespace cv { namespace gpu { namespace cuda { namespace optflow_farneback
+namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
 {
    __constant__ float c_g[8];
    __constant__ float c_xg[8];
@@ -641,7 +641,7 @@ namespace cv { namespace gpu { namespace cuda { namespace optflow_farneback
        callers[borderMode](src, ksizeHalf, dst, stream);
    }

-}}}} // namespace cv { namespace gpu { namespace cuda { namespace optflow_farneback
+}}}} // namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback


 #endif /* CUDA_DISABLER */
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace orb
    {
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace cuda
        template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -191,6 +191,6 @@ namespace cv { namespace gpu { namespace cuda
        template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
        template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/reduce.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace pyrlk
 {
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -268,7 +268,7 @@ namespace cv { namespace gpu { namespace cuda
        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -51,7 +51,7 @@
 #include "opencv2/core/cuda/filters.hpp"
 #include "opencv2/core/cuda/scan.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -296,7 +296,7 @@ namespace cv { namespace gpu { namespace cuda
        };

    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_traits.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace video_encoding
    {
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 namespace row_filter
 {
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace split_merge
    {
@@ -505,7 +505,7 @@ namespace cv { namespace gpu { namespace cuda
            split_func(src, dst, stream);
        }
    } // namespace split_merge
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -44,7 +44,7 @@

 #include "opencv2/core/cuda/common.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace stereobm
    {
@@ -534,7 +534,7 @@ namespace cv { namespace gpu { namespace cuda
            cvCudaSafeCall( cudaUnbindTexture (texForTF) );
        }
    } // namespace stereobm
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/limits.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace stereobp
    {
@@ -384,7 +384,7 @@ namespace cv { namespace gpu { namespace cuda
        template <typename T>
        __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
        {
-            float minimum = cuda::numeric_limits<float>::max();
+            float minimum = cudev::numeric_limits<float>::max();

            for(int i = 0; i < cndisp; ++i)
            {
@@ -533,6 +533,6 @@ namespace cv { namespace gpu { namespace cuda
        template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
        template void output_gpu<float>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
    } // namespace stereobp
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev

 #endif /* CUDA_DISABLER */
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace stereocsbp
    {
@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace cuda

                for(int i = 0; i < nr_plane; i++)
                {
-                    T minimum = cuda::numeric_limits<T>::max();
+                    T minimum = cudev::numeric_limits<T>::max();
                    int id = 0;
                    for(int d = 0; d < cndisp; d++)
                    {
@@ -859,6 +859,6 @@ namespace cv { namespace gpu { namespace cuda
        template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
            const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
    } // namespace stereocsbp
-}}} // namespace cv { namespace gpu { namespace cuda {
+}}} // namespace cv { namespace gpu { namespace cudev {

 #endif /* CUDA_DISABLER */
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/limits.hpp"

 using namespace cv::gpu;
-using namespace cv::gpu::cuda;
+using namespace cv::gpu::cudev;

 ////////////////////////////////////////////////////////////
 // centeredGradient
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"

-namespace cv { namespace gpu { namespace cuda
+namespace cv { namespace gpu { namespace cudev
 {
    namespace imgproc
    {
@@ -383,7 +383,7 @@ namespace cv { namespace gpu { namespace cuda
        template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
        template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cuda
+}}} // namespace cv { namespace gpu { namespace cudev


 #endif /* CUDA_DISABLER */