From f6b3bc01e51b69ccbbad69bd55f011c58eddd00e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 29 Dec 2014 20:32:11 +0300
Subject: [PATCH 01/55] addWeighted

---
 modules/core/src/arithm.cpp | 93 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 68c8979a8d..2f377350e8 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -2772,7 +2772,98 @@ struct AddWeighted_SIMD
     }
 };
 
-#if CV_NEON
+#if CV_SSE2
+
+template <> // SSE2 path for signed 8-bit data with float weights: dst = src1*alpha + src2*beta + gamma
+struct AddWeighted_SIMD<schar, float>
+{
+    AddWeighted_SIMD()
+    {
+        haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); // run-time CPU check, cached for operator()
+    }
+    // Processes elements in groups of 8 and returns the index of the first element NOT processed (0 without SSE2).
+    int operator() (const schar * src1, const schar * src2, schar * dst, int width, float alpha, float beta, float gamma) const
+    {
+        int x = 0;
+
+        if (!haveSSE2)
+            return x; // nothing processed; presumably the caller handles all `width` elements itself -- confirm at call site
+
+        __m128i v_zero = _mm_setzero_si128();
+        __m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
+               v_gamma = _mm_set1_ps(gamma);
+
+        for( ; x <= width - 8; x += 8 ) // stops while fewer than 8 elements remain
+        {
+            __m128i v_src1 = _mm_loadl_epi64((const __m128i *)(src1 + x)); // 8 bytes into the low 64 bits
+            __m128i v_src2 = _mm_loadl_epi64((const __m128i *)(src2 + x));
+            // Sign-extend 8 -> 16 bit: put each byte in the high half of a 16-bit lane, then arithmetic shift right by 8.
+            __m128i v_src1_p = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
+            __m128i v_src2_p = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
+            // Elements 0..3: sign-extend 16 -> 32 bit with the same unpack/shift trick, convert to float, apply weights.
+            __m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1_p), 16)), v_alpha);
+            v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2_p), 16)), v_beta));
+            // Elements 4..7: same computation on the high halves.
+            __m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1_p), 16)), v_alpha);
+            v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2_p), 16)), v_beta));
+            // float -> int32, then narrow to int16 with signed saturation.
+            __m128i v_dst16 = _mm_packs_epi32(_mm_cvtps_epi32(v_dstf0),
+                                              _mm_cvtps_epi32(v_dstf1));
+            // Narrow to int8 with signed saturation; only the low 8 bytes carry results and are stored.
+            _mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dst16, v_zero));
+        }
+
+        return x; // number of elements written to dst (a multiple of 8)
+    }
+
+    bool haveSSE2; // result of checkHardwareSupport(CV_CPU_SSE2), set in the constructor
+};
+
+template <> // SSE2 path for signed 16-bit data with float weights: dst = src1*alpha + src2*beta + gamma
+struct AddWeighted_SIMD<short, float>
+{
+    AddWeighted_SIMD()
+    {
+        haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); // run-time CPU check, cached for operator()
+    }
+    // Processes elements in groups of 8 and returns the index of the first element NOT processed (0 without SSE2).
+    int operator() (const short * src1, const short * src2, short * dst, int width, float alpha, float beta, float gamma) const
+    {
+        int x = 0;
+
+        if (!haveSSE2)
+            return x; // nothing processed; presumably the caller handles all `width` elements itself -- confirm at call site
+
+        __m128i v_zero = _mm_setzero_si128();
+        __m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
+               v_gamma = _mm_set1_ps(gamma);
+
+        for( ; x <= width - 8; x += 8 ) // stops while fewer than 8 elements remain
+        {
+            __m128i v_src1 = _mm_loadu_si128((const __m128i *)(src1 + x)); // unaligned load of 8 shorts
+            __m128i v_src2 = _mm_loadu_si128((const __m128i *)(src2 + x));
+            // Elements 0..3: sign-extend 16 -> 32 bit (value into the high half of a 32-bit lane, arithmetic shift right by 16).
+            __m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)), v_alpha);
+            v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)), v_beta));
+            // Elements 4..7: same computation on the high halves.
+            __m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)), v_alpha);
+            v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)), v_beta));
+            // float -> int32, narrow to int16 with signed saturation, unaligned store of 8 shorts.
+            _mm_storeu_si128((__m128i *)(dst + x), _mm_packs_epi32(_mm_cvtps_epi32(v_dstf0),
+                                                                   _mm_cvtps_epi32(v_dstf1)));
+        }
+
+        return x; // number of elements written to dst (a multiple of 8)
+    }
+
+    bool haveSSE2; // result of checkHardwareSupport(CV_CPU_SSE2), set in the constructor
+};
+
+#elif CV_NEON
 
 template <>
 struct AddWeighted_SIMD<schar, float>

From 00e7816c1bf177887e4645ee2afa811bc72a395a Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 13:33:17 +0300
Subject: [PATCH 02/55] add auxiliary functions to work with Input/Output
 arrays:

they allow performing an asynchronous upload/download into a temporary buffer
to get a valid GpuMat object
---
 .../include/opencv2/core/private.cuda.hpp     | 10 ++++
 modules/core/src/cuda_gpu_mat.cpp             | 47 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index a97388bd05..aaa777c914 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -106,6 +106,16 @@ namespace cv { namespace cuda
         GpuMat::Allocator* allocator_;
     };
 
+    CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream); // _src as a GpuMat; non-GpuMat data is uploaded into a BufferPool buffer
+    // Returns the GpuMat to write results into: _dst itself when it is a GpuMat, otherwise a BufferPool buffer.
+    CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
+    static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream) // Size convenience overload
+    {
+        return getOutputMat(_dst, size.height, size.width, type, stream); // height -> rows, width -> cols
+    }
+    // Downloads dst into _dst when _dst is not a GpuMat; does nothing otherwise.
+    CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
+
     static inline void checkNppError(int code, const char* file, const int line, const char* func)
     {
         if (code < 0)
diff --git a/modules/core/src/cuda_gpu_mat.cpp b/modules/core/src/cuda_gpu_mat.cpp
index 4440d58536..8a7b236acb 100644
--- a/modules/core/src/cuda_gpu_mat.cpp
+++ b/modules/core/src/cuda_gpu_mat.cpp
@@ -342,6 +342,53 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
     }
 }
 
+GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream) // Returns _src as a GpuMat, uploading it on `stream` if it is not one.
+{
+    GpuMat src; // stays default-constructed when _src is an empty non-GpuMat array
+
+    if (_src.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        src = _src.getGpuMat(); // already a GpuMat: use it directly, no upload
+    }
+    else if (!_src.empty())
+    {
+        BufferPool pool(stream); // temporary storage comes from the pool bound to `stream`
+        src = pool.getBuffer(_src.size(), _src.type());
+        src.upload(_src, stream); // upload issued on `stream`; NOTE(review): presumably asynchronous for a non-default stream -- confirm
+    }
+
+    return src;
+}
+
+GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream) // Returns the GpuMat results should be written to.
+{
+    GpuMat dst;
+
+    if (_dst.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        _dst.create(rows, cols, type); // make the caller's GpuMat match the requested rows/cols/type
+        dst = _dst.getGpuMat();
+    }
+    else
+    {
+        BufferPool pool(stream); // _dst is not a GpuMat: compute into a pool buffer instead
+        dst = pool.getBuffer(rows, cols, type); // _dst is not touched here; syncOutput() performs the copy back
+    }
+
+    return dst;
+}
+
+void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream) // Copies dst back into _dst when _dst is not a GpuMat.
+{
+    if (_dst.kind() != _InputArray::CUDA_GPU_MAT) // a GpuMat output needs no copy (getOutputMat hands out _dst's own GpuMat in that case)
+    {
+        if (stream) // NOTE(review): presumably false for the null/default stream -- confirm against Stream's bool conversion
+            dst.download(_dst, stream); // download issued on `stream`
+        else
+            dst.download(_dst); // overload without a stream argument
+    }
+}
+
 #ifndef HAVE_CUDA
 
 GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()

From 3d0410c147b011ab9dfc67fe6b34763d74e4135e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 13:35:23 +0300
Subject: [PATCH 03/55] remove reciprocal version of cuda::divide

it might cause errors, due to implicit type conversion and another cuda::divide
overload
---
 modules/cudaarithm/include/opencv2/cudaarithm.hpp   | 6 ------
 modules/cudaarithm/test/test_element_operations.cpp | 4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
index 98ebfbef88..be095b92e1 100644
--- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp
+++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
@@ -130,12 +130,6 @@ This function, in contrast to divide, uses a round-down rounding mode.
  */
 CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
 
-//! computes element-wise weighted reciprocal of an array (dst = scale/src2)
-static inline void divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null())
-{
-    divide(src1, src2, dst, 1.0, dtype, stream);
-}
-
 /** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar).
 
 @param src1 First source matrix or scalar.
diff --git a/modules/cudaarithm/test/test_element_operations.cpp b/modules/cudaarithm/test/test_element_operations.cpp
index 4a43d9d306..a4a16ea89f 100644
--- a/modules/cudaarithm/test/test_element_operations.cpp
+++ b/modules/cudaarithm/test/test_element_operations.cpp
@@ -1329,7 +1329,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
         try
         {
             cv::cuda::GpuMat dst;
-            cv::cuda::divide(scale, loadMat(mat), dst, depth.second);
+            cv::cuda::divide(scale, loadMat(mat), dst, 1.0, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1339,7 +1339,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
     else
     {
         cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::cuda::divide(scale, loadMat(mat, useRoi), dst, depth.second);
+        cv::cuda::divide(scale, loadMat(mat, useRoi), dst, 1.0, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(scale, mat, dst_gold, depth.second);

From 7454189c2a9fadadb7f3ddfc8b24a2070b70cda6 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 13:38:02 +0300
Subject: [PATCH 04/55] use new getInputMat/getOutputMat/syncOutput methods in
 cudaarithm routines

---
 modules/cudaarithm/src/arithm.cpp             | 22 +++---
 modules/cudaarithm/src/core.cpp               |  6 +-
 modules/cudaarithm/src/cuda/add_weighted.cu   | 28 ++++---
 modules/cudaarithm/src/cuda/bitwise_mat.cu    | 12 ++-
 .../cudaarithm/src/cuda/copy_make_border.cu   | 10 ++-
 modules/cudaarithm/src/cuda/lut.cu            |  9 ++-
 modules/cudaarithm/src/cuda/math.cu           | 77 +++++++++---------
 modules/cudaarithm/src/cuda/mul_spectrums.cu  | 21 +++--
 modules/cudaarithm/src/cuda/polar_cart.cu     | 78 ++++++++++---------
 modules/cudaarithm/src/cuda/reduce.cu         | 10 ++-
 modules/cudaarithm/src/cuda/split_merge.cu    | 22 +++---
 modules/cudaarithm/src/cuda/threshold.cu      | 13 ++--
 modules/cudaarithm/src/cuda/transpose.cu      |  9 ++-
 modules/cudaarithm/src/element_operations.cpp | 75 ++++++++++--------
 14 files changed, 220 insertions(+), 172 deletions(-)

diff --git a/modules/cudaarithm/src/arithm.cpp b/modules/cudaarithm/src/arithm.cpp
index 63246abd57..b2107dd1f6 100644
--- a/modules/cudaarithm/src/arithm.cpp
+++ b/modules/cudaarithm/src/arithm.cpp
@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
 #else
     // CUBLAS works with column-major matrices
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
-    GpuMat src3 = _src3.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
+    GpuMat src3 = getInputMat(_src3, stream);
 
     CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 );
     CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) );
@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
     CV_Assert( src1Size.width == src2Size.height );
     CV_Assert( src3.empty() || src3Size == dstSize );
 
-    _dst.create(dstSize, src1.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream);
 
     if (beta != 0)
     {
@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
     }
 
     cublasSafeCall( cublasDestroy_v2(handle) );
+
+    syncOutput(dst, _dst, stream);
 #endif
 }
 
@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
     (void) stream;
     throw_no_cuda();
 #else
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
 
@@ -462,16 +463,15 @@ namespace
 
     void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
     {
-        GpuMat image = _image.getGpuMat();
-        GpuMat templ = _templ.getGpuMat();
+        GpuMat image = getInputMat(_image, _stream);
+        GpuMat templ = getInputMat(_templ, _stream);
 
         CV_Assert( image.type() == CV_32FC1 );
         CV_Assert( templ.type() == CV_32FC1 );
 
         create(image.size(), templ.size());
 
-        _result.create(result_size, CV_32FC1);
-        GpuMat result = _result.getGpuMat();
+        GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream);
 
         cudaStream_t stream = StreamAccessor::getStream(_stream);
 
@@ -520,6 +520,8 @@ namespace
 
         cufftSafeCall( cufftDestroy(planR2C) );
         cufftSafeCall( cufftDestroy(planC2R) );
+
+        syncOutput(result, _result, _stream);
     }
 }
 
diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp
index eb71d6a4ec..7dd51f9781 100644
--- a/modules/cudaarithm/src/core.cpp
+++ b/modules/cudaarithm/src/core.cpp
@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
         {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
     CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
 
     _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/cudaarithm/src/cuda/add_weighted.cu b/modules/cudaarithm/src/cuda/add_weighted.cu
index d5c00f6072..929301076d 100644
--- a/modules/cudaarithm/src/cuda/add_weighted.cu
+++ b/modules/cudaarithm/src/cuda/add_weighted.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -63,7 +66,7 @@ namespace
 
         __device__ __forceinline__ D operator ()(T1 a, T2 b) const
         {
-            return saturate_cast<D>(a * alpha + b * beta + gamma);
+            return cudev::saturate_cast<D>(a * alpha + b * beta + gamma);
         }
     };
 
@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
         }
     };
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     int sdepth1 = src1.depth();
     int sdepth2 = src2.depth();
@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
     ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
     const int cn = src1.channels();
 
-    CV_DbgAssert( src2.size() == src1.size() && src2.channels() == cn );
-    CV_DbgAssert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
+    CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
+    CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
 
-    _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
 
-    GpuMat src1_ = src1.reshape(1);
-    GpuMat src2_ = src2.reshape(1);
-    GpuMat dst_ = dst.reshape(1);
+    GpuMat src1_single = src1.reshape(1);
+    GpuMat src2_single = src2.reshape(1);
+    GpuMat dst_single = dst.reshape(1);
 
     if (sdepth1 > sdepth2)
     {
-        src1_.swap(src2_);
+        src1_single.swap(src2_single);
         std::swap(alpha, beta);
         std::swap(sdepth1, sdepth2);
     }
@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
     if (!func)
         CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
-    func(src1_, alpha, src2_, beta, gamma, dst_, stream);
+    func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/bitwise_mat.cu b/modules/cudaarithm/src/cuda/bitwise_mat.cu
index b2bf288be7..f151c1a486 100644
--- a/modules/cudaarithm/src/cuda/bitwise_mat.cu
+++ b/modules/cudaarithm/src/cuda/bitwise_mat.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
 
 void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
+    GpuMat mask = getInputMat(_mask, stream);
 
     const int depth = src.depth();
 
     CV_DbgAssert( depth <= CV_32F );
     CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     if (mask.empty())
     {
@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
             gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
         }
     }
+
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
diff --git a/modules/cudaarithm/src/cuda/copy_make_border.cu b/modules/cudaarithm/src/cuda/copy_make_border.cu
index f7dd91f987..ce9cda36cf 100644
--- a/modules/cudaarithm/src/cuda/copy_make_border.cu
+++ b/modules/cudaarithm/src/cuda/copy_make_border.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
         {    copyMakeBorderImpl<float , 1>  , 0 /*copyMakeBorderImpl<float , 2>*/,     copyMakeBorderImpl<float , 3>  ,     copyMakeBorderImpl<float  ,4>  }
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     const int depth = src.depth();
     const int cn = src.channels();
@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
     CV_Assert( depth <= CV_32F && cn <= 4 );
     CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
 
-    _dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
 
     const func_t func = funcs[depth][cn - 1];
 
@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
         CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
     func(src, dst, top, left, borderType, value, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/lut.cu b/modules/cudaarithm/src/cuda/lut.cu
index 0b1fe8b0d5..56efb8fa88 100644
--- a/modules/cudaarithm/src/cuda/lut.cu
+++ b/modules/cudaarithm/src/cuda/lut.cu
@@ -50,8 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
 using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -165,7 +167,7 @@ namespace
 
     void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
     {
-        GpuMat src = _src.getGpuMat();
+        GpuMat src = getInputMat(_src, stream);
 
         const int cn = src.channels();
         const int lut_cn = d_lut.channels();
@@ -173,8 +175,7 @@ namespace
         CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
         CV_Assert( lut_cn == 1 || lut_cn == cn );
 
-        _dst.create(src.size(), src.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
         if (lut_cn == 1)
         {
@@ -196,6 +197,8 @@ namespace
 
             dst3.assign(lut_(src3, tbl), stream);
         }
+
+        syncOutput(dst, _dst, stream);
     }
 }
 
diff --git a/modules/cudaarithm/src/cuda/math.cu b/modules/cudaarithm/src/cuda/math.cu
index 39f822081d..41d762f6a6 100644
--- a/modules/cudaarithm/src/cuda/math.cu
+++ b/modules/cudaarithm/src/cuda/math.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
         absMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert( depth <= CV_64F );
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -113,7 +115,7 @@ namespace
     {
         __device__ __forceinline__ T operator ()(T x) const
         {
-            return saturate_cast<T>(x * x);
+            return cudev::saturate_cast<T>(x * x);
         }
     };
 
@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
         sqrMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert( depth <= CV_64F );
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
         sqrtMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert( depth <= CV_64F );
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -198,7 +198,7 @@ namespace
         __device__ __forceinline__ T operator ()(T x) const
         {
             exp_func<T> f;
-            return saturate_cast<T>(f(x));
+            return cudev::saturate_cast<T>(f(x));
         }
     };
 
@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
         expMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert( depth <= CV_64F );
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
         logMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert( depth <= CV_64F );
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -284,7 +282,7 @@ namespace
 
         __device__ __forceinline__ T operator()(T e) const
         {
-            return saturate_cast<T>(__powf((float)e, power));
+            return cudev::saturate_cast<T>(__powf((float)e, power));
         }
     };
     template<typename T> struct PowOp<T, true> : unary_function<T, T>
@@ -293,7 +291,7 @@ namespace
 
         __device__ __forceinline__ T operator()(T e) const
         {
-            T res = saturate_cast<T>(__powf((float)e, power));
+            T res = cudev::saturate_cast<T>(__powf((float)e, power));
 
             if ((e < 0) && (1 & static_cast<int>(power)))
                 res *= -1;
@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
         powMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    const int depth = src.depth();
+    CV_Assert( src.depth() <= CV_64F );
 
-    CV_DbgAssert(depth <= CV_64F);
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
 
-    funcs[depth](src.reshape(1), power, dst.reshape(1), stream);
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/mul_spectrums.cu b/modules/cudaarithm/src/cuda/mul_spectrums.cu
index b060904816..bd62f99030 100644
--- a/modules/cudaarithm/src/cuda/mul_spectrums.cu
+++ b/modules/cudaarithm/src/cuda/mul_spectrums.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 //////////////////////////////////////////////////////////////////////////////
@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
 {
     (void) flags;
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
     CV_Assert( src1.size() == src2.size() );
 
-    _dst.create(src1.size(), CV_32FC2);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
 
     if (conjB)
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
     else
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
 {
     (void) flags;
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
     CV_Assert( src1.size() == src2.size() );
 
-    _dst.create(src1.size(), CV_32FC2);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
 
     if (conjB)
     {
@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
         op.scale = scale;
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
     }
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu
index 200b79c055..0a949b42ed 100644
--- a/modules/cudaarithm/src/cuda/polar_cart.cu
+++ b/modules/cudaarithm/src/cuda/polar_cart.cu
@@ -50,55 +50,59 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);
 
-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );
 
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
 
     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
     GpuMat_<float> magc(dst.reshape(1));
 
     gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);
 
-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );
 
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
 
     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
     GpuMat_<float> magc(dst.reshape(1));
 
     gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);
 
-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );
 
-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
 
     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
         gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
     else
         gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);
 
-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );
 
-    _mag.create(x.size(), CV_32FC1);
-    GpuMat mag = _mag.getGpuMat();
-
-    _angle.create(x.size(), CV_32FC1);
-    GpuMat angle = _angle.getGpuMat();
+    GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream);
+    GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream);
 
     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
                                binaryTupleAdapter<0, 1>(direction_func<float, false>())),
                            stream);
     }
+
+    syncOutput(mag, _mag, stream);
+    syncOutput(angle, _angle, stream);
 }
 
 namespace
@@ -173,17 +179,14 @@ namespace
 
 void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
 {
-    GpuMat mag = _mag.getGpuMat();
-    GpuMat angle = _angle.getGpuMat();
+    GpuMat mag = getInputMat(_mag, _stream);
+    GpuMat angle = getInputMat(_angle, _stream);
 
-    CV_DbgAssert( angle.depth() == CV_32F );
-    CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
+    CV_Assert( angle.depth() == CV_32F );
+    CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
 
-    _x.create(angle.size(), CV_32FC1);
-    GpuMat x = _x.getGpuMat();
-
-    _y.create(angle.size(), CV_32FC1);
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream);
+    GpuMat y = getOutputMat(_y, angle.size(), CV_32FC1, _stream);
 
     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
 
     CV_CUDEV_SAFE_CALL( cudaGetLastError() );
 
+    syncOutput(x, _x, _stream);
+    syncOutput(y, _y, _stream);
+
     if (stream == 0)
         CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
 }
diff --git a/modules/cudaarithm/src/cuda/reduce.cu b/modules/cudaarithm/src/cuda/reduce.cu
index 2cb2dacc73..5fb90287a9 100644
--- a/modules/cudaarithm/src/cuda/reduce.cu
+++ b/modules/cudaarithm/src/cuda/reduce.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -125,7 +128,7 @@ namespace
 
 void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.channels() <= 4 );
     CV_Assert( dim == 0 || dim == 1 );
@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
     if (dtype < 0)
         dtype = src.depth();
 
-    _dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
 
     if (dim == 0)
     {
@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
 
         func(src, dst, reduceOp, stream);
     }
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/split_merge.cu b/modules/cudaarithm/src/cuda/split_merge.cu
index 13d6a349fb..5b3af10775 100644
--- a/modules/cudaarithm/src/cuda/split_merge.cu
+++ b/modules/cudaarithm/src/cuda/split_merge.cu
@@ -50,7 +50,10 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 ////////////////////////////////////////////////////////////////////////
@@ -92,20 +95,18 @@ namespace
 
     void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
     {
-        CV_DbgAssert( src != 0 );
-        CV_DbgAssert( n > 0 && n <= 4 );
+        CV_Assert( src != 0 );
+        CV_Assert( n > 0 && n <= 4 );
 
         const int depth = src[0].depth();
         const cv::Size size = src[0].size();
 
-#ifdef _DEBUG
         for (size_t i = 0; i < n; ++i)
         {
             CV_Assert( src[i].size() == size );
             CV_Assert( src[i].depth() == depth );
             CV_Assert( src[i].channels() == 1 );
         }
-#endif
 
         if (n == 1)
         {
@@ -123,8 +124,7 @@ namespace
 
             const int channels = static_cast<int>(n);
 
-            _dst.create(size, CV_MAKE_TYPE(depth, channels));
-            GpuMat dst = _dst.getGpuMat();
+            GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);
 
             const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
 
@@ -132,6 +132,8 @@ namespace
                 CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
 
             func(src, dst, stream);
+
+            syncOutput(dst, _dst, stream);
         }
     }
 }
@@ -203,12 +205,12 @@ namespace
             {SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
         };
 
-        CV_DbgAssert( dst != 0 );
+        CV_Assert( dst != 0 );
 
         const int depth = src.depth();
         const int channels = src.channels();
 
-        CV_DbgAssert( channels <= 4 );
+        CV_Assert( channels <= 4 );
 
         if (channels == 0)
             return;
@@ -233,13 +235,13 @@ namespace
 
 void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
     splitImpl(src, dst, stream);
 }
 
 void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
     dst.resize(src.channels());
     if (src.channels() > 0)
         splitImpl(src, &dst[0], stream);
diff --git a/modules/cudaarithm/src/cuda/threshold.cu b/modules/cudaarithm/src/cuda/threshold.cu
index 21665cbe73..a5b8f07ce3 100644
--- a/modules/cudaarithm/src/cuda/threshold.cu
+++ b/modules/cudaarithm/src/cuda/threshold.cu
@@ -52,6 +52,8 @@
 #include "opencv2/cudev.hpp"
 #include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
@@ -95,15 +97,14 @@ namespace
 
 double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     const int depth = src.depth();
 
-    CV_DbgAssert( src.channels() == 1 && depth <= CV_64F );
-    CV_DbgAssert( type <= 4 /*THRESH_TOZERO_INV*/ );
+    CV_Assert( src.channels() == 1 && depth <= CV_64F );
+    CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
     {
@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
         funcs[depth](src, dst, thresh, maxVal, type, stream);
     }
 
+    syncOutput(dst, _dst, stream);
+
     return thresh;
 }
 
diff --git a/modules/cudaarithm/src/cuda/transpose.cu b/modules/cudaarithm/src/cuda/transpose.cu
index aa85004425..bfe50bd34f 100644
--- a/modules/cudaarithm/src/cuda/transpose.cu
+++ b/modules/cudaarithm/src/cuda/transpose.cu
@@ -52,18 +52,19 @@
 #include "opencv2/cudev.hpp"
 #include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     const size_t elemSize = src.elemSize();
 
     CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
 
-    _dst.create( src.cols, src.rows, src.type() );
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);
 
     if (elemSize == 1)
     {
@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
     {
         gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
     }
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/element_operations.cpp b/modules/cudaarithm/src/element_operations.cpp
index 795d7ffaa7..f88119502d 100644
--- a/modules/cudaarithm/src/element_operations.cpp
+++ b/modules/cudaarithm/src/element_operations.cpp
@@ -107,11 +107,11 @@ namespace
 
         GpuMat src1;
         if (!isScalar1)
-            src1 = _src1.getGpuMat();
+            src1 = getInputMat(_src1, stream);
 
         GpuMat src2;
         if (!isScalar2)
-            src2 = _src2.getGpuMat();
+            src2 = getInputMat(_src2, stream);
 
         Mat scalar;
         if (isScalar1)
@@ -126,7 +126,7 @@ namespace
             scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
         }
 
-        GpuMat mask = _mask.getGpuMat();
+        GpuMat mask = getInputMat(_mask, stream);
 
         const int sdepth = src1.empty() ? src2.depth() : src1.depth();
         const int cn = src1.empty() ? src2.channels() : src1.channels();
@@ -147,8 +147,7 @@ namespace
                 CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
         }
 
-        _dst.create(size, CV_MAKE_TYPE(ddepth, cn));
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);
 
         if (isScalar1)
             mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
@@ -156,6 +155,8 @@ namespace
             mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
         else
             mat_mat_func(src1, src2, dst, mask, scale, stream, op);
+
+        syncOutput(dst, _dst, stream);
     }
 }
 
@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
 {
     if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
     {
-        GpuMat src1 = _src1.getGpuMat();
-        GpuMat src2 = _src2.getGpuMat();
+        GpuMat src1 = getInputMat(_src1, stream);
+        GpuMat src2 = getInputMat(_src2, stream);
 
         CV_Assert( src1.size() == src2.size() );
 
-        _dst.create(src1.size(), src1.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
 
         mulMat_8uc4_32f(src1, src2, dst, stream);
+
+        syncOutput(dst, _dst, stream);
     }
     else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
     {
-        GpuMat src1 = _src1.getGpuMat();
-        GpuMat src2 = _src2.getGpuMat();
+        GpuMat src1 = getInputMat(_src1, stream);
+        GpuMat src2 = getInputMat(_src2, stream);
 
         CV_Assert( src1.size() == src2.size() );
 
-        _dst.create(src1.size(), src1.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
 
         mulMat_16sc4_32f(src1, src2, dst, stream);
+
+        syncOutput(dst, _dst, stream);
     }
     else
     {
@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
 {
     if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
     {
-        GpuMat src1 = _src1.getGpuMat();
-        GpuMat src2 = _src2.getGpuMat();
+        GpuMat src1 = getInputMat(_src1, stream);
+        GpuMat src2 = getInputMat(_src2, stream);
 
         CV_Assert( src1.size() == src2.size() );
 
-        _dst.create(src1.size(), src1.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
 
         divMat_8uc4_32f(src1, src2, dst, stream);
+
+        syncOutput(dst, _dst, stream);
     }
     else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
     {
-        GpuMat src1 = _src1.getGpuMat();
-        GpuMat src2 = _src2.getGpuMat();
+        GpuMat src1 = getInputMat(_src1, stream);
+        GpuMat src2 = getInputMat(_src2, stream);
 
         CV_Assert( src1.size() == src2.size() );
 
-        _dst.create(src1.size(), src1.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
 
         divMat_16sc4_32f(src1, src2, dst, stream);
+
+        syncOutput(dst, _dst, stream);
     }
     else
     {
@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
         {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.depth() < CV_32F );
     CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
         {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
     CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -475,22 +482,24 @@ namespace
 
 void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
 
     npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
 
     npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif

From a4e598f474b5986d0be9ba0b0fafcbf5ee82fe02 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 13:39:37 +0300
Subject: [PATCH 05/55] use new BufferPool class for some cudaarithm routines

---
 modules/cudaarithm/src/arithm.cpp       | 15 ++++++++++----
 modules/cudaarithm/src/cuda/integral.cu | 27 ++++++++++++++++---------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/modules/cudaarithm/src/arithm.cpp b/modules/cudaarithm/src/arithm.cpp
index b2107dd1f6..08de4e4288 100644
--- a/modules/cudaarithm/src/arithm.cpp
+++ b/modules/cudaarithm/src/arithm.cpp
@@ -315,13 +315,20 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
     // We don't support real-to-real transform
     CV_Assert( is_complex_input || is_complex_output );
 
-    GpuMat src_cont = src;
-
     // Make sure here we work with the continuous input,
     // as CUFFT can't handle gaps
-    createContinuous(src.rows, src.cols, src.type(), src_cont);
-    if (src_cont.data != src.data)
+    GpuMat src_cont;
+    if (src.isContinuous())
+    {
+        src_cont = src;
+    }
+    else
+    {
+        BufferPool pool(stream);
+        src_cont.allocator = pool.getAllocator();
+        createContinuous(src.rows, src.cols, src.type(), src_cont);
         src.copyTo(src_cont, stream);
+    }
 
     Size dft_size_opt = dft_size;
     if (is_1d_input && !is_row_dft)
diff --git a/modules/cudaarithm/src/cuda/integral.cu b/modules/cudaarithm/src/cuda/integral.cu
index db554eb301..4a70ab0de8 100644
--- a/modules/cudaarithm/src/cuda/integral.cu
+++ b/modules/cudaarithm/src/cuda/integral.cu
@@ -50,51 +50,58 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 ////////////////////////////////////////////////////////////////////////
 // integral
 
-void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& stream)
+void cv::cuda::integral(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_8UC1 );
 
-    GpuMat_<int>& res = (GpuMat_<int>&) buffer;
+    BufferPool pool(stream);
+    GpuMat_<int> res(src.size(), pool.getAllocator());
 
     gridIntegral(globPtr<uchar>(src), res, stream);
 
-    _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_32SC1, stream);
 
     dst.setTo(Scalar::all(0), stream);
 
     GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
     res.copyTo(inner, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // sqrIntegral
 
-void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& stream)
+void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_8UC1 );
 
-    GpuMat_<double>& res = (GpuMat_<double>&) buf;
+    BufferPool pool(stream);
+    GpuMat_<double> res(pool.getBuffer(src.size(), CV_64FC1));
 
     gridIntegral(sqr_(cvt_<int>(globPtr<uchar>(src))), res, stream);
 
-    _dst.create(src.rows + 1, src.cols + 1, CV_64FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_64FC1, stream);
 
     dst.setTo(Scalar::all(0), stream);
 
     GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
     res.copyTo(inner, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif

From cd0e95de165d89359b9641e6735d9b1b7216b698 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 13:40:33 +0300
Subject: [PATCH 06/55] refactor cudaarithm reductions:

* remove overloads with explicit buffer, now BufferPool is used
* added async versions for all reduce functions
---
 modules/cuda/src/cascadeclassifier.cpp        |   3 +-
 .../cudaarithm/include/opencv2/cudaarithm.hpp | 193 ++----------
 modules/cudaarithm/perf/perf_reductions.cpp   |  34 +-
 modules/cudaarithm/src/cuda/countnonzero.cu   |  59 ++--
 modules/cudaarithm/src/cuda/minmax.cu         | 138 +++++++--
 modules/cudaarithm/src/cuda/minmaxloc.cu      | 130 +++++---
 modules/cudaarithm/src/cuda/norm.cu           | 126 ++++++--
 modules/cudaarithm/src/cuda/normalize.cu      | 290 ++++++++++++++++++
 modules/cudaarithm/src/cuda/sum.cu            | 174 +++++++----
 modules/cudaarithm/src/reductions.cpp         | 200 ++++++------
 modules/cudaarithm/test/test_reductions.cpp   | 205 ++++++++++++-
 modules/cudabgsegm/src/fgd.cpp                |   8 +-
 modules/cudafilters/src/filtering.cpp         |   2 +-
 modules/cudaimgproc/src/gftt.cpp              |   3 +-
 modules/cudaimgproc/src/match_template.cpp    |  23 +-
 samples/gpu/performance/tests.cpp             |   6 +-
 16 files changed, 1075 insertions(+), 519 deletions(-)
 create mode 100644 modules/cudaarithm/src/cuda/normalize.cu

diff --git a/modules/cuda/src/cascadeclassifier.cpp b/modules/cuda/src/cascadeclassifier.cpp
index c4e9870151..259712b89f 100644
--- a/modules/cuda/src/cascadeclassifier.cpp
+++ b/modules/cuda/src/cascadeclassifier.cpp
@@ -454,11 +454,10 @@ public:
                 // create sutable matrix headers
                 GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                 GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
-                GpuMat buff = integralBuffer;
 
                 // generate integral for scale
                 cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
-                cuda::integral(src, sint, buff);
+                cuda::integral(src, sint);
 
                 // calculate job
                 int totalWidth = level.workArea.width / step;
diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
index be095b92e1..6e475db985 100644
--- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp
+++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp
@@ -524,116 +524,53 @@ CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bot
 @param src1 Source matrix. Any matrices except 64F are supported.
 @param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 @sa norm
  */
-CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline double norm(InputArray src, int normType)
-{
-    GpuMat buf;
-    return norm(src, normType, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline double norm(InputArray src, int normType, GpuMat& buf)
-{
-    return norm(src, normType, GpuMat(), buf);
-}
+CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask = noArray());
+/** @brief Asynchronous version of norm: stores the result in dst instead of returning it. */
+CV_EXPORTS void calcNorm(InputArray src, OutputArray dst, int normType, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the difference of two matrices.
 
 @param src1 Source matrix. Any matrices except 64F are supported.
 @param src2 Second source matrix (if any) with the same size and type as src1.
 @param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 @sa norm
  */
-CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2);
-/** @overload
-uses new buffer
-*/
-static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2)
-{
-    GpuMat buf;
-    return norm(src1, src2, buf, normType);
-}
+CV_EXPORTS double norm(InputArray src1, InputArray src2, int normType=NORM_L2);
+/** @brief Asynchronous version of the two-matrix norm: stores the result in dst instead of returning it. */
+CV_EXPORTS void calcNormDiff(InputArray src1, InputArray src2, OutputArray dst, int normType=NORM_L2, Stream& stream = Stream::Null());
 
 /** @brief Returns the sum of matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 @sa sum
  */
-CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar sum(InputArray src)
-{
-    GpuMat buf;
-    return sum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar sum(InputArray src, GpuMat& buf)
-{
-    return sum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar sum(InputArray src, InputArray mask = noArray());
+/** @brief Asynchronous version of sum: stores the result in dst instead of returning it. */
+CV_EXPORTS void calcSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the sum of absolute values for matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
  */
-CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar absSum(InputArray src)
-{
-    GpuMat buf;
-    return absSum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar absSum(InputArray src, GpuMat& buf)
-{
-    return absSum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar absSum(InputArray src, InputArray mask = noArray());
+/** @brief Asynchronous version of absSum: stores the result in dst instead of returning it. */
+CV_EXPORTS void calcAbsSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the squared sum of matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
  */
-CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar sqrSum(InputArray src)
-{
-    GpuMat buf;
-    return sqrSum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar sqrSum(InputArray src, GpuMat& buf)
-{
-    return sqrSum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask = noArray());
+/** @brief Asynchronous version of sqrSum: stores the result in dst instead of returning it. */
+CV_EXPORTS void calcSqrSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Finds global minimum and maximum matrix elements and returns their values.
 
@@ -641,21 +578,14 @@ static inline Scalar sqrSum(InputArray src, GpuMat& buf)
 @param minVal Pointer to the returned minimum value. Use NULL if not required.
 @param maxVal Pointer to the returned maximum value. Use NULL if not required.
 @param mask Optional mask to select a sub-matrix.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
 
 @sa minMaxLoc
  */
-CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray())
-{
-    GpuMat buf;
-    minMax(src, minVal, maxVal, mask, buf);
-}
+CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask = noArray());
+/** @brief Asynchronous version of minMax: stores the results in dst instead of writing through pointers. */
+CV_EXPORTS void findMinMax(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Finds global minimum and maximum matrix elements and returns their values with locations.
 
@@ -665,44 +595,28 @@ static inline void minMax(InputArray src, double* minVal, double* maxVal=0, Inpu
 @param minLoc Pointer to the returned minimum location. Use NULL if not required.
 @param maxLoc Pointer to the returned maximum location. Use NULL if not required.
 @param mask Optional mask to select a sub-matrix.
-@param valbuf Optional values buffer to avoid extra memory allocations. It is resized
-automatically.
-@param locbuf Optional locations buffer to avoid extra memory allocations. It is resized
-automatically.
+
 The function does not work with CV_64F images on GPU with the compute capability \< 1.3.
 
 @sa minMaxLoc
  */
 CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                          InputArray mask, GpuMat& valbuf, GpuMat& locbuf);
-/** @overload
-uses new buffer
-*/
-static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
-                             InputArray mask=noArray())
-{
-    GpuMat valBuf, locBuf;
-    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
-}
+                          InputArray mask = noArray());
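+/** @brief Asynchronous version of minMaxLoc: stores the results in minMaxVals and loc instead of writing through pointers. */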
+CV_EXPORTS void findMinMaxLoc(InputArray src, OutputArray minMaxVals, OutputArray loc,
+                              InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Counts non-zero matrix elements.
 
 @param src Single-channel source image.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
 
 @sa countNonZero
  */
-CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline int countNonZero(const GpuMat& src)
-{
-    GpuMat buf;
-    return countNonZero(src, buf);
-}
+CV_EXPORTS int countNonZero(InputArray src);
+/** @overload */
+CV_EXPORTS void countNonZero(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
 
 /** @brief Reduces a matrix to a vector.
 
@@ -737,19 +651,12 @@ CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, i
 @param mtx Source matrix. CV_8UC1 matrices are supported for now.
 @param mean Mean value.
 @param stddev Standard deviation value.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 @sa meanStdDev
  */
-CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
-{
-    GpuMat buf;
-    meanStdDev(src, mean, stddev, buf);
-}
+CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
+/** @overload */
+CV_EXPORTS void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
 
 /** @brief Computes a standard deviation of integral images.
 
@@ -773,64 +680,32 @@ normalization.
 @param dtype When negative, the output array has the same type as src; otherwise, it has the same
 number of channels as src and the depth =CV_MAT_DEPTH(dtype).
 @param mask Optional operation mask.
-@param norm_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
-@param cvt_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
+@param stream Stream for the asynchronous version.
 
 @sa normalize
  */
 CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta,
-                          int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf);
-/** @overload
-uses new buffers
-*/
-static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0,
-                             int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray())
-{
-    GpuMat norm_buf;
-    GpuMat cvt_buf;
-    normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf);
-}
+                          int norm_type, int dtype, InputArray mask = noArray(),
+                          Stream& stream = Stream::Null());
 
 /** @brief Computes an integral image.
 
 @param src Source image. Only CV_8UC1 images are supported for now.
 @param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 .
-@param buffer Optional buffer to avoid extra memory allocations. It is resized automatically.
 @param stream Stream for the asynchronous version.
 
 @sa integral
  */
-CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null());
-static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null())
-{
-    integral(src, sum, buffer, stream);
-}
-/** @overload
-uses new buffer
-*/
-static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null())
-{
-    GpuMat buffer;
-    integral(src, sum, buffer, stream);
-}
+CV_EXPORTS void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null());
 
 /** @brief Computes a squared integral image.
 
 @param src Source image. Only CV_8UC1 images are supported for now.
 @param sqsum Squared integral image containing 64-bit unsigned integer values packed into
 CV_64FC1 .
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @param stream Stream for the asynchronous version.
  */
-CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null());
-/** @overload
-uses new buffer
-*/
-static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null())
-{
-    GpuMat buffer;
-    sqrIntegral(src, sqsum, buffer, stream);
-}
+CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null());
 
 //! @} cudaarithm_reduce
 
diff --git a/modules/cudaarithm/perf/perf_reductions.cpp b/modules/cudaarithm/perf/perf_reductions.cpp
index 470df48a3f..78699c0a74 100644
--- a/modules/cudaarithm/perf/perf_reductions.cpp
+++ b/modules/cudaarithm/perf/perf_reductions.cpp
@@ -108,10 +108,9 @@ PERF_TEST_P(Sz_Norm, NormDiff,
     {
         const cv::cuda::GpuMat d_src1(src1);
         const cv::cuda::GpuMat d_src2(src2);
-        cv::cuda::GpuMat d_buf;
         double gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, d_buf, normType);
+        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, normType);
 
         SANITY_CHECK(gpu_dst);
 
@@ -146,10 +145,9 @@ PERF_TEST_P(Sz_Depth_Cn, Sum,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
     }
@@ -183,10 +181,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumAbs,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
@@ -216,10 +213,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumSqr,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
@@ -248,10 +244,9 @@ PERF_TEST_P(Sz_Depth, MinMax,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         double gpu_minVal, gpu_maxVal;
 
-        TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat(), d_buf);
+        TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat());
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);
@@ -286,11 +281,10 @@ PERF_TEST_P(Sz_Depth, MinMaxLoc,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_valbuf, d_locbuf;
         double gpu_minVal, gpu_maxVal;
         cv::Point gpu_minLoc, gpu_maxLoc;
 
-        TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::cuda::GpuMat(), d_valbuf, d_locbuf);
+        TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc);
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);
@@ -323,10 +317,9 @@ PERF_TEST_P(Sz_Depth, CountNonZero,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         int gpu_dst = 0;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src);
 
         SANITY_CHECK(gpu_dst);
     }
@@ -414,9 +407,8 @@ PERF_TEST_P(Sz_Depth_NormType, Normalize,
     {
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
-        cv::cuda::GpuMat d_norm_buf, d_cvt_buf;
 
-        TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat(), d_norm_buf, d_cvt_buf);
+        TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat());
 
         CUDA_SANITY_CHECK(dst, 1e-6);
     }
@@ -445,11 +437,10 @@ PERF_TEST_P(Sz, MeanStdDev,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_mean;
         cv::Scalar gpu_stddev;
 
-        TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
+        TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev);
 
         SANITY_CHECK(gpu_mean);
         SANITY_CHECK(gpu_stddev);
@@ -481,9 +472,8 @@ PERF_TEST_P(Sz, Integral,
     {
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
-        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
+        TEST_CYCLE() cv::cuda::integral(d_src, dst);
 
         CUDA_SANITY_CHECK(dst);
     }
@@ -511,9 +501,9 @@ PERF_TEST_P(Sz, IntegralSqr,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat dst, buf;
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
+        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst);
 
         CUDA_SANITY_CHECK(dst);
     }
diff --git a/modules/cudaarithm/src/cuda/countnonzero.cu b/modules/cudaarithm/src/cuda/countnonzero.cu
index 5de2609093..fb7324660a 100644
--- a/modules/cudaarithm/src/cuda/countnonzero.cu
+++ b/modules/cudaarithm/src/cuda/countnonzero.cu
@@ -50,47 +50,64 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    int countNonZeroImpl(const GpuMat& _src, GpuMat& _buf)
+    template <typename T, typename D> // T: source element type, D: element type of the result matrix
+    void countNonZeroImpl(const GpuMat& _src, GpuMat& _dst, Stream& stream)
     {
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+        GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
 
-        gridCountNonZero(src, buf);
-
-        int data;
-        buf.download(cv::Mat(1, 1, buf.type(), &data));
-
-        return data;
+        gridCountNonZero(src, dst, stream); // the count is left in dst; nothing is downloaded to the host here
     }
 }
 
-int cv::cuda::countNonZero(InputArray _src, GpuMat& buf)
+void cv::cuda::countNonZero(InputArray _src, OutputArray _dst, Stream& stream) // writes the count to _dst instead of returning it
 {
-    typedef int (*func_t)(const GpuMat& _src, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Stream& stream);
     static const func_t funcs[] =
     {
-        countNonZeroImpl<uchar>,
-        countNonZeroImpl<schar>,
-        countNonZeroImpl<ushort>,
-        countNonZeroImpl<short>,
-        countNonZeroImpl<int>,
-        countNonZeroImpl<float>,
-        countNonZeroImpl<double>
+        countNonZeroImpl<uchar, int>, // table is indexed by src.depth(); every entry produces an int result
+        countNonZeroImpl<schar, int>,
+        countNonZeroImpl<ushort, int>,
+        countNonZeroImpl<short, int>,
+        countNonZeroImpl<int, int>,
+        countNonZeroImpl<float, int>,
+        countNonZeroImpl<double, int>,
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
+    CV_Assert( src.depth() <= CV_64F ); // guards the funcs[src.depth()] lookup below
     CV_Assert( src.channels() == 1 );
 
-    const func_t func = funcs[src.depth()];
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_32SC1, stream); // 1x1 CV_32SC1 result
 
-    return func(src, buf);
+    const func_t func = funcs[src.depth()];
+    func(src, dst, stream);
+
+    syncOutput(dst, _dst, stream);
+}
+
+int cv::cuda::countNonZero(InputArray _src) // host-returning overload built on the OutputArray/Stream overload
+{
+    Stream& stream = Stream::Null();
+
+    BufferPool pool(stream);
+    GpuMat buf = pool.getBuffer(1, 1, CV_32SC1); // 1x1 CV_32SC1 scratch buffer taken from the pool
+
+    countNonZero(_src, buf, stream);
+
+    int data;
+    buf.download(Mat(1, 1, CV_32SC1, &data)); // the Mat header is built over &data, which is returned below
+
+    return data;
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/minmax.cu b/modules/cudaarithm/src/cuda/minmax.cu
index 084bed8706..517427073a 100644
--- a/modules/cudaarithm/src/cuda/minmax.cu
+++ b/modules/cudaarithm/src/cuda/minmax.cu
@@ -50,62 +50,148 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal)
+    // Runs gridFindMinMaxVal over src (restricted by mask when it is non-empty); the result is left in _dst.
+    template <typename T, typename R>
+    void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
     {
-        typedef typename SelectIf<
-                TypesEquals<T, double>::value,
-                double,
-                typename SelectIf<TypesEquals<T, float>::value, float, int>::type
-                >::type work_type;
-
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<work_type>& buf = (GpuMat_<work_type>&) _buf;
+        GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
 
         if (mask.empty())
-            gridFindMinMaxVal(src, buf);
+            gridFindMinMaxVal(src, dst, stream);
         else
-            gridFindMinMaxVal(src, buf, globPtr<uchar>(mask));
+            gridFindMinMaxVal(src, dst, globPtr<uchar>(mask), stream);
+    }
 
-        work_type data[2];
-        buf.download(cv::Mat(1, 2, buf.type(), data));
+    // Host-pointer helper: reduces on Stream::Null(), downloads the two results and
+    // stores data[0] / data[1] through minVal / maxVal when they are non-null.
+    template <typename T, typename R>
+    void minMaxImpl(const GpuMat& src, const GpuMat& mask, double* minVal, double* maxVal)
+    {
+        BufferPool pool(Stream::Null());
+        GpuMat buf(pool.getBuffer(1, 2, DataType<R>::type));
 
+        minMaxImpl<T, R>(src, mask, buf, Stream::Null());
+
+        R data[2];
+        buf.download(Mat(1, 2, buf.type(), data));
+
         if (minVal)
             *minVal = data[0];
 
         if (maxVal)
             *maxVal = data[1];
     }
 }
 
-void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
+void cv::cuda::findMinMax(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) // writes {min, max} to _dst
 {
-    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal);
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
     static const func_t funcs[] =
     {
-        minMaxImpl<uchar>,
-        minMaxImpl<schar>,
-        minMaxImpl<ushort>,
-        minMaxImpl<short>,
-        minMaxImpl<int>,
-        minMaxImpl<float>,
-        minMaxImpl<double>
+        minMaxImpl<uchar, int>, // table is indexed by src.depth(); second argument is the result element type
+        minMaxImpl<schar, int>,
+        minMaxImpl<ushort, int>,
+        minMaxImpl<short, int>,
+        minMaxImpl<int, int>,
+        minMaxImpl<float, float>,
+        minMaxImpl<double, double>
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
     CV_Assert( src.channels() == 1 );
-    CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( src.depth() <= CV_64F ); // keeps the funcs[src.depth()] lookup below in bounds
+    const int src_depth = src.depth();
+    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth; // integer sources report CV_32S; float/double keep their depth
+
+    GpuMat dst = getOutputMat(_dst, 1, 2, dst_depth, stream); // 1x2 result
 
     const func_t func = funcs[src.depth()];
+    func(src, mask, dst, stream);
 
-    func(src, mask, buf, minVal, maxVal);
+    syncOutput(dst, _dst, stream);
+}
+
+void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask) // host-pointer wrapper around findMinMax
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst; // host-side destination for the {min, max} pair
+    findMinMax(_src, dst, _mask, stream);
+
+    stream.waitForCompletion(); // the result is read on the host immediately below
+
+    double vals[2];
+    dst.createMatHeader().convertTo(Mat(1, 2, CV_64FC1, &vals[0]), CV_64F); // findMinMax emits CV_32S/CV_32F/CV_64F; widen to double
+
+    if (minVal) // both output pointers are optional
+        *minVal = vals[0];
+
+    if (maxVal)
+        *maxVal = vals[1];
+}
+
+namespace cv { namespace cuda { namespace internal {
+// Forward declaration only; the definition appears further down in this file.
+void findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream);
+
+}}}
+
+namespace
+{
+    template <typename T, typename R> // T: source element type, R: element type of the result matrix
+    void findMaxAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
+    {
+        const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
+        GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
+
+        if (mask.empty())
+            gridFindMaxVal(abs_(src), dst, stream); // maximum of abs_(src) over the whole matrix
+        else
+            gridFindMaxVal(abs_(src), dst, globPtr<uchar>(mask), stream); // same reduction, with the mask supplied
+    }
+}
+
+void cv::cuda::internal::findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) // writes max(abs(src)) to _dst
+{
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
+    static const func_t funcs[] =
+    {
+        findMaxAbsImpl<uchar, int>, // table is indexed by src.depth(); second argument is the result element type
+        findMaxAbsImpl<schar, int>,
+        findMaxAbsImpl<ushort, int>,
+        findMaxAbsImpl<short, int>,
+        findMaxAbsImpl<int, int>,
+        findMaxAbsImpl<float, float>,
+        findMaxAbsImpl<double, double>
+    };
+
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
+
+    CV_Assert( src.channels() == 1 );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( src.depth() <= CV_64F ); // keeps the funcs[src.depth()] lookup below in bounds
+    const int src_depth = src.depth();
+    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth; // integer sources report CV_32S; float/double keep their depth
+
+    GpuMat dst = getOutputMat(_dst, 1, 1, dst_depth, stream); // 1x1 result
+
+    const func_t func = funcs[src.depth()];
+    func(src, mask, dst, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/minmaxloc.cu b/modules/cudaarithm/src/cuda/minmaxloc.cu
index 6f8cc53d6b..b7c5ec872f 100644
--- a/modules/cudaarithm/src/cuda/minmaxloc.cu
+++ b/modules/cudaarithm/src/cuda/minmaxloc.cu
@@ -50,78 +50,110 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc)
+    template <typename T, typename R> // T: source element type, R: element type of the value buffer
+    void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream)
     {
-        typedef typename SelectIf<
-                TypesEquals<T, double>::value,
-                double,
-                typename SelectIf<TypesEquals<T, float>::value, float, int>::type
-                >::type work_type;
-
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<work_type>& valBuf = (GpuMat_<work_type>&) _valBuf;
+        GpuMat_<R>& valBuf = (GpuMat_<R>&) _valBuf;
         GpuMat_<int>& locBuf = (GpuMat_<int>&) _locBuf;
 
         if (mask.empty())
-            gridMinMaxLoc(src, valBuf, locBuf);
+            gridMinMaxLoc(src, valBuf, locBuf, stream);
         else
-            gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask));
-
-        cv::Mat_<work_type> h_valBuf;
-        cv::Mat_<int> h_locBuf;
-
-        valBuf.download(h_valBuf);
-        locBuf.download(h_locBuf);
-
-        if (minVal)
-            *minVal = h_valBuf(0, 0);
-
-        if (maxVal)
-            *maxVal = h_valBuf(1, 0);
-
-        if (minLoc)
-        {
-            const int idx = h_locBuf(0, 0);
-            *minLoc = cv::Point(idx % src.cols, idx / src.cols);
-        }
-
-        if (maxLoc)
-        {
-            const int idx = h_locBuf(1, 0);
-            *maxLoc = cv::Point(idx % src.cols, idx / src.cols);
-        }
+            gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask), stream); // results stay in valBuf/locBuf; no host download here
     }
 }
 
-void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask, GpuMat& valBuf, GpuMat& locBuf)
+void cv::cuda::findMinMaxLoc(InputArray _src, OutputArray _minMaxVals, OutputArray _loc, InputArray _mask, Stream& stream)
 {
-    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc);
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream);
     static const func_t funcs[] =
     {
-        minMaxLocImpl<uchar>,
-        minMaxLocImpl<schar>,
-        minMaxLocImpl<ushort>,
-        minMaxLocImpl<short>,
-        minMaxLocImpl<int>,
-        minMaxLocImpl<float>,
-        minMaxLocImpl<double>
+        minMaxLocImpl<uchar, int>, // table is indexed by the source depth; second argument is the value element type
+        minMaxLocImpl<schar, int>,
+        minMaxLocImpl<ushort, int>,
+        minMaxLocImpl<short, int>,
+        minMaxLocImpl<int, int>,
+        minMaxLocImpl<float, float>,
+        minMaxLocImpl<double, double>
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
     CV_Assert( src.channels() == 1 );
-    CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
 
-    const func_t func = funcs[src.depth()];
+    const int src_depth = src.depth();
 
-    func(src, mask, valBuf, locBuf, minVal, maxVal, minLoc, maxLoc);
+    BufferPool pool(stream);
+    GpuMat valBuf(pool.getAllocator());
+    GpuMat locBuf(pool.getAllocator());
+    CV_Assert( src_depth <= CV_64F ); // keeps the funcs[src_depth] lookup below in bounds
+    const func_t func = funcs[src_depth];
+    func(src, mask, valBuf, locBuf, stream);
+
+    GpuMat minMaxVals = valBuf.colRange(0, 1); // only the first column of each buffer is published
+    GpuMat loc = locBuf.colRange(0, 1);
+
+    if (_minMaxVals.kind() == _InputArray::CUDA_GPU_MAT) // GpuMat destination: copyTo; anything else: download
+    {
+        minMaxVals.copyTo(_minMaxVals, stream);
+    }
+    else
+    {
+        minMaxVals.download(_minMaxVals, stream);
+    }
+
+    if (_loc.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        loc.copyTo(_loc, stream);
+    }
+    else
+    {
+        loc.download(_loc, stream);
+    }
+}
+
+void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem minMaxVals, locVals; // host-side destinations for findMinMaxLoc
+    findMinMaxLoc(_src, minMaxVals, locVals, _mask, stream);
+
+    stream.waitForCompletion(); // the results are read on the host immediately below
+
+    double vals[2];
+    minMaxVals.createMatHeader().convertTo(Mat(minMaxVals.size(), CV_64FC1, &vals[0]), CV_64F); // widen to double into vals[]
+
+    int locs[2];
+    locVals.createMatHeader().copyTo(Mat(locVals.size(), CV_32SC1, &locs[0]));
+    Size size = _src.size();
+    cv::Point locs2D[] = { // locs[] are flat indices; split into (x, y) using the source width
+        cv::Point(locs[0] % size.width, locs[0] / size.width),
+        cv::Point(locs[1] % size.width, locs[1] / size.width),
+    };
+
+    if (minVal) // every output pointer is optional
+        *minVal = vals[0];
+
+    if (maxVal)
+        *maxVal = vals[1];
+
+    if (minLoc)
+        *minLoc = locs2D[0];
+
+    if (maxLoc)
+        *maxLoc = locs2D[1];
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/norm.cu b/modules/cudaarithm/src/cuda/norm.cu
index bda6b45815..baf76a6db3 100644
--- a/modules/cudaarithm/src/cuda/norm.cu
+++ b/modules/cudaarithm/src/cuda/norm.cu
@@ -50,70 +50,140 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    void normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream) // result is written to _dst, not returned
     {
         const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
         const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
-        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+        GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
 
-        gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
-
-        int data[2];
-        buf.download(cv::Mat(1, 2, buf.type(), data));
-
-        return data[1];
+        gridFindMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream); // max of |src1 - src2| on int-converted values
     }
 
-    double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    void normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream)
     {
         const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
         const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
-        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+        GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
 
-        gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
-
-        int data;
-        buf.download(cv::Mat(1, 1, buf.type(), &data));
-
-        return data;
+        gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream); // sum of |src1 - src2| on int-converted values
     }
 
-    double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf)
+    void normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream)
     {
         const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
         const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
-        GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
+        GpuMat_<double>& dst = (GpuMat_<double>&) _dst;
 
-        gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
+        BufferPool pool(stream);
+        GpuMat_<double> buf(1, 1, pool.getAllocator()); // 1x1 intermediate that receives the sum of squares
 
-        double data;
-        buf.download(cv::Mat(1, 1, buf.type(), &data));
-
-        return std::sqrt(data);
+        gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf, stream);
+        gridTransformUnary(buf, dst, sqrt_func<double>(), stream); // square root is applied via gridTransformUnary, replacing the host-side std::sqrt
     }
 }
 
-double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
+void cv::cuda::calcNormDiff(InputArray _src1, InputArray _src2, OutputArray _dst, int normType, Stream& stream) // writes the norm to _dst
 {
-    typedef double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream);
     static const func_t funcs[] =
     {
         0, normDiffInf, normDiffL1, 0, normDiffL2
     };
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     CV_Assert( src1.type() == CV_8UC1 );
     CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
 
-    return funcs[normType](src1, src2, buf);
+    GpuMat dst = getOutputMat(_dst, 1, 1, normType == NORM_L2 ? CV_64FC1 : CV_32SC1, stream); // L2 yields a double, INF/L1 an int
+
+    const func_t func = funcs[normType]; // funcs[] is indexed directly by normType; the assert above restricts it to the three supported values
+    func(src1, src2, dst, stream);
+
+    syncOutput(dst, _dst, stream);
+}
+
+double cv::cuda::norm(InputArray _src1, InputArray _src2, int normType) // host-returning wrapper around calcNormDiff
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst; // host-side destination for the single result element
+    calcNormDiff(_src1, _src2, dst, normType, stream);
+
+    stream.waitForCompletion(); // the result is read on the host immediately below
+
+    double val;
+    dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F); // calcNormDiff emits CV_32SC1 or CV_64FC1; widen to double
+
+    return val;
+}
+
+namespace cv { namespace cuda { namespace internal {
+// Forward declaration only; the definition appears further down in this file.
+void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
+
+}}}
+
+namespace
+{
+    template <typename T, typename R> // T: source element type, R: element type of the result matrix
+    void normL2Impl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
+    {
+        const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
+        GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
+
+        BufferPool pool(stream);
+        GpuMat_<double> buf(1, 1, pool.getAllocator()); // 1x1 intermediate that receives the sum of squares
+
+        if (mask.empty())
+        {
+            gridCalcSum(sqr_(cvt_<double>(src)), buf, stream); // squares are accumulated on double-converted values
+        }
+        else
+        {
+            gridCalcSum(sqr_(cvt_<double>(src)), buf, globPtr<uchar>(mask), stream); // same sum, with the mask supplied
+        }
+
+        gridTransformUnary(buf, dst, sqrt_func<double>(), stream); // dst receives sqrt_func applied to the accumulated sum
+    }
+}
+
+void cv::cuda::internal::normL2(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) // writes the L2 norm of src to _dst
+{
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
+    static const func_t funcs[] =
+    {
+        normL2Impl<uchar, double>, // table is indexed by src.depth(); every entry produces a double result
+        normL2Impl<schar, double>,
+        normL2Impl<ushort, double>,
+        normL2Impl<short, double>,
+        normL2Impl<int, double>,
+        normL2Impl<float, double>,
+        normL2Impl<double, double>
+    };
+
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
+
+    CV_Assert( src.channels() == 1 );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( src.depth() <= CV_64F ); // keeps the funcs[src.depth()] lookup below in bounds
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC1, stream); // 1x1 CV_64FC1 result
+
+    const func_t func = funcs[src.depth()];
+    func(src, mask, dst, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/src/cuda/normalize.cu b/modules/cudaarithm/src/cuda/normalize.cu
new file mode 100644
index 0000000000..efbc94ecce
--- /dev/null
+++ b/modules/cudaarithm/src/cuda/normalize.cu
@@ -0,0 +1,290 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+using namespace cv::cudev;
+
+namespace {
+
+template <typename T, typename R, typename I> // T: input element, R: output element, I: type of the stored min/max
+struct ConvertorMinMax : unary_function<T, R>
+{
+    typedef typename LargerType<T, R>::type larger_type1;
+    typedef typename LargerType<larger_type1, I>::type larger_type2;
+    typedef typename LargerType<larger_type2, float>::type scalar_type;
+    // dmin/dmax: bounds of the target range; minMaxVals: points at {source min, source max}, read in the device operator
+    scalar_type dmin, dmax;
+    const I* minMaxVals;
+    // Linear map sending smin to dmin and smax to dmax; scale becomes 0 when (smax - smin) does not exceed epsilon
+    __device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        const scalar_type smin = minMaxVals[0];
+        const scalar_type smax = minMaxVals[1];
+
+        const scalar_type scale = (dmax - dmin) * (smax - smin > numeric_limits<scalar_type>::epsilon() ? 1.0 / (smax - smin) : 0.0);
+        const scalar_type shift = dmin - smin * scale;
+
+        return cudev::saturate_cast<R>(scale * src + shift);
+    }
+};
+
+template <typename T, typename R, typename I> // T: source element, R: destination element, I: min/max buffer element
+void normalizeMinMax(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream)
+{
+    const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
+    GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
+    // 1x2 buffer allocated through the stream's BufferPool; it receives the source min and max
+    BufferPool pool(stream);
+    GpuMat_<I> minMaxVals(1, 2, pool.getAllocator());
+    // find min/max of src on the stream (mask overload when a mask is given)
+    if (mask.empty())
+    {
+        gridFindMinMaxVal(src, minMaxVals, stream);
+    }
+    else
+    {
+        gridFindMinMaxVal(src, minMaxVals, globPtr<uchar>(mask), stream);
+    }
+    // a and b may be passed in either order; the functor only keeps a pointer to the min/max buffer
+    ConvertorMinMax<T, R, I> cvt;
+    cvt.dmin = std::min(a, b);
+    cvt.dmax = std::max(a, b);
+    cvt.minMaxVals = minMaxVals[0];
+    // with a mask, dst is zero-filled first and then the masked transform overload is used
+    if (mask.empty())
+    {
+        gridTransformUnary(src, dst, cvt, stream);
+    }
+    else
+    {
+        dst.setTo(Scalar::all(0), stream);
+        gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
+    }
+}
+
+template <typename T, typename R, typename I, bool normL2> // normL2 == true: *normVal is square-rooted before use
+struct ConvertorNorm : unary_function<T, R>
+{
+    typedef typename LargerType<T, R>::type larger_type1;
+    typedef typename LargerType<larger_type1, I>::type larger_type2;
+    typedef typename LargerType<larger_type2, float>::type scalar_type;
+    // a: numerator of the scale factor; normVal: points at the reduction result, dereferenced in the device operator
+    scalar_type a;
+    const I* normVal;
+    // Returns src * (a / norm), saturated to R; the scale is 0 when the norm does not exceed epsilon
+    __device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
+    {
+        sqrt_func<scalar_type> sqrt;
+
+        scalar_type scale = normL2 ? sqrt(*normVal) : *normVal;
+        scale = scale > numeric_limits<scalar_type>::epsilon() ? a / scale : 0.0;
+
+        return cudev::saturate_cast<R>(scale * src);
+    }
+};
+
+template <typename T, typename R, typename I> // T: source element, R: destination element, I: reduction buffer element
+void normalizeNorm(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream)
+{
+    const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
+    GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
+    // 1x1 buffer allocated through the stream's BufferPool; it receives the reduction result
+    BufferPool pool(stream);
+    GpuMat_<I> normVal(1, 1, pool.getAllocator());
+    // NORM_L1: sum of |x|; NORM_L2: sum of x^2 (the square root is taken later in ConvertorNorm); otherwise: max of |x|
+    if (normType == NORM_L1)
+    {
+        if (mask.empty())
+        {
+            gridCalcSum(abs_(cvt_<I>(src)), normVal, stream);
+        }
+        else
+        {
+            gridCalcSum(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
+        }
+    }
+    else if (normType == NORM_L2)
+    {
+        if (mask.empty())
+        {
+            gridCalcSum(sqr_(cvt_<I>(src)), normVal, stream);
+        }
+        else
+        {
+            gridCalcSum(sqr_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
+        }
+    }
+    else // NORM_INF
+    {
+        if (mask.empty())
+        {
+            gridFindMaxVal(abs_(cvt_<I>(src)), normVal, stream);
+        }
+        else
+        {
+            gridFindMaxVal(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
+        }
+    }
+    // normL2 is a template argument of the functor, hence two branches that differ only in that flag
+    if (normType == NORM_L2)
+    {
+        ConvertorNorm<T, R, I, true> cvt;
+        cvt.a = a;
+        cvt.normVal = normVal[0];
+
+        if (mask.empty())
+        {
+            gridTransformUnary(src, dst, cvt, stream);
+        }
+        else
+        {
+            dst.setTo(Scalar::all(0), stream); // zero-fill dst before the masked transform
+            gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
+        }
+    }
+    else
+    {
+        ConvertorNorm<T, R, I, false> cvt;
+        cvt.a = a;
+        cvt.normVal = normVal[0];
+
+        if (mask.empty())
+        {
+            gridTransformUnary(src, dst, cvt, stream);
+        }
+        else
+        {
+            dst.setTo(Scalar::all(0), stream); // zero-fill dst before the masked transform
+            gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
+        }
+    }
+}
+
+} // namespace
+
+void cv::cuda::normalize(InputArray _src, OutputArray _dst, double a, double b, int normType, int dtype, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_minmax_t)(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream);
+    typedef void (*func_norm_t)(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream);
+    // Normalizes single-channel src into _dst (MINMAX range scaling or INF/L1/L2 norm scaling); tables are indexed by source depth
+    static const func_minmax_t funcs_minmax[] =
+    {
+        normalizeMinMax<uchar, float, float>,
+        normalizeMinMax<schar, float, float>,
+        normalizeMinMax<ushort, float, float>,
+        normalizeMinMax<short, float, float>,
+        normalizeMinMax<int, float, float>,
+        normalizeMinMax<float, float, float>,
+        normalizeMinMax<double, double, double>
+    };
+
+    static const func_norm_t funcs_norm[] =
+    {
+        normalizeNorm<uchar, float, float>,
+        normalizeNorm<schar, float, float>,
+        normalizeNorm<ushort, float, float>,
+        normalizeNorm<short, float, float>,
+        normalizeNorm<int, float, float>,
+        normalizeNorm<float, float, float>,
+        normalizeNorm<double, double, double>
+    };
+
+    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_MINMAX );
+
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
+
+    CV_Assert( src.channels() == 1 );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    // A negative dtype selects the destination's fixed type or else the source type; masking -1 directly would give depth 7
+    dtype = CV_MAT_DEPTH(dtype < 0 ? (_dst.fixedType() ? _dst.type() : src.type()) : dtype);
+
+    const int src_depth = src.depth();
+    const int tmp_depth = src_depth <= CV_32F ? CV_32F : src_depth;
+    // Kernels produce tmp_depth (CV_32F, or the source depth beyond that); _dst is written directly only when dtype matches
+    GpuMat dst;
+    if (dtype == tmp_depth)
+    {
+        _dst.create(src.size(), tmp_depth);
+        dst = getOutputMat(_dst, src.size(), tmp_depth, stream);
+    }
+    else
+    {
+        BufferPool pool(stream);
+        dst = pool.getBuffer(src.size(), tmp_depth);
+    }
+
+    if (normType == NORM_MINMAX)
+    {
+        const func_minmax_t func = funcs_minmax[src_depth];
+        func(src, dst, a, b, mask, stream);
+    }
+    else
+    {
+        const func_norm_t func = funcs_norm[src_depth];
+        func(src, dst, a, normType, mask, stream);
+    }
+    // Either hand the directly written result back to _dst or convert the pooled temporary to the requested depth
+    if (dtype == tmp_depth)
+    {
+        syncOutput(dst, _dst, stream);
+    }
+    else
+    {
+        dst.convertTo(_dst, dtype, stream);
+    }
+}
+
+#endif
diff --git a/modules/cudaarithm/src/cuda/sum.cu b/modules/cudaarithm/src/cuda/sum.cu
index cced9c56e8..0160449039 100644
--- a/modules/cudaarithm/src/cuda/sum.cu
+++ b/modules/cudaarithm/src/cuda/sum.cu
@@ -50,126 +50,153 @@
 
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
     template <typename T, typename R, int cn>
-    cv::Scalar sumImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;
 
         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
 
         if (mask.empty())
-            gridCalcSum(src, buf);
+            gridCalcSum(src, dst, stream);
         else
-            gridCalcSum(src, buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(src, dst, globPtr<uchar>(mask), stream);
     }
 
     template <typename T, typename R, int cn>
-    cv::Scalar sumAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumAbsImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;
 
         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
 
         if (mask.empty())
-            gridCalcSum(abs_(cvt_<res_type>(src)), buf);
+            gridCalcSum(abs_(cvt_<res_type>(src)), dst, stream);
         else
-            gridCalcSum(abs_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(abs_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
     }
 
     template <typename T, typename R, int cn>
-    cv::Scalar sumSqrImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumSqrImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;
 
         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;
 
         if (mask.empty())
-            gridCalcSum(sqr_(cvt_<res_type>(src)), buf);
+            gridCalcSum(sqr_(cvt_<res_type>(src)), dst, stream);
         else
-            gridCalcSum(sqr_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(sqr_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
     }
 }
 
-cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask, GpuMat& buf)
+void cv::cuda::calcSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
-        {sumImpl<uchar , uint  , 1>, sumImpl<uchar , uint  , 2>, sumImpl<uchar , uint  , 3>, sumImpl<uchar , uint  , 4>},
-        {sumImpl<schar , int   , 1>, sumImpl<schar , int   , 2>, sumImpl<schar , int   , 3>, sumImpl<schar , int   , 4>},
-        {sumImpl<ushort, uint  , 1>, sumImpl<ushort, uint  , 2>, sumImpl<ushort, uint  , 3>, sumImpl<ushort, uint  , 4>},
-        {sumImpl<short , int   , 1>, sumImpl<short , int   , 2>, sumImpl<short , int   , 3>, sumImpl<short , int   , 4>},
-        {sumImpl<int   , int   , 1>, sumImpl<int   , int   , 2>, sumImpl<int   , int   , 3>, sumImpl<int   , int   , 4>},
-        {sumImpl<float , float , 1>, sumImpl<float , float , 2>, sumImpl<float , float , 3>, sumImpl<float , float , 4>},
+        {sumImpl<uchar , double, 1>, sumImpl<uchar , double, 2>, sumImpl<uchar , double, 3>, sumImpl<uchar , double, 4>},
+        {sumImpl<schar , double, 1>, sumImpl<schar , double, 2>, sumImpl<schar , double, 3>, sumImpl<schar , double, 4>},
+        {sumImpl<ushort, double, 1>, sumImpl<ushort, double, 2>, sumImpl<ushort, double, 3>, sumImpl<ushort, double, 4>},
+        {sumImpl<short , double, 1>, sumImpl<short , double, 2>, sumImpl<short , double, 3>, sumImpl<short , double, 4>},
+        {sumImpl<int   , double, 1>, sumImpl<int   , double, 2>, sumImpl<int   , double, 3>, sumImpl<int   , double, 4>},
+        {sumImpl<float , double, 1>, sumImpl<float , double, 2>, sumImpl<float , double, 3>, sumImpl<float , double, 4>},
         {sumImpl<double, double, 1>, sumImpl<double, double, 2>, sumImpl<double, double, 3>, sumImpl<double, double, 4>}
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
-    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
-    const func_t func = funcs[src.depth()][src.channels() - 1];
+    const int src_depth = src.depth();
+    const int channels = src.channels();
 
-    return func(src, mask, buf);
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
-cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
+cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
+}
+
+void cv::cuda::calcAbsSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
-        {sumAbsImpl<uchar , uint  , 1>, sumAbsImpl<uchar , uint  , 2>, sumAbsImpl<uchar , uint  , 3>, sumAbsImpl<uchar , uint  , 4>},
-        {sumAbsImpl<schar , int   , 1>, sumAbsImpl<schar , int   , 2>, sumAbsImpl<schar , int   , 3>, sumAbsImpl<schar , int   , 4>},
-        {sumAbsImpl<ushort, uint  , 1>, sumAbsImpl<ushort, uint  , 2>, sumAbsImpl<ushort, uint  , 3>, sumAbsImpl<ushort, uint  , 4>},
-        {sumAbsImpl<short , int   , 1>, sumAbsImpl<short , int   , 2>, sumAbsImpl<short , int   , 3>, sumAbsImpl<short , int   , 4>},
-        {sumAbsImpl<int   , int   , 1>, sumAbsImpl<int   , int   , 2>, sumAbsImpl<int   , int   , 3>, sumAbsImpl<int   , int   , 4>},
-        {sumAbsImpl<float , float , 1>, sumAbsImpl<float , float , 2>, sumAbsImpl<float , float , 3>, sumAbsImpl<float , float , 4>},
+        {sumAbsImpl<uchar , double, 1>, sumAbsImpl<uchar , double, 2>, sumAbsImpl<uchar , double, 3>, sumAbsImpl<uchar , double, 4>},
+        {sumAbsImpl<schar , double, 1>, sumAbsImpl<schar , double, 2>, sumAbsImpl<schar , double, 3>, sumAbsImpl<schar , double, 4>},
+        {sumAbsImpl<ushort, double, 1>, sumAbsImpl<ushort, double, 2>, sumAbsImpl<ushort, double, 3>, sumAbsImpl<ushort, double, 4>},
+        {sumAbsImpl<short , double, 1>, sumAbsImpl<short , double, 2>, sumAbsImpl<short , double, 3>, sumAbsImpl<short , double, 4>},
+        {sumAbsImpl<int   , double, 1>, sumAbsImpl<int   , double, 2>, sumAbsImpl<int   , double, 3>, sumAbsImpl<int   , double, 4>},
+        {sumAbsImpl<float , double, 1>, sumAbsImpl<float , double, 2>, sumAbsImpl<float , double, 3>, sumAbsImpl<float , double, 4>},
         {sumAbsImpl<double, double, 1>, sumAbsImpl<double, double, 2>, sumAbsImpl<double, double, 3>, sumAbsImpl<double, double, 4>}
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
-    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
-    const func_t func = funcs[src.depth()][src.channels() - 1];
+    const int src_depth = src.depth();
+    const int channels = src.channels();
 
-    return func(src, mask, buf);
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
-cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
+cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcAbsSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
+}
+
+void cv::cuda::calcSqrSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
         {sumSqrImpl<uchar , double, 1>, sumSqrImpl<uchar , double, 2>, sumSqrImpl<uchar , double, 3>, sumSqrImpl<uchar , double, 4>},
@@ -181,14 +208,35 @@ cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
         {sumSqrImpl<double, double, 1>, sumSqrImpl<double, double, 2>, sumSqrImpl<double, double, 3>, sumSqrImpl<double, double, 4>}
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
-    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
-    const func_t func = funcs[src.depth()][src.channels() - 1];
+    const int src_depth = src.depth();
+    const int channels = src.channels();
 
-    return func(src, mask, buf);
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
+}
+
+cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcSqrSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
 }
 
 #endif
diff --git a/modules/cudaarithm/src/reductions.cpp b/modules/cudaarithm/src/reductions.cpp
index c1e2af4ed3..8d0add4537 100644
--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@@ -47,110 +47,106 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-double cv::cuda::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
-double cv::cuda::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
+double cv::cuda::norm(InputArray, int, InputArray) { throw_no_cuda(); return 0.0; }
+void cv::cuda::calcNorm(InputArray, OutputArray, int, InputArray, Stream&) { throw_no_cuda(); }
+double cv::cuda::norm(InputArray, InputArray, int) { throw_no_cuda(); return 0.0; }
+void cv::cuda::calcNormDiff(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-Scalar cv::cuda::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::cuda::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::cuda::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
+Scalar cv::cuda::sum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+Scalar cv::cuda::absSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcAbsSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+Scalar cv::cuda::sqrSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcSqrSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
 
-void cv::cuda::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::minMax(InputArray, double*, double*, InputArray) { throw_no_cuda(); }
+void cv::cuda::findMinMax(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray) { throw_no_cuda(); }
+void cv::cuda::findMinMaxLoc(InputArray, OutputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
 
-int cv::cuda::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
+int cv::cuda::countNonZero(InputArray) { throw_no_cuda(); return 0; }
+void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
 void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
 void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }
 
-void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, Stream&) { throw_no_cuda(); }
 
-void cv::cuda::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::integral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::sqrIntegral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
 #else
 
-namespace
-{
-    class DeviceBuffer
-    {
-    public:
-        explicit DeviceBuffer(int count_ = 1) : count(count_)
-        {
-            cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
-        }
-        ~DeviceBuffer()
-        {
-            cudaSafeCall( cudaFree(pdev) );
-        }
-
-        operator double*() {return pdev;}
-
-        void download(double* hptr)
-        {
-            double hbuf;
-            cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
-            *hptr = hbuf;
-        }
-        void download(double** hptrs)
-        {
-            AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
-            cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
-            for (int i = 0; i < count; ++i)
-                *hptrs[i] = hbuf[i];
-        }
-
-    private:
-        double* pdev;
-        int count;
-    };
-}
-
 ////////////////////////////////////////////////////////////////////////
 // norm
 
-double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
-{
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+namespace cv { namespace cuda { namespace internal {
 
+void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
+
+void findMaxAbs(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
+
+}}}
+
+void cv::cuda::calcNorm(InputArray _src, OutputArray dst, int normType, InputArray mask, Stream& stream)
+{
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1) );
+
+    GpuMat src = getInputMat(_src, stream);
 
     GpuMat src_single_channel = src.reshape(1);
 
     if (normType == NORM_L1)
-        return cuda::absSum(src_single_channel, mask, buf)[0];
+    {
+        calcAbsSum(src_single_channel, dst, mask, stream);
+    }
+    else if (normType == NORM_L2)
+    {
+        internal::normL2(src_single_channel, dst, mask, stream);
+    }
+    else // NORM_INF
+    {
+        internal::findMaxAbs(src_single_channel, dst, mask, stream);
+    }
+}
 
-    if (normType == NORM_L2)
-        return std::sqrt(cuda::sqrSum(src_single_channel, mask, buf)[0]);
+double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
 
-    // NORM_INF
-    double min_val, max_val;
-    cuda::minMax(src_single_channel, &min_val, &max_val, mask, buf);
-    return std::max(std::abs(min_val), std::abs(max_val));
+    HostMem dst;
+    calcNorm(_src, dst, normType, _mask, stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F);
+
+    return val;
 }
 
 ////////////////////////////////////////////////////////////////////////
 // meanStdDev
 
-void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
+void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
+        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
+
+    const GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_8UC1 );
 
-    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
-        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");
+    GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);
 
     NppiSize sz;
     sz.width  = src.cols;
     sz.height = src.rows;
 
-    DeviceBuffer dbuf(2);
-
     int bufSize;
 #if (CUDA_VERSION <= 4020)
     nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
@@ -158,14 +154,30 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
     nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
 #endif
 
-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
+    BufferPool pool(stream);
+    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);
 
-    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
+    NppStreamHandler h(StreamAccessor::getStream(stream));
 
-    cudaSafeCall( cudaDeviceSynchronize() );
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );
 
-    double* ptrs[2] = {mean.val, stddev.val};
-    dbuf.download(ptrs);
+    syncOutput(dst, _dst, stream);
+}
+
+void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    meanStdDev(_src, dst, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));
+
+    mean = Scalar(vals[0]);
+    stddev = Scalar(vals[1]);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -173,13 +185,12 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
 
 void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
 {
-    GpuMat src = _src.getGpuMat();
-    GpuMat sqr = _sqr.getGpuMat();
+    GpuMat src = getInputMat(_src, _stream);
+    GpuMat sqr = getInputMat(_sqr, _stream);
 
     CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 );
 
-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, _stream);
 
     NppiSize sz;
     sz.width = src.cols;
@@ -200,45 +211,8 @@ void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Re
 
     if (stream == 0)
         cudaSafeCall( cudaDeviceSynchronize() );
-}
 
-////////////////////////////////////////////////////////////////////////
-// normalize
-
-void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
-{
-    GpuMat src = _src.getGpuMat();
-
-    double scale = 1, shift = 0;
-
-    if (norm_type == NORM_MINMAX)
-    {
-        double smin = 0, smax = 0;
-        double dmin = std::min(a, b), dmax = std::max(a, b);
-        cuda::minMax(src, &smin, &smax, mask, norm_buf);
-        scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
-        shift = dmin - smin * scale;
-    }
-    else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
-    {
-        scale = cuda::norm(src, norm_type, mask, norm_buf);
-        scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
-        shift = 0;
-    }
-    else
-    {
-        CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
-    }
-
-    if (mask.empty())
-    {
-        src.convertTo(dst, dtype, scale, shift);
-    }
-    else
-    {
-        src.convertTo(cvt_buf, dtype, scale, shift);
-        cvt_buf.copyTo(dst, mask);
-    }
+    syncOutput(dst, _dst, _stream);
 }
 
 #endif
diff --git a/modules/cudaarithm/test/test_reductions.cpp b/modules/cudaarithm/test/test_reductions.cpp
index e3c54055df..a95d007b81 100644
--- a/modules/cudaarithm/test/test_reductions.cpp
+++ b/modules/cudaarithm/test/test_reductions.cpp
@@ -74,8 +74,27 @@ CUDA_TEST_P(Norm, Accuracy)
     cv::Mat src = randomMat(size, depth);
     cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
 
-    cv::cuda::GpuMat d_buf;
-    double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
+    double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi));
+
+    double val_gold = cv::norm(src, normCode, mask);
+
+    EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
+}
+
+CUDA_TEST_P(Norm, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcNorm(loadMat(src, useRoi), dst, normCode, loadMat(mask, useRoi), stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    dst.createMatHeader().convertTo(cv::Mat(1, 1, CV_64FC1, &val), CV_64F);
 
     double val_gold = cv::norm(src, normCode, mask);
 
@@ -127,6 +146,27 @@ CUDA_TEST_P(NormDiff, Accuracy)
     EXPECT_NEAR(val_gold, val, 0.0);
 }
 
+CUDA_TEST_P(NormDiff, Async)
+{
+    cv::Mat src1 = randomMat(size, CV_8UC1);
+    cv::Mat src2 = randomMat(size, CV_8UC1);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcNormDiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, normCode, stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    const cv::Mat val_mat(1, 1, CV_64FC1, &val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    double val_gold = cv::norm(src1, src2, normCode);
+
+    EXPECT_NEAR(val_gold, val, 0.0);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, NormDiff, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
@@ -247,6 +287,24 @@ CUDA_TEST_P(Sum, Simple)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 
+CUDA_TEST_P(Sum, Simple_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = cv::sum(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 CUDA_TEST_P(Sum, Abs)
 {
     cv::Scalar val = cv::cuda::absSum(loadMat(src, useRoi));
@@ -256,6 +314,24 @@ CUDA_TEST_P(Sum, Abs)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 
+CUDA_TEST_P(Sum, Abs_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcAbsSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = absSumGold(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 CUDA_TEST_P(Sum, Sqr)
 {
     cv::Scalar val = cv::cuda::sqrSum(loadMat(src, useRoi));
@@ -265,6 +341,24 @@ CUDA_TEST_P(Sum, Sqr)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }
 
+CUDA_TEST_P(Sum, Sqr_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcSqrSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = sqrSumGold(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Sum, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
@@ -321,6 +415,28 @@ CUDA_TEST_P(MinMax, WithoutMask)
     }
 }
 
+CUDA_TEST_P(MinMax, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::findMinMax(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    const cv::Mat vals_mat(1, 2, CV_64FC1, &vals[0]);
+    dst.createMatHeader().convertTo(vals_mat, CV_64F);
+
+    double minVal_gold, maxVal_gold;
+    minMaxLocGold(src, &minVal_gold, &maxVal_gold);
+
+    EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
+    EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
+}
+
 CUDA_TEST_P(MinMax, WithMask)
 {
     cv::Mat src = randomMat(size, depth);
@@ -471,6 +587,41 @@ CUDA_TEST_P(MinMaxLoc, WithoutMask)
     }
 }
 
+CUDA_TEST_P(MinMaxLoc, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem minMaxVals, locVals;
+    cv::cuda::findMinMaxLoc(loadMat(src, useRoi), minMaxVals, locVals, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    const cv::Mat vals_mat(2, 1, CV_64FC1, &vals[0]);
+    minMaxVals.createMatHeader().convertTo(vals_mat, CV_64F);
+
+    int locs[2];
+    const cv::Mat locs_mat(2, 1, CV_32SC1, &locs[0]);
+    locVals.createMatHeader().copyTo(locs_mat);
+
+    cv::Point locs2D[] = {
+        cv::Point(locs[0] % src.cols, locs[0] / src.cols),
+        cv::Point(locs[1] % src.cols, locs[1] / src.cols),
+    };
+
+    double minVal_gold, maxVal_gold;
+    cv::Point minLoc_gold, maxLoc_gold;
+    minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
+
+    EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
+    EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
+
+    expectEqual(src, minLoc_gold, locs2D[0]);
+    expectEqual(src, maxLoc_gold, locs2D[1]);
+}
+
 CUDA_TEST_P(MinMaxLoc, WithMask)
 {
     cv::Mat src = randomMat(size, depth);
@@ -564,6 +715,7 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
     int depth;
     bool useRoi;
 
+    cv::Mat src;
 
     virtual void SetUp()
     {
@@ -573,15 +725,14 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
         useRoi = GET_PARAM(3);
 
         cv::cuda::setDevice(devInfo.deviceID());
+
+        cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
+        srcBase.convertTo(src, depth);
     }
 };
 
 CUDA_TEST_P(CountNonZero, Accuracy)
 {
-    cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
-    cv::Mat src;
-    srcBase.convertTo(src, depth);
-
     if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
@@ -603,6 +754,24 @@ CUDA_TEST_P(CountNonZero, Accuracy)
     }
 }
 
+CUDA_TEST_P(CountNonZero, Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::countNonZero(loadMat(src, useRoi), dst, stream);
+
+    stream.waitForCompletion();
+
+    int val;
+    const cv::Mat val_mat(1, 1, CV_32SC1, &val);
+    dst.createMatHeader().copyTo(val_mat);
+
+    int val_gold = cv::countNonZero(src);
+
+    ASSERT_EQ(val_gold, val);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CountNonZero, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
@@ -750,7 +919,7 @@ CUDA_TEST_P(Normalize, WithMask)
     dst_gold.setTo(cv::Scalar::all(0));
     cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
+    EXPECT_MAT_NEAR(dst_gold, dst, type < CV_32F ? 1.0 : 1e-4);
 }
 
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
@@ -811,6 +980,28 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
     }
 }
 
+CUDA_TEST_P(MeanStdDev, Async)
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::meanStdDev(loadMat(src, useRoi), dst, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));
+
+    cv::Scalar mean_gold;
+    cv::Scalar stddev_gold;
+    cv::meanStdDev(src, mean_gold, stddev_gold);
+
+    EXPECT_SCALAR_NEAR(mean_gold, cv::Scalar(vals[0]), 1e-5);
+    EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
diff --git a/modules/cudabgsegm/src/fgd.cpp b/modules/cudabgsegm/src/fgd.cpp
index 68f03a3e16..237f1c05fa 100644
--- a/modules/cudabgsegm/src/fgd.cpp
+++ b/modules/cudabgsegm/src/fgd.cpp
@@ -266,7 +266,7 @@ namespace
 {
     int bgfgClassification(const GpuMat& prevFrame, const GpuMat& curFrame,
                            const GpuMat& Ftd, const GpuMat& Fbd,
-                           GpuMat& foreground, GpuMat& countBuf,
+                           GpuMat& foreground,
                            const FGDParams& params, int out_cn)
     {
         typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
@@ -298,7 +298,7 @@ namespace
                                                                              deltaC, deltaCC, params.alpha2,
                                                                              params.N1c, params.N1cc, 0);
 
-        int count = cuda::countNonZero(foreground, countBuf);
+        int count = cuda::countNonZero(foreground);
 
         cuda::multiply(foreground, Scalar::all(255), foreground);
 
@@ -605,8 +605,6 @@ namespace
         GpuMat hist_;
         GpuMat histBuf_;
 
-        GpuMat countBuf_;
-
         GpuMat buf_;
         GpuMat filterBrd_;
 
@@ -649,7 +647,7 @@ namespace
         changeDetection(prevFrame_, curFrame, Ftd_, hist_, histBuf_);
         changeDetection(background_, curFrame, Fbd_, hist_, histBuf_);
 
-        int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, countBuf_, params_, 4);
+        int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, params_, 4);
 
 #ifdef HAVE_OPENCV_CUDAFILTERS
         if (params_.perform_morphing > 0)
diff --git a/modules/cudafilters/src/filtering.cpp b/modules/cudafilters/src/filtering.cpp
index 2ab35ccee5..ed72a3ab5c 100644
--- a/modules/cudafilters/src/filtering.cpp
+++ b/modules/cudafilters/src/filtering.cpp
@@ -542,7 +542,7 @@ namespace
             anchor_ = Point(iters_, iters_);
             iters_ = 1;
         }
-        else if (iters_ > 1 && countNonZero(kernel) == (int) kernel.total())
+        else if (iters_ > 1 && cv::countNonZero(kernel) == (int) kernel.total())
         {
             anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_);
             kernel = getStructuringElement(MORPH_RECT,
diff --git a/modules/cudaimgproc/src/gftt.cpp b/modules/cudaimgproc/src/gftt.cpp
index 243665083a..ab1f3e471f 100644
--- a/modules/cudaimgproc/src/gftt.cpp
+++ b/modules/cudaimgproc/src/gftt.cpp
@@ -81,7 +81,6 @@ namespace
         GpuMat Dy_;
         GpuMat buf_;
         GpuMat eig_;
-        GpuMat minMaxbuf_;
         GpuMat tmpCorners_;
     };
 
@@ -109,7 +108,7 @@ namespace
         cornerCriteria_->compute(image, eig_);
 
         double maxVal = 0;
-        cuda::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
+        cuda::minMax(eig_, 0, &maxVal);
 
         ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
 
diff --git a/modules/cudaimgproc/src/match_template.cpp b/modules/cudaimgproc/src/match_template.cpp
index c5ab143ec7..25c42dfd96 100644
--- a/modules/cudaimgproc/src/match_template.cpp
+++ b/modules/cudaimgproc/src/match_template.cpp
@@ -271,7 +271,6 @@ namespace
     private:
         Match_CCORR_8U match_CCORR_;
         GpuMat image_sqsums_;
-        GpuMat intBuffer_;
     };
 
     void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
@@ -288,7 +287,7 @@ namespace
         match_CCORR_.match(image, templ, _result, stream);
         GpuMat result = _result.getGpuMat();
 
-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
 
         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
 
@@ -335,7 +334,6 @@ namespace
 
     private:
         GpuMat image_sqsums_;
-        GpuMat intBuffer_;
         Match_CCORR_8U match_CCORR_;
     };
 
@@ -359,7 +357,7 @@ namespace
             return;
         }
 
-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
 
         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
 
@@ -383,7 +381,6 @@ namespace
 
     private:
         GpuMat image_sqsums_;
-        GpuMat intBuffer_;
         Match_CCORR_8U match_CCORR_;
     };
 
@@ -398,7 +395,7 @@ namespace
         CV_Assert( image.type() == templ.type() );
         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
 
-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
 
         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
 
@@ -421,7 +418,6 @@ namespace
         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
 
     private:
-        GpuMat intBuffer_;
         std::vector<GpuMat> images_;
         std::vector<GpuMat> image_sums_;
         Match_CCORR_8U match_CCORR_;
@@ -444,7 +440,7 @@ namespace
         if (image.channels() == 1)
         {
             image_sums_.resize(1);
-            cuda::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], stream);
 
             int templ_sum = (int) cuda::sum(templ)[0];
 
@@ -456,7 +452,7 @@ namespace
 
             image_sums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
-                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], stream);
 
             Scalar templ_sum = cuda::sum(templ);
 
@@ -501,7 +497,6 @@ namespace
     private:
         GpuMat imagef_, templf_;
         Match_CCORR_32F match_CCORR_32F_;
-        GpuMat intBuffer_;
         std::vector<GpuMat> images_;
         std::vector<GpuMat> image_sums_;
         std::vector<GpuMat> image_sqsums_;
@@ -527,10 +522,10 @@ namespace
         if (image.channels() == 1)
         {
             image_sums_.resize(1);
-            cuda::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], stream);
 
             image_sqsums_.resize(1);
-            cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
+            cuda::sqrIntegral(image, image_sqsums_[0], stream);
 
             int templ_sum = (int) cuda::sum(templ)[0];
             double templ_sqsum = cuda::sqrSum(templ)[0];
@@ -547,8 +542,8 @@ namespace
             image_sqsums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
             {
-                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
-                cuda::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], stream);
+                cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
             }
 
             Scalar templ_sum = cuda::sum(templ);
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index af3f874e1f..0898031f0d 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -193,7 +193,7 @@ TEST(cornerHarris)
 TEST(integral)
 {
     Mat src, sum;
-    cuda::GpuMat d_src, d_sum, d_buf;
+    cuda::GpuMat d_src, d_sum;
 
     for (int size = 1000; size <= 4000; size *= 2)
     {
@@ -209,10 +209,10 @@ TEST(integral)
 
         d_src.upload(src);
 
-        cuda::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integral(d_src, d_sum);
 
         CUDA_ON;
-        cuda::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integral(d_src, d_sum);
         CUDA_OFF;
     }
 }

From b4e7ee46c633b06e6c5ca8374b1bf22357c76fd1 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 17:23:54 +0300
Subject: [PATCH 07/55] fix compilation without CUDA

---
 .../include/opencv2/core/private.cuda.hpp     | 20 ++++++++---------
 modules/core/src/cuda_gpu_mat.cpp             | 22 +++++++++++++++++++
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index aaa777c914..5c473b857f 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -80,6 +80,16 @@
 namespace cv { namespace cuda {
     CV_EXPORTS cv::String getNppErrorMessage(int code);
     CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
+
+    CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
+
+    CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
+    static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
+    {
+        return getOutputMat(_dst, size.height, size.width, type, stream);
+    }
+
+    CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
 }}
 
 #ifndef HAVE_CUDA
@@ -106,16 +116,6 @@ namespace cv { namespace cuda
         GpuMat::Allocator* allocator_;
     };
 
-    CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
-
-    CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
-    static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
-    {
-        return getOutputMat(_dst, size.height, size.width, type, stream);
-    }
-
-    CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
-
     static inline void checkNppError(int code, const char* file, const int line, const char* func)
     {
         if (code < 0)
diff --git a/modules/core/src/cuda_gpu_mat.cpp b/modules/core/src/cuda_gpu_mat.cpp
index 8a7b236acb..9a17ddd85d 100644
--- a/modules/core/src/cuda_gpu_mat.cpp
+++ b/modules/core/src/cuda_gpu_mat.cpp
@@ -346,6 +346,11 @@ GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
 {
     GpuMat src;
 
+#ifndef HAVE_CUDA
+    (void) _src;
+    (void) stream;
+    throw_no_cuda();
+#else
     if (_src.kind() == _InputArray::CUDA_GPU_MAT)
     {
         src = _src.getGpuMat();
@@ -356,6 +361,7 @@ GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
         src = pool.getBuffer(_src.size(), _src.type());
         src.upload(_src, stream);
     }
+#endif
 
     return src;
 }
@@ -364,6 +370,14 @@ GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, St
 {
     GpuMat dst;
 
+#ifndef HAVE_CUDA
+    (void) _dst;
+    (void) rows;
+    (void) cols;
+    (void) type;
+    (void) stream;
+    throw_no_cuda();
+#else
     if (_dst.kind() == _InputArray::CUDA_GPU_MAT)
     {
         _dst.create(rows, cols, type);
@@ -374,12 +388,19 @@ GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, St
         BufferPool pool(stream);
         dst = pool.getBuffer(rows, cols, type);
     }
+#endif
 
     return dst;
 }
 
 void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream)
 {
+#ifndef HAVE_CUDA
+    (void) dst;
+    (void) _dst;
+    (void) stream;
+    throw_no_cuda();
+#else
     if (_dst.kind() != _InputArray::CUDA_GPU_MAT)
     {
         if (stream)
@@ -387,6 +408,7 @@ void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream)
         else
             dst.download(_dst);
     }
+#endif
 }
 
 #ifndef HAVE_CUDA

From 34f0da601009d8ac6133429c3953678f860c5cb3 Mon Sep 17 00:00:00 2001
From: Chris Mayo <aklhfex@gmail.com>
Date: Tue, 30 Dec 2014 10:13:10 +0000
Subject: [PATCH 08/55] Act on INSTALL_PYTHON_EXAMPLES

---
 samples/CMakeLists.txt         | 4 ++++
 samples/python2/CMakeLists.txt | 6 ++++++
 2 files changed, 10 insertions(+)
 create mode 100644 samples/python2/CMakeLists.txt

diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index e1021de513..467ca162a7 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -22,6 +22,10 @@ if(ANDROID AND BUILD_ANDROID_EXAMPLES)
   add_subdirectory(android)
 endif()
 
+if(INSTALL_PYTHON_EXAMPLES)
+  add_subdirectory(python2)
+endif()
+
 #
 # END OF BUILD CASE 1: Build samples with library sources
 #
diff --git a/samples/python2/CMakeLists.txt b/samples/python2/CMakeLists.txt
new file mode 100644
index 0000000000..7fa245447c
--- /dev/null
+++ b/samples/python2/CMakeLists.txt
@@ -0,0 +1,6 @@
+if(INSTALL_PYTHON_EXAMPLES)
+  file(GLOB install_list *.py )
+  install(FILES ${install_list}
+          DESTINATION ${OPENCV_SAMPLES_SRC_INSTALL_PATH}/python2
+          PERMISSIONS OWNER_READ GROUP_READ WORLD_READ COMPONENT samples)
+endif()

From 60f2f7898af95b3d815715e4891ba97bb1b9782e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 30 Dec 2014 00:20:45 +0300
Subject: [PATCH 09/55] SSE4.1 addWeighted for 16u

---
 modules/core/src/arithm.cpp | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 2f377350e8..40bb097711 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -2863,6 +2863,52 @@ struct AddWeighted_SIMD<short, float>
     bool haveSSE2;
 };
 
+#if CV_SSE4_1
+
+template <>
+struct AddWeighted_SIMD<ushort, float>
+{
+    AddWeighted_SIMD()
+    {
+        haveSSE4_1 = checkHardwareSupport(CV_CPU_SSE4_1);
+    }
+
+    int operator() (const ushort * src1, const ushort * src2, ushort * dst, int width, float alpha, float beta, float gamma) const
+    {
+        int x = 0;
+
+        if (!haveSSE4_1)
+            return x;
+
+        __m128i v_zero = _mm_setzero_si128();
+        __m128 v_alpha = _mm_set1_ps(alpha), v_beta = _mm_set1_ps(beta),
+               v_gamma = _mm_set1_ps(gamma);
+
+        for( ; x <= width - 8; x += 8 )
+        {
+            __m128i v_src1 = _mm_loadu_si128((const __m128i *)(src1 + x));
+            __m128i v_src2 = _mm_loadu_si128((const __m128i *)(src2 + x));
+
+            __m128 v_dstf0 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src1, v_zero)), v_alpha);
+            v_dstf0 = _mm_add_ps(_mm_add_ps(v_dstf0, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src2, v_zero)), v_beta));
+
+            __m128 v_dstf1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src1, v_zero)), v_alpha);
+            v_dstf1 = _mm_add_ps(_mm_add_ps(v_dstf1, v_gamma),
+                                 _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src2, v_zero)), v_beta));
+
+            _mm_storeu_si128((__m128i *)(dst + x), _mm_packus_epi32(_mm_cvtps_epi32(v_dstf0),
+                                                                    _mm_cvtps_epi32(v_dstf1)));
+        }
+
+        return x;
+    }
+
+    bool haveSSE4_1;
+};
+
+#endif
+
 #elif CV_NEON
 
 template <>

From 68962adc5433ebf455d35b776d27f6cced14f836 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 31 Dec 2014 11:38:20 +0300
Subject: [PATCH 10/55] SSE mul

---
 modules/core/src/arithm.cpp | 159 ++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)

diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
index 68c8979a8d..d2074634de 100644
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -2355,6 +2355,165 @@ struct Mul_SIMD<float, float>
     }
 };
 
+#elif CV_SSE2
+
+#if CV_SSE4_1
+
+template <>
+struct Mul_SIMD<ushort, float>
+{
+    Mul_SIMD()
+    {
+        haveSSE = checkHardwareSupport(CV_CPU_SSE4_1);
+    }
+
+    int operator() (const ushort * src1, const ushort * src2, ushort * dst, int width, float scale) const
+    {
+        int x = 0;
+
+        if (!haveSSE)
+            return x;
+
+        __m128i v_zero = _mm_setzero_si128();
+
+        if( scale != 1.0f )
+        {
+            __m128 v_scale = _mm_set1_ps(scale);
+            for ( ; x <= width - 8; x += 8)
+            {
+                __m128i v_src1 = _mm_loadu_si128((__m128i const *)(src1 + x));
+                __m128i v_src2 = _mm_loadu_si128((__m128i const *)(src2 + x));
+
+                __m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src1, v_zero)),
+                                           _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src2, v_zero)));
+                v_dst1 = _mm_mul_ps(v_dst1, v_scale);
+
+                __m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src1, v_zero)),
+                                           _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src2, v_zero)));
+                v_dst2 = _mm_mul_ps(v_dst2, v_scale);
+
+                __m128i v_dsti = _mm_packus_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
+                _mm_storeu_si128((__m128i *)(dst + x), v_dsti);
+            }
+        }
+
+        return x;
+    }
+
+    bool haveSSE;
+};
+
+#endif
+
+template <>
+struct Mul_SIMD<schar, float>
+{
+    Mul_SIMD()
+    {
+        haveSSE = checkHardwareSupport(CV_CPU_SSE2);
+    }
+
+    int operator() (const schar * src1, const schar * src2, schar * dst, int width, float scale) const
+    {
+        int x = 0;
+
+        if (!haveSSE)
+            return x;
+
+        __m128i v_zero = _mm_setzero_si128();
+
+        if( scale == 1.0f )
+            for ( ; x <= width - 8; x += 8)
+            {
+                __m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src1 + x));
+                __m128i v_src2 = _mm_loadl_epi64((__m128i const *)(src2 + x));
+
+                v_src1 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
+                v_src2 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
+
+                __m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
+
+                __m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
+
+                __m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
+                _mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dsti, v_zero));
+            }
+        else
+        {
+            __m128 v_scale = _mm_set1_ps(scale);
+            for ( ; x <= width - 8; x += 8)
+            {
+                __m128i v_src1 = _mm_loadl_epi64((__m128i const *)(src1 + x));
+                __m128i v_src2 = _mm_loadl_epi64((__m128i const *)(src2 + x));
+
+                v_src1 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src1), 8);
+                v_src2 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src2), 8);
+
+                __m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
+                v_dst1 = _mm_mul_ps(v_dst1, v_scale);
+
+                __m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
+                v_dst2 = _mm_mul_ps(v_dst2, v_scale);
+
+                __m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
+                _mm_storel_epi64((__m128i *)(dst + x), _mm_packs_epi16(v_dsti, v_zero));
+            }
+        }
+
+        return x;
+    }
+
+    bool haveSSE;
+};
+
+template <>
+struct Mul_SIMD<short, float>
+{
+    Mul_SIMD()
+    {
+        haveSSE = checkHardwareSupport(CV_CPU_SSE2);
+    }
+
+    int operator() (const short * src1, const short * src2, short * dst, int width, float scale) const
+    {
+        int x = 0;
+
+        if (!haveSSE)
+            return x;
+
+        __m128i v_zero = _mm_setzero_si128();
+
+        if( scale != 1.0f )
+        {
+            __m128 v_scale = _mm_set1_ps(scale);
+            for ( ; x <= width - 8; x += 8)
+            {
+                __m128i v_src1 = _mm_loadu_si128((__m128i const *)(src1 + x));
+                __m128i v_src2 = _mm_loadu_si128((__m128i const *)(src2 + x));
+
+                __m128 v_dst1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src2), 16)));
+                v_dst1 = _mm_mul_ps(v_dst1, v_scale);
+
+                __m128 v_dst2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src1), 16)),
+                                           _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src2), 16)));
+                v_dst2 = _mm_mul_ps(v_dst2, v_scale);
+
+                __m128i v_dsti = _mm_packs_epi32(_mm_cvtps_epi32(v_dst1), _mm_cvtps_epi32(v_dst2));
+                _mm_storeu_si128((__m128i *)(dst + x), v_dsti);
+            }
+        }
+
+        return x;
+    }
+
+    bool haveSSE;
+};
+
 #endif
 
 template<typename T, typename WT> static void

From ac47c0c624fade8a83e4df7df0da7e55966c10f3 Mon Sep 17 00:00:00 2001
From: Mike Izbicki <mike@izbicki.me>
Date: Fri, 2 Jan 2015 14:41:44 -0800
Subject: [PATCH 11/55] fix to work with python 2.6

I had to make this modification locally to get OpenCV to build with Python 2.6.  Python 2.6 requires explicit indices in format strings (hence the `0` I added).  This requirement was relaxed in 2.7, so the previous code only worked for people who could upgrade.  I don't think the change has any negative consequences for future Python versions, but I'm no expert.
---
 modules/python/src2/gen2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py
index cf60ea9785..55a79484ce 100755
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@@ -861,7 +861,7 @@ class PythonWrapperGenerator(object):
             decls = self.parser.parse(hdr)
             if len(decls) == 0:
                 continue
-            self.code_include.write( '#include "{}"\n'.format(hdr[hdr.rindex('opencv2/'):]) )
+            self.code_include.write( '#include "{0}"\n'.format(hdr[hdr.rindex('opencv2/'):]) )
             for decl in decls:
                 name = decl[0]
                 if name.startswith("struct") or name.startswith("class"):

From a11c02497f60bac7b916b8245d3d14f5ecb22bf2 Mon Sep 17 00:00:00 2001
From: Nisarg Thakkar <nisargtha@gmail.com>
Date: Thu, 8 Jan 2015 13:48:48 +0530
Subject: [PATCH 12/55] Fixed bug similar to Bug #3757: set all UMat dims to zero
 after release.

---
 modules/core/include/opencv2/core/mat.inl.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp
index 73a39e77a1..cb39c15fb4 100644
--- a/modules/core/include/opencv2/core/mat.inl.hpp
+++ b/modules/core/include/opencv2/core/mat.inl.hpp
@@ -3284,7 +3284,8 @@ inline void UMat::release()
 {
     if( u && CV_XADD(&(u->urefcount), -1) == 1 )
         deallocate();
-    size.p[0] = 0;
+    for(int i = 0; i < dims; i++)
+        size.p[i] = 0;
     u = 0;
 }
 

From 4762728fd598923f7ba4f36899fc79e22c7a5199 Mon Sep 17 00:00:00 2001
From: Timur Sattarov <tim.helloworld@gmail.com>
Date: Sun, 11 Jan 2015 03:02:58 +0400
Subject: [PATCH 13/55] More sensible error messages

---
 modules/calib3d/src/calibration.cpp | 10 +++++++---
 modules/imgproc/src/contours.cpp    |  4 +++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index 43a4cd1477..5a86624c50 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -2972,7 +2972,13 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
     for( i = 0; i < nimages; i++ )
     {
         ni = objectPoints.getMat(i).checkVector(3, CV_32F);
-        CV_Assert( ni >= 0 );
+        if( ni <= 0 )
+            CV_Error(CV_StsUnsupportedFormat, "objectPoints should contain vector of vectors of points of type Point3f");
+        int ni1 = imagePoints1.getMat(i).checkVector(2, CV_32F);
+        if( ni1 <= 0 )
+            CV_Error(CV_StsUnsupportedFormat, "imagePoints1 should contain vector of vectors of points of type Point2f");
+        CV_Assert( ni == ni1 );
+
         total += ni;
     }
 
@@ -2995,8 +3001,6 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
         Mat objpt = objectPoints.getMat(i);
         Mat imgpt1 = imagePoints1.getMat(i);
         ni = objpt.checkVector(3, CV_32F);
-        int ni1 = imgpt1.checkVector(2, CV_32F);
-        CV_Assert( ni > 0 && ni == ni1 );
         npoints.at<int>(i) = ni;
         memcpy( objPtData + j, objpt.ptr(), ni*sizeof(objPtData[0]) );
         memcpy( imgPtData1 + j, imgpt1.ptr(), ni*sizeof(imgPtData1[0]) );
diff --git a/modules/imgproc/src/contours.cpp b/modules/imgproc/src/contours.cpp
index b0a81ed32d..758ccb02bc 100644
--- a/modules/imgproc/src/contours.cpp
+++ b/modules/imgproc/src/contours.cpp
@@ -193,7 +193,9 @@ cvStartFindContours( void* _img, CvMemStorage* storage,
 
     if( !((CV_IS_MASK_ARR( mat ) && mode < CV_RETR_FLOODFILL) ||
           (CV_MAT_TYPE(mat->type) == CV_32SC1 && mode == CV_RETR_FLOODFILL)) )
-        CV_Error( CV_StsUnsupportedFormat, "[Start]FindContours support only 8uC1 and 32sC1 images" );
+        CV_Error( CV_StsUnsupportedFormat,
+                  "[Start]FindContours supports only CV_8UC1 images when mode != CV_RETR_FLOODFILL "
+                  "otherwise supports CV_32SC1 images only" );
 
     CvSize size = cvSize( mat->width, mat->height );
     int step = mat->step;

From 1fa37fe733756bbc8c48dfd6ee5a7a25c9c6521f Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Tue, 6 Jan 2015 16:57:21 +0300
Subject: [PATCH 14/55] OpenCV for Tegra compilation fix for - Gaussian blur; -
 CascadeClassifier; - Blenders in stitching module; - Laplacian pyramids in
 stitching module.

---
 modules/imgproc/src/smooth.cpp          | 4 +++-
 modules/objdetect/src/cascadedetect.cpp | 2 +-
 modules/objdetect/src/cascadedetect.hpp | 2 ++
 modules/stitching/src/blenders.cpp      | 8 ++++----
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp
index 5ab70d9a26..2a69003641 100644
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1497,7 +1497,9 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
     }
 
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    if(sigma1 == 0 && sigma2 == 0 && tegra::gaussian(_src.getMat(), _dst.getMat(), ksize, borderType))
+    Mat src = _src.getMat();
+    Mat dst = _dst.getMat();
+    if(sigma1 == 0 && sigma2 == 0 && tegra::gaussian(src, dst, ksize, borderType))
         return;
 #endif
 
diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp
index 4e25a5ccf1..8a61c2dd7e 100644
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@@ -931,7 +931,7 @@ Ptr<CascadeClassifierImpl::MaskGenerator> CascadeClassifierImpl::getMaskGenerato
 Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator()
 {
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    return tegra::getCascadeClassifierMaskGenerator(*this);
+    return tegra::getCascadeClassifierMaskGenerator();
 #else
     return Ptr<BaseCascadeClassifier::MaskGenerator>();
 #endif
diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp
index 17eeccd53b..4cbf3e9bf0 100644
--- a/modules/objdetect/src/cascadedetect.hpp
+++ b/modules/objdetect/src/cascadedetect.hpp
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "opencv2/core/ocl.hpp"
+
 namespace cv
 {
 
diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index c36e6877bc..3fddff5ffe 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -477,6 +477,8 @@ static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _ma
 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
 {
 #ifdef HAVE_TEGRA_OPTIMIZATION
+    Mat weight = _weight.getMat();
+    Mat src = _src.getMat();
     if(tegra::normalizeUsingWeightMap(weight, src))
         return;
 #endif
@@ -486,9 +488,6 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
             !ocl_normalizeUsingWeightMap(_weight, _src) )
 #endif
     {
-        Mat weight = _weight.getMat();
-        Mat src = _src.getMat();
-
         CV_Assert(src.type() == CV_16SC3);
 
         if(weight.type() == CV_32FC1)
@@ -547,7 +546,8 @@ void createWeightMap(InputArray mask, float sharpness, InputOutputArray weight)
 void createLaplacePyr(InputArray img, int num_levels, std::vector<UMat> &pyr)
 {
 #ifdef HAVE_TEGRA_OPTIMIZATION
-    if(tegra::createLaplacePyr(img, num_levels, pyr))
+    cv::Mat imgMat = img.getMat();
+    if(tegra::createLaplacePyr(imgMat, num_levels, pyr))
         return;
 #endif
 

From 81f786393a586e9d2a98180d869759fbdac00d53 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 30 Dec 2014 16:53:19 +0300
Subject: [PATCH 15/55] NEON detection in runtime

---
 modules/core/src/system.cpp    | 30 ++++++++++++++++++++++++++++++
 modules/imgproc/src/filter.cpp | 28 +++++++++++-----------------
 modules/ts/src/ts_func.cpp     |  2 +-
 3 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp
index d9a20873f6..b4484f9470 100644
--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -48,6 +48,13 @@
 # endif
 #endif
 
+#if defined ANDROID || defined __linux__
+#  include <unistd.h>
+#  include <fcntl.h>
+#  include <elf.h>
+#  include <linux/auxvec.h>
+#endif
+
 #if defined WIN32 || defined _WIN32 || defined WINCE
 #ifndef _WIN32_WINNT           // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
   #define _WIN32_WINNT 0x0400  // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
@@ -253,6 +260,29 @@ struct HWFeatures
             f.have[CV_CPU_AVX]    = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
         }
 
+    #if defined ANDROID || defined __linux__
+        int cpufile = open("/proc/self/auxv", O_RDONLY);
+
+        if (cpufile >= 0)
+        {
+            Elf32_auxv_t auxv;
+            const size_t size_auxv_t = sizeof(Elf32_auxv_t);
+
+            while (read(cpufile, &auxv, sizeof(Elf32_auxv_t)) == size_auxv_t)
+            {
+                if (auxv.a_type == AT_HWCAP)
+                {
+                    f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
+                    break;
+                }
+            }
+
+            close(cpufile);
+        }
+    #elif (defined __clang__ || defined __APPLE__) && defined __ARM_NEON__
+        f.have[CV_CPU_NEON] = true;
+    #endif
+
         return f;
     }
 
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index f376507255..63a1005ae7 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -2231,9 +2231,8 @@ struct SymmRowSmallVec_8u32s
 
     int operator()(const uchar* src, uchar* _dst, int width, int cn) const
     {
-        //Uncomment the two following lines when runtime support for neon is implemented.
-        // if( !checkHardwareSupport(CV_CPU_NEON) )
-        //     return 0;
+         if( !checkHardwareSupport(CV_CPU_NEON) )
+             return 0;
 
         int i = 0, _ksize = kernel.rows + kernel.cols - 1;
         int* dst = (int*)_dst;
@@ -2459,9 +2458,8 @@ struct SymmColumnVec_32s8u
 
     int operator()(const uchar** _src, uchar* dst, int width) const
     {
-        //Uncomment the two following lines when runtime support for neon is implemented.
-        // if( !checkHardwareSupport(CV_CPU_NEON) )
-        //     return 0;
+         if( !checkHardwareSupport(CV_CPU_NEON) )
+             return 0;
 
         int _ksize = kernel.rows + kernel.cols - 1;
         int ksize2 = _ksize / 2;
@@ -2612,9 +2610,8 @@ struct SymmColumnSmallVec_32s16s
 
     int operator()(const uchar** _src, uchar* _dst, int width) const
     {
-        //Uncomment the two following lines when runtime support for neon is implemented.
-        // if( !checkHardwareSupport(CV_CPU_NEON) )
-        //     return 0;
+         if( !checkHardwareSupport(CV_CPU_NEON) )
+             return 0;
 
         int ksize2 = (kernel.rows + kernel.cols - 1)/2;
         const float* ky = kernel.ptr<float>() + ksize2;
@@ -2788,15 +2785,13 @@ struct SymmColumnVec_32f16s
         kernel = _kernel;
         delta = (float)_delta;
         CV_Assert( (symmetryType & (KERNEL_SYMMETRICAL | KERNEL_ASYMMETRICAL)) != 0 );
-        //Uncomment the following line when runtime support for neon is implemented.
-        // neon_supported = checkHardwareSupport(CV_CPU_NEON);
+         neon_supported = checkHardwareSupport(CV_CPU_NEON);
     }
 
     int operator()(const uchar** _src, uchar* _dst, int width) const
     {
-        //Uncomment the two following lines when runtime support for neon is implemented.
-        // if( !neon_supported )
-        //     return 0;
+         if( !neon_supported )
+             return 0;
 
         int _ksize = kernel.rows + kernel.cols - 1;
         int ksize2 = _ksize / 2;
@@ -2943,9 +2938,8 @@ struct SymmRowSmallVec_32f
 
     int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
     {
-        //Uncomment the two following lines when runtime support for neon is implemented.
-        // if( !checkHardwareSupport(CV_CPU_NEON) )
-        //     return 0;
+         if( !checkHardwareSupport(CV_CPU_NEON) )
+             return 0;
 
         int i = 0, _ksize = kernel.rows + kernel.cols - 1;
         float* dst = (float*)_dst;
diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp
index 7745c86c5c..03877c0910 100644
--- a/modules/ts/src/ts_func.cpp
+++ b/modules/ts/src/ts_func.cpp
@@ -3020,7 +3020,7 @@ void printVersionInfo(bool useStdOut)
     if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
 #endif
 #if CV_NEON
-    cpu_features += " neon"; // NEON is currently not checked at runtime
+    if (checkHardwareSupport(CV_CPU_NEON)) cpu_features += " neon";
 #endif
 
     cpu_features.erase(0, 1); // erase initial space

From 4cc7b0a74f75e6e75d3cbbe265c2d2f4071f1134 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 16:05:47 +0300
Subject: [PATCH 16/55] move ImagePyramid to cudalegacy

---
 .../cudalegacy/include/opencv2/cudalegacy.hpp |  13 ++
 modules/cudalegacy/src/image_pyramid.cpp      | 147 ++++++++++++++++++
 modules/cudawarping/CMakeLists.txt            |   2 +-
 .../include/opencv2/cudawarping.hpp           |   8 -
 modules/cudawarping/perf/perf_warping.cpp     |  37 -----
 modules/cudawarping/src/precomp.hpp           |   7 -
 modules/cudawarping/src/pyramids.cpp          | 110 -------------
 7 files changed, 161 insertions(+), 163 deletions(-)
 create mode 100644 modules/cudalegacy/src/image_pyramid.cpp

diff --git a/modules/cudalegacy/include/opencv2/cudalegacy.hpp b/modules/cudalegacy/include/opencv2/cudalegacy.hpp
index a72ef09c75..5e57733857 100644
--- a/modules/cudalegacy/include/opencv2/cudalegacy.hpp
+++ b/modules/cudalegacy/include/opencv2/cudalegacy.hpp
@@ -43,6 +43,7 @@
 #ifndef __OPENCV_CUDALEGACY_HPP__
 #define __OPENCV_CUDALEGACY_HPP__
 
+#include "opencv2/core/cuda.hpp"
 #include "opencv2/cudalegacy/NCV.hpp"
 #include "opencv2/cudalegacy/NPP_staging.hpp"
 #include "opencv2/cudalegacy/NCVPyramid.hpp"
@@ -56,4 +57,16 @@
   @}
 */
 
+namespace cv { namespace cuda {
+
+class CV_EXPORTS ImagePyramid : public Algorithm
+{
+public:
+    virtual void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const = 0;
+};
+
+CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
+
+}}
+
 #endif /* __OPENCV_CUDALEGACY_HPP__ */
diff --git a/modules/cudalegacy/src/image_pyramid.cpp b/modules/cudalegacy/src/image_pyramid.cpp
new file mode 100644
index 0000000000..938ffea5d8
--- /dev/null
+++ b/modules/cudalegacy/src/image_pyramid.cpp
@@ -0,0 +1,147 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
+
+Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
+
+#else // HAVE_CUDA
+
+namespace
+{
+    class ImagePyramidImpl : public ImagePyramid
+    {
+    public:
+        ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
+
+        void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
+
+    private:
+        GpuMat layer0_;
+        std::vector<GpuMat> pyramid_;
+        int nLayers_;
+    };
+
+    ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
+    {
+        GpuMat img = _img.getGpuMat();
+
+        CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
+
+        img.copyTo(layer0_, stream);
+
+        Size szLastLayer = img.size();
+        nLayers_ = 1;
+
+        if (numLayers <= 0)
+            numLayers = 255; // it will cut-off when any of the dimensions goes 1
+
+        pyramid_.resize(numLayers);
+
+        for (int i = 0; i < numLayers - 1; ++i)
+        {
+            Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
+
+            if (szCurLayer.width == 0 || szCurLayer.height == 0)
+                break;
+
+            ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
+            nLayers_++;
+
+            const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
+
+            cv::cuda::device::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
+
+            szLastLayer = szCurLayer;
+        }
+    }
+
+    void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
+    {
+        CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
+
+        ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
+        GpuMat outImg = _outImg.getGpuMat();
+
+        if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
+        {
+            layer0_.copyTo(outImg, stream);
+            return;
+        }
+
+        float lastScale = 1.0f;
+        float curScale;
+        GpuMat lastLayer = layer0_;
+        GpuMat curLayer;
+
+        for (int i = 0; i < nLayers_ - 1; ++i)
+        {
+            curScale = lastScale * 0.5f;
+            curLayer = pyramid_[i];
+
+            if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
+            {
+                curLayer.copyTo(outImg, stream);
+            }
+
+            if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
+                break;
+
+            lastScale = curScale;
+            lastLayer = curLayer;
+        }
+
+        cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
+    }
+}
+
+Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
+{
+    return Ptr<ImagePyramid>(new ImagePyramidImpl(img, nLayers, stream));
+}
+
+#endif
diff --git a/modules/cudawarping/CMakeLists.txt b/modules/cudawarping/CMakeLists.txt
index 231e24e695..fa99e9d04b 100644
--- a/modules/cudawarping/CMakeLists.txt
+++ b/modules/cudawarping/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "CUDA-accelerated Image Warping")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
 
-ocv_define_module(cudawarping opencv_imgproc OPTIONAL opencv_cudalegacy)
+ocv_define_module(cudawarping opencv_core opencv_imgproc OPTIONAL opencv_cudev)
diff --git a/modules/cudawarping/include/opencv2/cudawarping.hpp b/modules/cudawarping/include/opencv2/cudawarping.hpp
index ca877d50c9..fbd63873bc 100644
--- a/modules/cudawarping/include/opencv2/cudawarping.hpp
+++ b/modules/cudawarping/include/opencv2/cudawarping.hpp
@@ -224,14 +224,6 @@ src .
  */
 CV_EXPORTS void pyrUp(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
 
-class CV_EXPORTS ImagePyramid : public Algorithm
-{
-public:
-    virtual void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const = 0;
-};
-
-CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
-
 //! @}
 
 }} // namespace cv { namespace cuda {
diff --git a/modules/cudawarping/perf/perf_warping.cpp b/modules/cudawarping/perf/perf_warping.cpp
index dfb11075a7..1096d3f441 100644
--- a/modules/cudawarping/perf/perf_warping.cpp
+++ b/modules/cudawarping/perf/perf_warping.cpp
@@ -514,40 +514,3 @@ PERF_TEST_P(Sz_Depth_Cn, PyrUp,
         CPU_SANITY_CHECK(dst);
     }
 }
-
-//////////////////////////////////////////////////////////////////////
-// ImagePyramidGetLayer
-
-PERF_TEST_P(Sz_Depth_Cn, ImagePyramidGetLayer,
-            Combine(CUDA_TYPICAL_MAT_SIZES,
-                    Values(CV_8U, CV_16U, CV_32F),
-                    CUDA_CHANNELS_1_3_4))
-{
-    const cv::Size size = GET_PARAM(0);
-    const int depth = GET_PARAM(1);
-    const int channels = GET_PARAM(2);
-
-    const int type = CV_MAKE_TYPE(depth, channels);
-
-    cv::Mat src(size, type);
-    declare.in(src, WARMUP_RNG);
-
-    const int nLayers = 3;
-    const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10);
-
-    if (PERF_RUN_CUDA())
-    {
-        const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat dst;
-
-        cv::Ptr<cv::cuda::ImagePyramid> d_pyr = cv::cuda::createImagePyramid(d_src, nLayers);
-
-        TEST_CYCLE() d_pyr->getLayer(dst, dstSize);
-
-        CUDA_SANITY_CHECK(dst);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
diff --git a/modules/cudawarping/src/precomp.hpp b/modules/cudawarping/src/precomp.hpp
index 9f5b0c1529..a59a4e9257 100644
--- a/modules/cudawarping/src/precomp.hpp
+++ b/modules/cudawarping/src/precomp.hpp
@@ -47,11 +47,4 @@
 
 #include "opencv2/core/private.cuda.hpp"
 
-#include "opencv2/opencv_modules.hpp"
-
-#ifdef HAVE_OPENCV_CUDALEGACY
-#  include "opencv2/cudalegacy.hpp"
-#  include "opencv2/cudalegacy/private.hpp"
-#endif
-
 #endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/cudawarping/src/pyramids.cpp b/modules/cudawarping/src/pyramids.cpp
index 3d942fc6a3..0cb0f5de57 100644
--- a/modules/cudawarping/src/pyramids.cpp
+++ b/modules/cudawarping/src/pyramids.cpp
@@ -50,8 +50,6 @@ using namespace cv::cuda;
 void cv::cuda::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 void cv::cuda::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
-
 #else // HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////////////
@@ -133,112 +131,4 @@ void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
     func(src, dst, StreamAccessor::getStream(stream));
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// ImagePyramid
-
-#ifdef HAVE_OPENCV_CUDALEGACY
-
-namespace
-{
-    class ImagePyramidImpl : public ImagePyramid
-    {
-    public:
-        ImagePyramidImpl(InputArray img, int nLayers, Stream& stream);
-
-        void getLayer(OutputArray outImg, Size outRoi, Stream& stream = Stream::Null()) const;
-
-    private:
-        GpuMat layer0_;
-        std::vector<GpuMat> pyramid_;
-        int nLayers_;
-    };
-
-    ImagePyramidImpl::ImagePyramidImpl(InputArray _img, int numLayers, Stream& stream)
-    {
-        GpuMat img = _img.getGpuMat();
-
-        CV_Assert( img.depth() <= CV_32F && img.channels() <= 4 );
-
-        img.copyTo(layer0_, stream);
-
-        Size szLastLayer = img.size();
-        nLayers_ = 1;
-
-        if (numLayers <= 0)
-            numLayers = 255; // it will cut-off when any of the dimensions goes 1
-
-        pyramid_.resize(numLayers);
-
-        for (int i = 0; i < numLayers - 1; ++i)
-        {
-            Size szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
-
-            if (szCurLayer.width == 0 || szCurLayer.height == 0)
-                break;
-
-            ensureSizeIsEnough(szCurLayer, img.type(), pyramid_[i]);
-            nLayers_++;
-
-            const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
-
-            cv::cuda::device::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
-
-            szLastLayer = szCurLayer;
-        }
-    }
-
-    void ImagePyramidImpl::getLayer(OutputArray _outImg, Size outRoi, Stream& stream) const
-    {
-        CV_Assert( outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0 );
-
-        ensureSizeIsEnough(outRoi, layer0_.type(), _outImg);
-        GpuMat outImg = _outImg.getGpuMat();
-
-        if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
-        {
-            layer0_.copyTo(outImg, stream);
-            return;
-        }
-
-        float lastScale = 1.0f;
-        float curScale;
-        GpuMat lastLayer = layer0_;
-        GpuMat curLayer;
-
-        for (int i = 0; i < nLayers_ - 1; ++i)
-        {
-            curScale = lastScale * 0.5f;
-            curLayer = pyramid_[i];
-
-            if (outRoi.width == curLayer.cols && outRoi.height == curLayer.rows)
-            {
-                curLayer.copyTo(outImg, stream);
-            }
-
-            if (outRoi.width >= curLayer.cols && outRoi.height >= curLayer.rows)
-                break;
-
-            lastScale = curScale;
-            lastLayer = curLayer;
-        }
-
-        cv::cuda::device::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
-    }
-}
-
 #endif
-
-Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
-{
-#ifndef HAVE_OPENCV_CUDALEGACY
-    (void) img;
-    (void) nLayers;
-    (void) stream;
-    throw_no_cuda();
-    return Ptr<ImagePyramid>();
-#else
-    return Ptr<ImagePyramid>(new ImagePyramidImpl(img, nLayers, stream));
-#endif
-}
-
-#endif // HAVE_CUDA

From ecee388236cdfd737def724c3f9fca6ad6e0a4aa Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 16:06:09 +0300
Subject: [PATCH 17/55] move buildWarp*Maps to stitching

---
 .../include/opencv2/cudawarping.hpp           |  15 -
 modules/cudawarping/perf/perf_warping.cpp     |  82 -----
 modules/cudawarping/src/warp.cpp              | 122 --------
 modules/stitching/CMakeLists.txt              |   5 +
 .../opencv2/stitching/detail/warpers.hpp      |   2 -
 .../src/cuda/build_warp_maps.cu               |   0
 modules/stitching/src/warpers.cpp             |  85 ------
 modules/stitching/src/warpers_cuda.cpp        | 283 ++++++++++++++++++
 8 files changed, 288 insertions(+), 306 deletions(-)
 rename modules/{cudawarping => stitching}/src/cuda/build_warp_maps.cu (100%)
 create mode 100644 modules/stitching/src/warpers_cuda.cpp

diff --git a/modules/cudawarping/include/opencv2/cudawarping.hpp b/modules/cudawarping/include/opencv2/cudawarping.hpp
index fbd63873bc..66c41ccefb 100644
--- a/modules/cudawarping/include/opencv2/cudawarping.hpp
+++ b/modules/cudawarping/include/opencv2/cudawarping.hpp
@@ -171,21 +171,6 @@ CV_EXPORTS void warpPerspective(InputArray src, OutputArray dst, InputArray M, S
  */
 CV_EXPORTS void buildWarpPerspectiveMaps(InputArray M, bool inverse, Size dsize, OutputArray xmap, OutputArray ymap, Stream& stream = Stream::Null());
 
-/** @brief Builds plane warping maps.
- */
-CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, InputArray T, float scale,
-                                   OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
-
-/** @brief Builds cylindrical warping maps.
- */
-CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
-                                         OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
-
-/** @brief Builds spherical warping maps.
- */
-CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray K, InputArray R, float scale,
-                                       OutputArray map_x, OutputArray map_y, Stream& stream = Stream::Null());
-
 /** @brief Rotates an image around the origin (0,0) and then shifts it.
 
 @param src Source image. Supports 1, 3 or 4 channels images with CV_8U , CV_16U or CV_32F
diff --git a/modules/cudawarping/perf/perf_warping.cpp b/modules/cudawarping/perf/perf_warping.cpp
index 1096d3f441..36662418c3 100644
--- a/modules/cudawarping/perf/perf_warping.cpp
+++ b/modules/cudawarping/perf/perf_warping.cpp
@@ -325,88 +325,6 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpPerspective,
     }
 }
 
-//////////////////////////////////////////////////////////////////////
-// BuildWarpPlaneMaps
-
-PERF_TEST_P(Sz, BuildWarpPlaneMaps,
-            CUDA_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-    const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);
-
-    if (PERF_RUN_CUDA())
-    {
-        cv::cuda::GpuMat map_x;
-        cv::cuda::GpuMat map_y;
-
-        TEST_CYCLE() cv::cuda::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
-
-        CUDA_SANITY_CHECK(map_x);
-        CUDA_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpCylindricalMaps
-
-PERF_TEST_P(Sz, BuildWarpCylindricalMaps,
-            CUDA_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-
-    if (PERF_RUN_CUDA())
-    {
-        cv::cuda::GpuMat map_x;
-        cv::cuda::GpuMat map_y;
-
-        TEST_CYCLE() cv::cuda::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
-
-        CUDA_SANITY_CHECK(map_x);
-        CUDA_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// BuildWarpSphericalMaps
-
-PERF_TEST_P(Sz, BuildWarpSphericalMaps,
-            CUDA_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();
-
-    const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
-    const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
-
-    if (PERF_RUN_CUDA())
-    {
-        cv::cuda::GpuMat map_x;
-        cv::cuda::GpuMat map_y;
-
-        TEST_CYCLE() cv::cuda::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
-
-        CUDA_SANITY_CHECK(map_x);
-        CUDA_SANITY_CHECK(map_y);
-    }
-    else
-    {
-        FAIL_NO_CPU();
-    }
-}
-
 //////////////////////////////////////////////////////////////////////
 // Rotate
 
diff --git a/modules/cudawarping/src/warp.cpp b/modules/cudawarping/src/warp.cpp
index 121ea5c018..99554e3122 100644
--- a/modules/cudawarping/src/warp.cpp
+++ b/modules/cudawarping/src/warp.cpp
@@ -53,10 +53,6 @@ void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputAr
 void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
 void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::cuda::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::cuda::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::cuda::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
-
 void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
@@ -462,124 +458,6 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
     }
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpPlaneMaps
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace imgproc
-    {
-        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
-                                cudaStream_t stream);
-    }
-}}}
-
-void cv::cuda::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
-                                 float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
-{
-    (void) src_size;
-
-    Mat K = _K.getMat();
-    Mat R = _R.getMat();
-    Mat T = _T.getMat();
-
-    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
-    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
-    CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() );
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert( K_Rinv.isContinuous() );
-    CV_Assert( R_Kinv.isContinuous() );
-
-    _map_x.create(dst_roi.size(), CV_32FC1);
-    _map_y.create(dst_roi.size(), CV_32FC1);
-
-    GpuMat map_x = _map_x.getGpuMat();
-    GpuMat map_y = _map_y.getGpuMat();
-
-    device::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
-                       T.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpCylyndricalMaps
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace imgproc
-    {
-        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                      const float k_rinv[9], const float r_kinv[9], float scale,
-                                      cudaStream_t stream);
-    }
-}}}
-
-void cv::cuda::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
-                                       OutputArray _map_x, OutputArray _map_y, Stream& stream)
-{
-    (void) src_size;
-
-    Mat K = _K.getMat();
-    Mat R = _R.getMat();
-
-    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
-    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert( K_Rinv.isContinuous() );
-    CV_Assert( R_Kinv.isContinuous() );
-
-    _map_x.create(dst_roi.size(), CV_32FC1);
-    _map_y.create(dst_roi.size(), CV_32FC1);
-
-    GpuMat map_x = _map_x.getGpuMat();
-    GpuMat map_y = _map_y.getGpuMat();
-
-    device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-// buildWarpSphericalMaps
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace imgproc
-    {
-        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                    const float k_rinv[9], const float r_kinv[9], float scale,
-                                    cudaStream_t stream);
-    }
-}}}
-
-void cv::cuda::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
-                                     OutputArray _map_x, OutputArray _map_y, Stream& stream)
-{
-    (void) src_size;
-
-    Mat K = _K.getMat();
-    Mat R = _R.getMat();
-
-    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
-    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert( K_Rinv.isContinuous() );
-    CV_Assert( R_Kinv.isContinuous() );
-
-    _map_x.create(dst_roi.size(), CV_32FC1);
-    _map_y.create(dst_roi.size(), CV_32FC1);
-
-    GpuMat map_x = _map_x.getGpuMat();
-    GpuMat map_y = _map_y.getGpuMat();
-
-    device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
 ////////////////////////////////////////////////////////////////////////
 // rotate
 
diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index 73db4a0310..8d11491be2 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,3 +1,8 @@
 set(the_description "Images stitching")
+
+if(HAVE_CUDA)
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+endif()
+
 ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect
                   OPTIONAL opencv_cuda opencv_cudaarithm opencv_cudafilters opencv_cudafeatures2d opencv_xfeatures2d)
diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
index 79f387cc2f..19dff8e1f0 100644
--- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
@@ -398,7 +398,6 @@ public:
 };
 
 
-#ifdef HAVE_OPENCV_CUDAWARPING
 class CV_EXPORTS PlaneWarperGpu : public PlaneWarper
 {
 public:
@@ -515,7 +514,6 @@ public:
 private:
     cuda::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
 };
-#endif
 
 
 struct SphericalPortraitProjector : ProjectorBase
diff --git a/modules/cudawarping/src/cuda/build_warp_maps.cu b/modules/stitching/src/cuda/build_warp_maps.cu
similarity index 100%
rename from modules/cudawarping/src/cuda/build_warp_maps.cu
rename to modules/stitching/src/cuda/build_warp_maps.cu
diff --git a/modules/stitching/src/warpers.cpp b/modules/stitching/src/warpers.cpp
index 744474ba6e..4b6185f4e6 100644
--- a/modules/stitching/src/warpers.cpp
+++ b/modules/stitching/src/warpers.cpp
@@ -242,91 +242,6 @@ void SphericalWarper::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_b
     dst_br.y = static_cast<int>(br_vf);
 }
 
-
-#ifdef HAVE_OPENCV_CUDAWARPING
-Rect PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
-{
-    return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap);
-}
-
-Rect PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
-{
-    projector_.setCameraParams(K, R, T);
-
-    Point dst_tl, dst_br;
-    detectResultRoi(src_size, dst_tl, dst_br);
-
-    cuda::buildWarpPlaneMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
-                            K, R, T, projector_.scale, xmap, ymap);
-
-    return Rect(dst_tl, dst_br);
-}
-
-Point PlaneWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                           cuda::GpuMat & dst)
-{
-    return warp(src, K, R, Mat::zeros(3, 1, CV_32F), interp_mode, border_mode, dst);
-}
-
-
-Point PlaneWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode,
-                           cuda::GpuMat & dst)
-{
-    Rect dst_roi = buildMaps(src.size(), K, R, T, d_xmap_, d_ymap_);
-    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
-    return dst_roi.tl();
-}
-
-
-Rect SphericalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
-{
-    projector_.setCameraParams(K, R);
-
-    Point dst_tl, dst_br;
-    detectResultRoi(src_size, dst_tl, dst_br);
-
-    cuda::buildWarpSphericalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
-                                K, R, projector_.scale, xmap, ymap);
-
-    return Rect(dst_tl, dst_br);
-}
-
-
-Point SphericalWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                               cuda::GpuMat & dst)
-{
-    Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
-    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
-    return dst_roi.tl();
-}
-
-
-Rect CylindricalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
-{
-    projector_.setCameraParams(K, R);
-
-    Point dst_tl, dst_br;
-    detectResultRoi(src_size, dst_tl, dst_br);
-
-    cuda::buildWarpCylindricalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
-                                  K, R, projector_.scale, xmap, ymap);
-
-    return Rect(dst_tl, dst_br);
-}
-
-
-Point CylindricalWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, int interp_mode, int border_mode,
-                                 cuda::GpuMat & dst)
-{
-    Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
-    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
-    return dst_roi.tl();
-}
-#endif
-
 void SphericalPortraitWarper::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_br)
 {
     detectResultRoiByBorder(src_size, dst_tl, dst_br);
diff --git a/modules/stitching/src/warpers_cuda.cpp b/modules/stitching/src/warpers_cuda.cpp
new file mode 100644
index 0000000000..0593e6d9e1
--- /dev/null
+++ b/modules/stitching/src/warpers_cuda.cpp
@@ -0,0 +1,283 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "opencv2/core/private.cuda.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+Rect cv::detail::PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R,
+                                           cuda::GpuMat & xmap, cuda::GpuMat & ymap)
+{
+    return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap);
+}
+
+#ifdef HAVE_CUDA
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace imgproc
+    {
+        void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
+                                cudaStream_t stream);
+    }
+}}}
+
+static void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T, // file-local wrapper: validates K/R/T, sizes the maps, forwards to device::imgproc::buildWarpPlaneMaps
+                               float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
+{
+    (void) src_size; // accepted but not used in this function
+
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+    Mat T = _T.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 ); // K and R must be 3x3 single-channel float
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+    CV_Assert( (T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32FC1 && T.isContinuous() ); // T: 3 floats, row or column, contiguous
+
+    Mat K_Rinv = K * R.t(); // K times the transpose of R; NOTE(review): the name says "inverse", presumably R is a rotation -- not checked here
+    Mat R_Kinv = R * K.inv(); // R times the inverse of K
+    CV_Assert( K_Rinv.isContinuous() ); // both products are passed below as raw float pointers
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1); // both output maps get dst_roi's size and type CV_32FC1
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
+
+    device::imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), // dst_roi's top-left goes in as tl_u / tl_v
+                       T.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+#endif
+
+Rect cv::detail::PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, InputArray T,
+                                           cuda::GpuMat & xmap, cuda::GpuMat & ymap)
+{
+#ifndef HAVE_CUDA // built without CUDA: mark every parameter as used, then report the missing support
+    (void)src_size;
+    (void)K;
+    (void)R;
+    (void)T;
+    (void)xmap;
+    (void)ymap;
+    throw_no_cuda(); // same call the no-CUDA stubs in cudawarping use
+    return Rect(); // NOTE(review): presumably unreachable and only here to satisfy the compiler -- confirm
+#else // HAVE_CUDA
+    projector_.setCameraParams(K, R, T);
+
+    Point dst_tl, dst_br;
+    detectResultRoi(src_size, dst_tl, dst_br); // writes the destination ROI corners into dst_tl / dst_br
+
+    ::buildWarpPlaneMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)), // file-local helper; the maps are built for a rect extended by 1 past dst_br
+                         K, R, T, projector_.scale, xmap, ymap);
+
+    return Rect(dst_tl, dst_br); // the returned rect is not extended by 1
+#endif
+}
+
+Point cv::detail::PlaneWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, // convenience overload without a translation argument
+                                       int interp_mode, int border_mode,
+                                       cuda::GpuMat & dst)
+{
+    return warp(src, K, R, Mat::zeros(3, 1, CV_32F), interp_mode, border_mode, dst); // delegates to the overload taking T, passing a 3x1 all-zero CV_32F matrix
+}
+
+
+Point cv::detail::PlaneWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R, InputArray T, // builds maps into the d_xmap_/d_ymap_ members, remaps src into dst (ROI height/width + 1), returns the ROI's top-left
+                                       int interp_mode, int border_mode,
+                                       cuda::GpuMat & dst)
+{
+    Rect dst_roi = buildMaps(src.size(), K, R, T, d_xmap_, d_ymap_);
+    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    return dst_roi.tl();
+}
+
+#ifdef HAVE_CUDA
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace imgproc
+    {
+        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                    const float k_rinv[9], const float r_kinv[9], float scale,
+                                    cudaStream_t stream);
+    }
+}}}
+
+static void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+                                   OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
+{
+    (void) src_size;
+
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+
+    Mat K_Rinv = K * R.t();
+    Mat R_Kinv = R * K.inv();
+    CV_Assert( K_Rinv.isContinuous() );
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1);
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
+
+    device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+#endif
+
+Rect cv::detail::SphericalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
+{
+#ifndef HAVE_CUDA // built without CUDA: mark every parameter as used, then report the missing support
+    (void)src_size;
+    (void)K;
+    (void)R;
+    (void)xmap;
+    (void)ymap;
+    throw_no_cuda(); // same call the no-CUDA stubs in cudawarping use
+    return Rect(); // NOTE(review): presumably unreachable and only here to satisfy the compiler -- confirm
+#else // HAVE_CUDA
+    projector_.setCameraParams(K, R);
+
+    Point dst_tl, dst_br;
+    detectResultRoi(src_size, dst_tl, dst_br); // writes the destination ROI corners into dst_tl / dst_br
+
+    ::buildWarpSphericalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)), // file-local helper; the maps are built for a rect extended by 1 past dst_br
+                             K, R, projector_.scale, xmap, ymap);
+
+    return Rect(dst_tl, dst_br); // the returned rect is not extended by 1
+#endif
+}
+
+Point cv::detail::SphericalWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R,
+                                           int interp_mode, int border_mode,
+                                           cuda::GpuMat & dst)
+{
+    Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
+    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    return dst_roi.tl();
+}
+
+#ifdef HAVE_CUDA
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace imgproc
+    {
+        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                      const float k_rinv[9], const float r_kinv[9], float scale,
+                                      cudaStream_t stream);
+    }
+}}}
+
+static void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+                                     OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
+{
+    (void) src_size;
+
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+
+    Mat K_Rinv = K * R.t();
+    Mat R_Kinv = R * K.inv();
+    CV_Assert( K_Rinv.isContinuous() );
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1);
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
+
+    device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
+}
+
+#endif
+
+Rect cv::detail::CylindricalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R,
+                                                 cuda::GpuMat & xmap, cuda::GpuMat & ymap)
+{
+#ifndef HAVE_CUDA // built without CUDA: mark every parameter as used, then report the missing support
+    (void)src_size;
+    (void)K;
+    (void)R;
+    (void)xmap;
+    (void)ymap;
+    throw_no_cuda(); // same call the no-CUDA stubs in cudawarping use
+    return Rect(); // NOTE(review): presumably unreachable and only here to satisfy the compiler -- confirm
+#else // HAVE_CUDA
+    projector_.setCameraParams(K, R);
+
+    Point dst_tl, dst_br;
+    detectResultRoi(src_size, dst_tl, dst_br); // writes the destination ROI corners into dst_tl / dst_br
+
+    ::buildWarpCylindricalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)), // file-local helper; the maps are built for a rect extended by 1 past dst_br
+                               K, R, projector_.scale, xmap, ymap);
+
+    return Rect(dst_tl, dst_br); // the returned rect is not extended by 1
+#endif
+}
+
+Point cv::detail::CylindricalWarperGpu::warp(const cuda::GpuMat & src, InputArray K, InputArray R,
+                                             int interp_mode, int border_mode,
+                                             cuda::GpuMat & dst)
+{
+    Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
+    dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    return dst_roi.tl();
+}

From 1421e306fb9498b9039e92c55e9df989779a8ec7 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 24 Dec 2014 17:24:38 +0300
Subject: [PATCH 18/55] fix compilation without CUDA

---
 modules/stitching/src/warpers_cuda.cpp | 183 +++++++++++++------------
 1 file changed, 99 insertions(+), 84 deletions(-)

diff --git a/modules/stitching/src/warpers_cuda.cpp b/modules/stitching/src/warpers_cuda.cpp
index 0593e6d9e1..d1fe8739b4 100644
--- a/modules/stitching/src/warpers_cuda.cpp
+++ b/modules/stitching/src/warpers_cuda.cpp
@@ -46,12 +46,6 @@
 using namespace cv;
 using namespace cv::cuda;
 
-Rect cv::detail::PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R,
-                                           cuda::GpuMat & xmap, cuda::GpuMat & ymap)
-{
-    return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap);
-}
-
 #ifdef HAVE_CUDA
 
 namespace cv { namespace cuda { namespace device
@@ -61,6 +55,14 @@ namespace cv { namespace cuda { namespace device
         void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
                                 const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
                                 cudaStream_t stream);
+
+        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                    const float k_rinv[9], const float r_kinv[9], float scale,
+                                    cudaStream_t stream);
+
+        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
+                                      const float k_rinv[9], const float r_kinv[9], float scale,
+                                      cudaStream_t stream);
     }
 }}}
 
@@ -92,8 +94,64 @@ static void buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, Input
                        T.ptr<float>(), scale, StreamAccessor::getStream(stream));
 }
 
+static void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale, // file-local wrapper: validates K/R, sizes the maps, forwards to device::imgproc::buildWarpSphericalMaps
+                                   OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
+{
+    (void) src_size; // accepted but not used in this function
+
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 ); // K and R must be 3x3 single-channel float
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+
+    Mat K_Rinv = K * R.t(); // K times the transpose of R; NOTE(review): the name says "inverse", presumably R is a rotation -- not checked here
+    Mat R_Kinv = R * K.inv(); // R times the inverse of K
+    CV_Assert( K_Rinv.isContinuous() ); // both products are passed below as raw float pointers
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1); // both output maps get dst_roi's size and type CV_32FC1
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
+
+    device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream)); // dst_roi's top-left goes in as tl_u / tl_v
+}
+
+static void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale, // file-local wrapper: validates K/R, sizes the maps, forwards to device::imgproc::buildWarpCylindricalMaps
+                                     OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
+{
+    (void) src_size; // accepted but not used in this function
+
+    Mat K = _K.getMat();
+    Mat R = _R.getMat();
+
+    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 ); // K and R must be 3x3 single-channel float
+    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
+
+    Mat K_Rinv = K * R.t(); // K times the transpose of R; NOTE(review): the name says "inverse", presumably R is a rotation -- not checked here
+    Mat R_Kinv = R * K.inv(); // R times the inverse of K
+    CV_Assert( K_Rinv.isContinuous() ); // both products are passed below as raw float pointers
+    CV_Assert( R_Kinv.isContinuous() );
+
+    _map_x.create(dst_roi.size(), CV_32FC1); // both output maps get dst_roi's size and type CV_32FC1
+    _map_y.create(dst_roi.size(), CV_32FC1);
+
+    GpuMat map_x = _map_x.getGpuMat();
+    GpuMat map_y = _map_y.getGpuMat();
+
+    device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream)); // dst_roi's top-left goes in as tl_u / tl_v
+}
+
 #endif
 
+Rect cv::detail::PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, // convenience overload without a translation argument
+                                           cuda::GpuMat & xmap, cuda::GpuMat & ymap)
+{
+    return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap); // delegates to the overload taking T, passing a 3x1 all-zero CV_32F matrix
+}
+
 Rect cv::detail::PlaneWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, InputArray T,
                                            cuda::GpuMat & xmap, cuda::GpuMat & ymap)
 {
@@ -131,50 +189,23 @@ Point cv::detail::PlaneWarperGpu::warp(const cuda::GpuMat & src, InputArray K, I
                                        int interp_mode, int border_mode,
                                        cuda::GpuMat & dst)
 {
+#ifndef HAVE_OPENCV_CUDAWARPING
+    (void)src;
+    (void)K;
+    (void)R;
+    (void)T;
+    (void)interp_mode;
+    (void)border_mode;
+    (void)dst;
+    throw_no_cuda();
+    return Point();
+#else
     Rect dst_roi = buildMaps(src.size(), K, R, T, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
     cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
-}
-
-#ifdef HAVE_CUDA
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace imgproc
-    {
-        void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                    const float k_rinv[9], const float r_kinv[9], float scale,
-                                    cudaStream_t stream);
-    }
-}}}
-
-static void buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
-                                   OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
-{
-    (void) src_size;
-
-    Mat K = _K.getMat();
-    Mat R = _R.getMat();
-
-    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
-    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert( K_Rinv.isContinuous() );
-    CV_Assert( R_Kinv.isContinuous() );
-
-    _map_x.create(dst_roi.size(), CV_32FC1);
-    _map_y.create(dst_roi.size(), CV_32FC1);
-
-    GpuMat map_x = _map_x.getGpuMat();
-    GpuMat map_y = _map_y.getGpuMat();
-
-    device::imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
 #endif
+}
 
 Rect cv::detail::SphericalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R, cuda::GpuMat & xmap, cuda::GpuMat & ymap)
 {
@@ -203,50 +234,23 @@ Point cv::detail::SphericalWarperGpu::warp(const cuda::GpuMat & src, InputArray
                                            int interp_mode, int border_mode,
                                            cuda::GpuMat & dst)
 {
+#ifndef HAVE_OPENCV_CUDAWARPING
+    (void)src;
+    (void)K;
+    (void)R;
+    (void)interp_mode;
+    (void)border_mode;
+    (void)dst;
+    throw_no_cuda();
+    return Point();
+#else
     Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
     cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
-}
-
-#ifdef HAVE_CUDA
-
-namespace cv { namespace cuda { namespace device
-{
-    namespace imgproc
-    {
-        void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
-                                      const float k_rinv[9], const float r_kinv[9], float scale,
-                                      cudaStream_t stream);
-    }
-}}}
-
-static void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
-                                     OutputArray _map_x, OutputArray _map_y, Stream& stream = Stream::Null())
-{
-    (void) src_size;
-
-    Mat K = _K.getMat();
-    Mat R = _R.getMat();
-
-    CV_Assert( K.size() == Size(3,3) && K.type() == CV_32FC1 );
-    CV_Assert( R.size() == Size(3,3) && R.type() == CV_32FC1 );
-
-    Mat K_Rinv = K * R.t();
-    Mat R_Kinv = R * K.inv();
-    CV_Assert( K_Rinv.isContinuous() );
-    CV_Assert( R_Kinv.isContinuous() );
-
-    _map_x.create(dst_roi.size(), CV_32FC1);
-    _map_y.create(dst_roi.size(), CV_32FC1);
-
-    GpuMat map_x = _map_x.getGpuMat();
-    GpuMat map_y = _map_y.getGpuMat();
-
-    device::imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
-}
-
 #endif
+}
+
 
 Rect cv::detail::CylindricalWarperGpu::buildMaps(Size src_size, InputArray K, InputArray R,
                                                  cuda::GpuMat & xmap, cuda::GpuMat & ymap)
@@ -276,8 +280,19 @@ Point cv::detail::CylindricalWarperGpu::warp(const cuda::GpuMat & src, InputArra
                                              int interp_mode, int border_mode,
                                              cuda::GpuMat & dst)
 {
+#ifndef HAVE_OPENCV_CUDAWARPING
+    (void)src;
+    (void)K;
+    (void)R;
+    (void)interp_mode;
+    (void)border_mode;
+    (void)dst;
+    throw_no_cuda();
+    return Point();
+#else
     Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
     cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
+#endif
 }

From 6f91b7f6a4605591f16a121058940481e5b5a2c7 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 12 Jan 2015 12:46:44 +0300
Subject: [PATCH 19/55] remove firstLevel=2 test case, because CPU
 implementation fails

---
 modules/cudafeatures2d/test/test_features2d.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp
index 6e4479b7d5..468024a5d8 100644
--- a/modules/cudafeatures2d/test/test_features2d.cpp
+++ b/modules/cudafeatures2d/test/test_features2d.cpp
@@ -208,7 +208,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Features2D, ORB,  testing::Combine(
     testing::Values(ORB_ScaleFactor(1.2f)),
     testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
     testing::Values(ORB_EdgeThreshold(31)),
-    testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
+    testing::Values(ORB_firstLevel(0)),
     testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
     testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
     testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),

From 14ef62ed661893d44546a3c0b08a518bd39ee99d Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 12 Jan 2015 18:11:09 +0300
Subject: [PATCH 20/55] refactor CUDA FAST feature detector algorithm:

use new FastFeatureDetector abstract interface and hidden implementation
---
 .../include/opencv2/cudafeatures2d.hpp        | 115 +++------
 .../cudafeatures2d/perf/perf_features2d.cpp   |   9 +-
 modules/cudafeatures2d/src/cuda/fast.cu       |  30 +--
 modules/cudafeatures2d/src/fast.cpp           | 238 ++++++++++--------
 modules/cudafeatures2d/src/orb.cpp            |  24 +-
 .../cudafeatures2d/test/test_features2d.cpp   |   7 +-
 samples/gpu/performance/tests.cpp             |   6 +-
 7 files changed, 213 insertions(+), 216 deletions(-)

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index f61d2dfd00..f6f674d2aa 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -48,6 +48,7 @@
 #endif
 
 #include "opencv2/core/cuda.hpp"
+#include "opencv2/features2d.hpp"
 #include "opencv2/cudafilters.hpp"
 
 /**
@@ -228,91 +229,49 @@ private:
     std::vector<GpuMat> trainDescCollection;
 };
 
-/** @brief Class used for corner detection using the FAST algorithm. :
+//
+// Feature2DAsync
+//
+
+/** @brief Abstract base class for 2D image feature detectors and descriptor extractors.
  */
-class CV_EXPORTS FAST_CUDA
+class CV_EXPORTS Feature2DAsync
+{
+public:
+    virtual ~Feature2DAsync() {}
+
+    virtual void detectAsync(InputArray image, OutputArray keypoints,
+                             InputArray mask = noArray(),
+                             Stream& stream = Stream::Null()) = 0;
+
+    virtual void convert(InputArray gpu_keypoints, std::vector<KeyPoint>& keypoints) = 0;
+};
+
+//
+// FastFeatureDetector
+//
+
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
+class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
 {
 public:
     enum
     {
         LOCATION_ROW = 0,
         RESPONSE_ROW,
-        ROWS_COUNT
+        ROWS_COUNT,
+
+        FEATURE_SIZE = 7
     };
 
-    //! all features have same size
-    static const int FEATURE_SIZE = 7;
+    static Ptr<FastFeatureDetector> create(int threshold=10,
+                                           bool nonmaxSuppression=true,
+                                           int type=FastFeatureDetector::TYPE_9_16,
+                                           int max_npoints = 5000);
 
-    /** @brief Constructor.
-
-    @param threshold Threshold on difference between intensity of the central pixel and pixels on a
-    circle around this pixel.
-    @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
-    (keypoints).
-    @param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \*
-    image_width \* image_height).
-     */
-    explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);
-
-    /** @brief Finds the keypoints using FAST detector.
-
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
-    memory:
-    -   keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
-    suppression is applied)
-     */
-    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-    /** @overload */
-    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-    */
-    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-    */
-    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Releases inner buffer memory.
-    */
-    void release();
-
-    bool nonmaxSuppression;
-
-    int threshold;
-
-    //! max keypoints = keypointsRatio * img.size().area()
-    double keypointsRatio;
-
-    /** @brief Find keypoints and compute it's response if nonmaxSuppression is true.
-
-    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
-    supported.
-    @param mask Optional input mask that marks the regions where we should detect features.
-
-    The function returns count of detected keypoints.
-     */
-    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);
-
-    /** @brief Gets final array of keypoints.
-
-    @param keypoints The output vector of keypoints.
-
-    The function performs non-max suppression if needed and returns final count of keypoints.
-     */
-    int getKeyPoints(GpuMat& keypoints);
-
-private:
-    GpuMat kpLoc_;
-    int count_;
-
-    GpuMat score_;
-
-    GpuMat d_keypoints_;
+    virtual void setMaxNumPoints(int max_npoints) = 0;
+    virtual int getMaxNumPoints() const = 0;
 };
 
 /** @brief Class for extracting ORB features and descriptors from an image. :
@@ -388,8 +347,8 @@ public:
 
     inline void setFastParams(int threshold, bool nonmaxSuppression = true)
     {
-        fastDetector_.threshold = threshold;
-        fastDetector_.nonmaxSuppression = nonmaxSuppression;
+        fastDetector_->setThreshold(threshold);
+        fastDetector_->setNonmaxSuppression(nonmaxSuppression);
     }
 
     /** @brief Releases inner buffer memory.
@@ -433,7 +392,7 @@ private:
     std::vector<GpuMat> keyPointsPyr_;
     std::vector<int> keyPointsCount_;
 
-    FAST_CUDA fastDetector_;
+    Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
 
     Ptr<cuda::Filter> blurFilter;
 
diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp
index 26eb434f44..da3cd77db0 100644
--- a/modules/cudafeatures2d/perf/perf_features2d.cpp
+++ b/modules/cudafeatures2d/perf/perf_features2d.cpp
@@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
+        cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
+                cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
+                                                      cv::FastFeatureDetector::TYPE_9_16,
+                                                      0.5 * img.size().area());
 
         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat d_keypoints;
 
-        TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
+        TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
+        d_fast->convert(d_keypoints, gpu_keypoints);
 
         sortKeyPoints(gpu_keypoints);
 
diff --git a/modules/cudafeatures2d/src/cuda/fast.cu b/modules/cudafeatures2d/src/cuda/fast.cu
index 7aa888ac3f..72235d4e50 100644
--- a/modules/cudafeatures2d/src/cuda/fast.cu
+++ b/modules/cudafeatures2d/src/cuda/fast.cu
@@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
             #endif
         }
 
-        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
+        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
         {
             void* counter_ptr;
             cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
             grid.x = divUp(img.cols - 6, block.x);
             grid.y = divUp(img.rows - 6, block.y);
 
-            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
 
             if (score.data)
             {
                 if (mask.data)
-                    calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                 else
-                    calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
             }
             else
             {
                 if (mask.data)
-                    calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                 else
-                    calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
+                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
             }
 
             cudaSafeCall( cudaGetLastError() );
 
-            cudaSafeCall( cudaDeviceSynchronize() );
-
             unsigned int count;
-            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+
+            cudaSafeCall( cudaStreamSynchronize(stream) );
 
             return count;
         }
@@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
             #endif
         }
 
-        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
+        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
         {
             void* counter_ptr;
             cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
             dim3 grid;
             grid.x = divUp(count, block.x);
 
-            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );
 
-            nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
+            nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);
             cudaSafeCall( cudaGetLastError() );
 
-            cudaSafeCall( cudaDeviceSynchronize() );
-
             unsigned int new_count;
-            cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );
+
+            cudaSafeCall( cudaStreamSynchronize(stream) );
 
             return new_count;
         }
diff --git a/modules/cudafeatures2d/src/fast.cpp b/modules/cudafeatures2d/src/fast.cpp
index aa77aa87bd..cb22ea54d7 100644
--- a/modules/cudafeatures2d/src/fast.cpp
+++ b/modules/cudafeatures2d/src/fast.cpp
@@ -47,124 +47,162 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); }
-int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
-int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
+Ptr<FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<FastFeatureDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) :
-    nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
-{
-}
-
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
-{
-    if (image.empty())
-        return;
-
-    (*this)(image, mask, d_keypoints_);
-    downloadKeypoints(d_keypoints_, keypoints);
-}
-
-void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-        return;
-
-    Mat h_keypoints(d_keypoints);
-    convertKeypoints(h_keypoints, keypoints);
-}
-
-void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (h_keypoints.empty())
-        return;
-
-    CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
-
-    int npoints = h_keypoints.cols;
-
-    keypoints.resize(npoints);
-
-    const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
-    const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
-
-    for (int i = 0; i < npoints; ++i)
-    {
-        KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
-        keypoints[i] = kp;
-    }
-}
-
-void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
-{
-    calcKeyPointsLocation(img, mask);
-    keypoints.cols = getKeyPoints(keypoints);
-}
-
 namespace cv { namespace cuda { namespace device
 {
     namespace fast
     {
-        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
-        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
+        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
+        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
     }
 }}}
 
-int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
+namespace
 {
-    using namespace cv::cuda::device::fast;
-
-    CV_Assert(img.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
-
-    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
-
-    ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);
-
-    if (nonmaxSuppression)
+    class FAST_Impl : public cv::cuda::FastFeatureDetector
+    {
+    public:
+        FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);
+
+        virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
+        virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);
+
+        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
+
+        virtual void setThreshold(int threshold) { threshold_ = threshold; }
+        virtual int getThreshold() const { return threshold_; }
+
+        virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
+        virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }
+
+        virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
+        virtual int getMaxNumPoints() const { return max_npoints_; }
+
+        virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
+        virtual int getType() const { return TYPE_9_16; }
+
+    private:
+        int threshold_;
+        bool nonmaxSuppression_;
+        int max_npoints_;
+    };
+
+    FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
+        threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
     {
-        ensureSizeIsEnough(img.size(), CV_32SC1, score_);
-        score_.setTo(Scalar::all(0));
     }
 
-    count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSuppression ? score_ : PtrStepSzi(), threshold);
-    count_ = std::min(count_, maxKeypoints);
+    void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
+    {
+        if (_image.empty())
+        {
+            keypoints.clear();
+            return;
+        }
 
-    return count_;
+        BufferPool pool(Stream::Null());
+        GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);
+
+        detectAsync(_image, d_keypoints, _mask, Stream::Null());
+        convert(d_keypoints, keypoints);
+    }
+
+    void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
+    {
+        using namespace cv::cuda::device::fast;
+
+        const GpuMat img = _image.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+
+        CV_Assert( img.type() == CV_8UC1 );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );
+
+        BufferPool pool(stream);
+
+        GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);
+
+        GpuMat score;
+        if (nonmaxSuppression_)
+        {
+            score = pool.getBuffer(img.size(), CV_32SC1);
+            score.setTo(Scalar::all(0), stream);
+        }
+
+        int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
+        count = std::min(count, max_npoints_);
+
+        if (count == 0)
+        {
+            _keypoints.release();
+            return;
+        }
+
+        ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
+        GpuMat& keypoints = _keypoints.getGpuMatRef();
+
+        if (nonmaxSuppression_)
+        {
+            count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW), StreamAccessor::getStream(stream));
+            if (count == 0)
+            {
+                keypoints.release();
+            }
+            else
+            {
+                keypoints.cols = count;
+            }
+        }
+        else
+        {
+            GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
+            kpLoc.colRange(0, count).copyTo(locRow, stream);
+            keypoints.row(1).setTo(Scalar::all(0), stream);
+        }
+    }
+
+    void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
+    {
+        if (_gpu_keypoints.empty())
+        {
+            keypoints.clear();
+            return;
+        }
+
+        Mat h_keypoints;
+        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_keypoints.getGpuMat().download(h_keypoints);
+        }
+        else
+        {
+            h_keypoints = _gpu_keypoints.getMat();
+        }
+
+        CV_Assert( h_keypoints.rows == ROWS_COUNT );
+        CV_Assert( h_keypoints.elemSize() == 4 );
+
+        const int npoints = h_keypoints.cols;
+
+        keypoints.resize(npoints);
+
+        const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
+        const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
+
+        for (int i = 0; i < npoints; ++i)
+        {
+            KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
+            keypoints[i] = kp;
+        }
+    }
 }
 
-int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints)
+Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints)
 {
-    using namespace cv::cuda::device::fast;
-
-    if (count_ == 0)
-        return 0;
-
-    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
-
-    if (nonmaxSuppression)
-        return nonmaxSuppression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));
-
-    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
-    kpLoc_.colRange(0, count_).copyTo(locRow);
-    keypoints.row(1).setTo(Scalar::all(0));
-
-    return count_;
-}
-
-void cv::cuda::FAST_CUDA::release()
-{
-    kpLoc_.release();
-    score_.release();
-
-    d_keypoints_.release();
+    CV_Assert( type == TYPE_9_16 );
+    return makePtr<FAST_Impl>(threshold, nonmaxSuppression, max_npoints);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp
index 8d8afe8f04..c04649b1f4 100644
--- a/modules/cudafeatures2d/src/orb.cpp
+++ b/modules/cudafeatures2d/src/orb.cpp
@@ -398,7 +398,7 @@ namespace
 cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
     nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
     scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(DEFAULT_FAST_THRESHOLD)
+    fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD))
 {
     CV_Assert(patchSize_ >= 2);
 
@@ -554,7 +554,7 @@ namespace
                 return;
             }
 
-            count = cull_gpu(keypoints.ptr<int>(FAST_CUDA::LOCATION_ROW), keypoints.ptr<float>(FAST_CUDA::RESPONSE_ROW), count, n_points);
+            count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
         }
     }
 }
@@ -570,20 +570,20 @@ void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
 
     for (int level = 0; level < nLevels_; ++level)
     {
-        keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);
+        fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
+
+        GpuMat fastKpRange;
+        fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
+
+        keyPointsCount_[level] = fastKpRange.cols;
 
         if (keyPointsCount_[level] == 0)
             continue;
 
-        ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]);
+        ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
+        fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
 
-        GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2);
-        keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange);
-
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        int n_features = static_cast<int>(n_features_per_level_[level]);
+        const int n_features = static_cast<int>(n_features_per_level_[level]);
 
         if (scoreType_ == ORB::HARRIS_SCORE)
         {
@@ -767,8 +767,6 @@ void cv::cuda::ORB_CUDA::release()
 
     keyPointsPyr_.clear();
 
-    fastDetector_.release();
-
     d_keypoints_.release();
 }
 
diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp
index 468024a5d8..9a8d76ce31 100644
--- a/modules/cudafeatures2d/test/test_features2d.cpp
+++ b/modules/cudafeatures2d/test/test_features2d.cpp
@@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy)
     cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
-    cv::cuda::FAST_CUDA fast(threshold);
-    fast.nonmaxSuppression = nonmaxSuppression;
+    cv::Ptr<cv::cuda::FastFeatureDetector> fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression);
 
     if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
         try
         {
             std::vector<cv::KeyPoint> keypoints;
-            fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
+            fast->detect(loadMat(image), keypoints);
         }
         catch (const cv::Exception& e)
         {
@@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy)
     else
     {
         std::vector<cv::KeyPoint> keypoints;
-        fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
+        fast->detect(loadMat(image), keypoints);
 
         std::vector<cv::KeyPoint> keypoints_gold;
         cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression);
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 2e7faa3341..8869a1b664 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -322,14 +322,14 @@ TEST(FAST)
     FAST(src, keypoints, 20);
     CPU_OFF;
 
-    cuda::FAST_CUDA d_FAST(20);
+    cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
     cuda::GpuMat d_src(src);
     cuda::GpuMat d_keypoints;
 
-    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
+    d_FAST->detectAsync(d_src, d_keypoints);
 
     CUDA_ON;
-    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
+    d_FAST->detectAsync(d_src, d_keypoints);
     CUDA_OFF;
 }
 

From 9f77ffb03f443331080bfb7d59c4f10d0dd08815 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 12 Jan 2015 18:26:41 +0300
Subject: [PATCH 21/55] extend Feature2DAsync interface

---
 .../include/opencv2/cudafeatures2d.hpp        | 26 ++++--
 .../cudafeatures2d/src/feature2d_async.cpp    | 85 +++++++++++++++++++
 2 files changed, 103 insertions(+), 8 deletions(-)
 create mode 100644 modules/cudafeatures2d/src/feature2d_async.cpp

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index f6f674d2aa..a193eb6f8b 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -233,26 +233,36 @@ private:
 // Feature2DAsync
 //
 
-/** @brief Abstract base class for 2D image feature detectors and descriptor extractors.
- */
 class CV_EXPORTS Feature2DAsync
 {
 public:
-    virtual ~Feature2DAsync() {}
+    virtual ~Feature2DAsync();
 
-    virtual void detectAsync(InputArray image, OutputArray keypoints,
+    virtual void detectAsync(InputArray image,
+                             OutputArray keypoints,
                              InputArray mask = noArray(),
-                             Stream& stream = Stream::Null()) = 0;
+                             Stream& stream = Stream::Null());
 
-    virtual void convert(InputArray gpu_keypoints, std::vector<KeyPoint>& keypoints) = 0;
+    virtual void computeAsync(InputArray image,
+                              OutputArray keypoints,
+                              OutputArray descriptors,
+                              Stream& stream = Stream::Null());
+
+    virtual void detectAndComputeAsync(InputArray image,
+                                       InputArray mask,
+                                       OutputArray keypoints,
+                                       OutputArray descriptors,
+                                       bool useProvidedKeypoints=false,
+                                       Stream& stream = Stream::Null());
+
+    virtual void convert(InputArray gpu_keypoints,
+                         std::vector<KeyPoint>& keypoints) = 0;
 };
 
 //
 // FastFeatureDetector
 //
 
-/** @brief Wrapping class for feature detection using the FAST method.
- */
 class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
 {
 public:
diff --git a/modules/cudafeatures2d/src/feature2d_async.cpp b/modules/cudafeatures2d/src/feature2d_async.cpp
new file mode 100644
index 0000000000..202a725376
--- /dev/null
+++ b/modules/cudafeatures2d/src/feature2d_async.cpp
@@ -0,0 +1,85 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+cv::cuda::Feature2DAsync::~Feature2DAsync()
+{
+}
+
+void cv::cuda::Feature2DAsync::detectAsync(InputArray image,
+                                           OutputArray keypoints,
+                                           InputArray mask,
+                                           Stream& stream)
+{
+    if (image.empty())
+    {
+        keypoints.clear();
+        return;
+    }
+
+    detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream);
+}
+
+void cv::cuda::Feature2DAsync::computeAsync(InputArray image,
+                                            OutputArray keypoints,
+                                            OutputArray descriptors,
+                                            Stream& stream)
+{
+    if (image.empty())
+    {
+        descriptors.release();
+        return;
+    }
+
+    detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream);
+}
+
+void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/,
+                                                     InputArray /*mask*/,
+                                                     OutputArray /*keypoints*/,
+                                                     OutputArray /*descriptors*/,
+                                                     bool /*useProvidedKeypoints*/,
+                                                     Stream& /*stream*/)
+{
+    CV_Error(Error::StsNotImplemented, "");
+}

From 554ddd2ec49f4df46c7b23ac257c21fda0a449d0 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 Jan 2015 10:25:34 +0300
Subject: [PATCH 22/55] fix compilation without CUDA

---
 modules/cudafeatures2d/src/fast.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/cudafeatures2d/src/fast.cpp b/modules/cudafeatures2d/src/fast.cpp
index cb22ea54d7..2095ef7cf6 100644
--- a/modules/cudafeatures2d/src/fast.cpp
+++ b/modules/cudafeatures2d/src/fast.cpp
@@ -47,7 +47,7 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<FastFeatureDetector>(); }
+Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<cv::cuda::FastFeatureDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 

From f960a5707d99d9a55da8f2b12e96bcad65fd9b90 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 Jan 2015 10:40:58 +0300
Subject: [PATCH 23/55] refactor CUDA ORB feature detector/extractor algorithm:

use new abstract interface and hidden implementation
---
 .../include/opencv2/cudafeatures2d.hpp        | 125 +--
 .../cudafeatures2d/perf/perf_features2d.cpp   |   6 +-
 modules/cudafeatures2d/src/orb.cpp            | 741 ++++++++++--------
 .../cudafeatures2d/test/test_features2d.cpp   |  11 +-
 samples/gpu/performance/tests.cpp             |   6 +-
 5 files changed, 447 insertions(+), 442 deletions(-)

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index a193eb6f8b..4a78d50e68 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -284,9 +284,11 @@ public:
     virtual int getMaxNumPoints() const = 0;
 };
 
-/** @brief Class for extracting ORB features and descriptors from an image. :
- */
-class CV_EXPORTS ORB_CUDA
+//
+// ORB
+//
+
+class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
 {
 public:
     enum
@@ -300,113 +302,20 @@ public:
         ROWS_COUNT
     };
 
-    enum
-    {
-        DEFAULT_FAST_THRESHOLD = 20
-    };
-
-    /** @brief Constructor.
-
-    @param nFeatures The number of desired features.
-    @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
-    the next.
-    @param nLevels The number of levels in the scale pyramid.
-    @param edgeThreshold How far from the boundary the points should be.
-    @param firstLevel The level at which the image is given. If 1, that means we will also look at the
-    image scaleFactor times bigger.
-    @param WTA_K
-    @param scoreType
-    @param patchSize
-     */
-    explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
-                     int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
-
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-
-    /** @brief Detects keypoints and computes descriptors for them.
-
-    @param image Input 8-bit grayscale image.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
-    For GPU memory:
-    -   keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
-    -   keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
-    -   keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
-    -   keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
-    @param descriptors Computed descriptors. if blurForDescriptor is true, image will be blurred
-    before descriptors calculation.
-     */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-    */
-    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-    */
-    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! returns the descriptor size in bytes
-    inline int descriptorSize() const { return kBytes; }
-
-    inline void setFastParams(int threshold, bool nonmaxSuppression = true)
-    {
-        fastDetector_->setThreshold(threshold);
-        fastDetector_->setNonmaxSuppression(nonmaxSuppression);
-    }
-
-    /** @brief Releases inner buffer memory.
-    */
-    void release();
+    static Ptr<ORB> create(int nfeatures=500,
+                           float scaleFactor=1.2f,
+                           int nlevels=8,
+                           int edgeThreshold=31,
+                           int firstLevel=0,
+                           int WTA_K=2,
+                           int scoreType=ORB::HARRIS_SCORE,
+                           int patchSize=31,
+                           int fastThreshold=20,
+                           bool blurForDescriptor=false);
 
     //! if true, image will be blurred before descriptors calculation
-    bool blurForDescriptor;
-
-private:
-    enum { kBytes = 32 };
-
-    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
-
-    void computeKeyPointsPyramid();
-
-    void computeDescriptors(GpuMat& descriptors);
-
-    void mergeKeyPoints(GpuMat& keypoints);
-
-    int nFeatures_;
-    float scaleFactor_;
-    int nLevels_;
-    int edgeThreshold_;
-    int firstLevel_;
-    int WTA_K_;
-    int scoreType_;
-    int patchSize_;
-
-    //! The number of desired features per scale
-    std::vector<size_t> n_features_per_level_;
-
-    //! Points to compute BRIEF descriptors from
-    GpuMat pattern_;
-
-    std::vector<GpuMat> imagePyr_;
-    std::vector<GpuMat> maskPyr_;
-
-    GpuMat buf_;
-
-    std::vector<GpuMat> keyPointsPyr_;
-    std::vector<int> keyPointsCount_;
-
-    Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
-
-    Ptr<cuda::Filter> blurFilter;
-
-    GpuMat d_keypoints_;
+    virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
+    virtual bool getBlurForDescriptor() const = 0;
 };
 
 //! @}
diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp
index da3cd77db0..0dcb0434f5 100644
--- a/modules/cudafeatures2d/perf/perf_features2d.cpp
+++ b/modules/cudafeatures2d/perf/perf_features2d.cpp
@@ -109,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::ORB_CUDA d_orb(nFeatures);
+        cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);
 
         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat d_keypoints, d_descriptors;
 
-        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
+        d_orb->convert(d_keypoints, gpu_keypoints);
 
         cv::Mat gpu_descriptors(d_descriptors);
 
diff --git a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp
index c04649b1f4..6bfdd5ac47 100644
--- a/modules/cudafeatures2d/src/orb.cpp
+++ b/modules/cudafeatures2d/src/orb.cpp
@@ -47,18 +47,7 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
+Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -346,7 +335,100 @@ namespace
         -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
     };
 
-    void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
+    class ORB_Impl : public cv::cuda::ORB // concrete implementation of the abstract cv::cuda::ORB interface
+    {
+    public:
+        ORB_Impl(int nfeatures, // the constructor also derives per-level budgets and the sampling pattern from these values
+                 float scaleFactor,
+                 int nlevels,
+                 int edgeThreshold,
+                 int firstLevel,
+                 int WTA_K,
+                 int scoreType,
+                 int patchSize,
+                 int fastThreshold,
+                 bool blurForDescriptor);
+
+        virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
+        virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);
+
+        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
+
+        virtual int descriptorSize() const { return kBytes; }
+        virtual int descriptorType() const { return CV_8U; }
+        virtual int defaultNorm() const { return NORM_HAMMING; }
+
+        virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; } // NOTE(review): n_features_per_level_ is filled only in the constructor and is not refreshed here - confirm intended
+        virtual int getMaxFeatures() const { return nFeatures_; }
+
+        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = static_cast<float>(scaleFactor); } // stored as float; the narrowing is now explicit
+        virtual double getScaleFactor() const { return scaleFactor_; }
+
+        virtual void setNLevels(int nlevels) { CV_Assert( nlevels <= static_cast<int>(n_features_per_level_.size()) ); nLevels_ = nlevels; } // n_features_per_level_ is sized once in the constructor; a larger value would be indexed out of bounds per level
+        virtual int getNLevels() const { return nLevels_; }
+
+        virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
+        virtual int getEdgeThreshold() const { return edgeThreshold_; }
+
+        virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
+        virtual int getFirstLevel() const { return firstLevel_; }
+
+        virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; } // NOTE(review): pattern_ is uploaded only in the constructor; it is not rebuilt when WTA_K_ or patchSize_ change - confirm intended
+        virtual int getWTA_K() const { return WTA_K_; }
+
+        virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
+        virtual int getScoreType() const { return scoreType_; }
+
+        virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
+        virtual int getPatchSize() const { return patchSize_; }
+
+        virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; } // pushed to fastDetector_ at the start of each computeKeyPointsPyramid() run
+        virtual int getFastThreshold() const { return fastThreshold_; }
+
+        virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
+        virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }
+
+    private:
+        int nFeatures_;
+        float scaleFactor_;
+        int nLevels_;
+        int edgeThreshold_;
+        int firstLevel_;
+        int WTA_K_;
+        int scoreType_;
+        int patchSize_;
+        int fastThreshold_;
+        bool blurForDescriptor_;
+
+    private:
+        void buildScalePyramids(InputArray _image, InputArray _mask); // pipeline stages, called in this order by detectAndComputeAsync
+        void computeKeyPointsPyramid();
+        void computeDescriptors(OutputArray _descriptors);
+        void mergeKeyPoints(OutputArray _keypoints);
+
+    private:
+        Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
+
+        //! The number of desired features per scale
+        std::vector<size_t> n_features_per_level_;
+
+        //! Points to compute BRIEF descriptors from
+        GpuMat pattern_;
+
+        std::vector<GpuMat> imagePyr_; // one image per pyramid level
+        std::vector<GpuMat> maskPyr_; // one 8-bit mask per pyramid level
+
+        GpuMat buf_; // scratch buffer: border mask while building pyramids, blurred image while computing descriptors
+
+        std::vector<GpuMat> keyPointsPyr_; // per-level keypoint data
+        std::vector<int> keyPointsCount_; // per-level keypoint counts
+
+        Ptr<cuda::Filter> blurFilter_;
+
+        GpuMat d_keypoints_; // intermediate keypoint matrix used by the std::vector<KeyPoint> overload of detectAndCompute
+    };
+
+    static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
     {
         RNG rng(0x12345678);
 
@@ -381,7 +463,7 @@ namespace
         }
     }
 
-    void makeRandomPattern(int patchSize, Point* pattern, int npoints)
+    static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
     {
         // we always start with a fixed seed,
         // to make patterns the same on each run
@@ -393,155 +475,189 @@ namespace
             pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
         }
     }
-}
 
-cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
-    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
-    scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD))
-{
-    CV_Assert(patchSize_ >= 2);
-
-    // fill the extractors and descriptors for the corresponding scales
-    float factor = 1.0f / scaleFactor_;
-    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
-
-    n_features_per_level_.resize(nLevels_);
-    size_t sum_n_features = 0;
-    for (int level = 0; level < nLevels_ - 1; ++level)
+    ORB_Impl::ORB_Impl(int nFeatures,
+                       float scaleFactor,
+                       int nLevels,
+                       int edgeThreshold,
+                       int firstLevel,
+                       int WTA_K,
+                       int scoreType,
+                       int patchSize,
+                       int fastThreshold,
+                       bool blurForDescriptor) :
+        nFeatures_(nFeatures),
+        scaleFactor_(scaleFactor),
+        nLevels_(nLevels),
+        edgeThreshold_(edgeThreshold),
+        firstLevel_(firstLevel),
+        WTA_K_(WTA_K),
+        scoreType_(scoreType),
+        patchSize_(patchSize),
+        fastThreshold_(fastThreshold),
+        blurForDescriptor_(blurForDescriptor)
     {
-        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
-        sum_n_features += n_features_per_level_[level];
-        n_desired_features_per_scale *= factor;
-    }
-    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
+        CV_Assert( patchSize_ >= 2 );
+        CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 );
 
-    // pre-compute the end of a row in a circular patch
-    int half_patch_size = patchSize_ / 2;
-    std::vector<int> u_max(half_patch_size + 2);
-    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
-        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
+        fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);
 
-    // Make sure we are symmetric
-    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
-    {
-        while (u_max[v_0] == u_max[v_0 + 1])
-            ++v_0;
-        u_max[v] = v_0;
-        ++v_0;
-    }
-    CV_Assert(u_max.size() < 32);
-    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
+        // fill the extractors and descriptors for the corresponding scales
+        float factor = 1.0f / scaleFactor_;
+        float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
 
-    // Calc pattern
-    const int npoints = 512;
-    Point pattern_buf[npoints];
-    const Point* pattern0 = (const Point*)bit_pattern_31_;
-    if (patchSize_ != 31)
-    {
-        pattern0 = pattern_buf;
-        makeRandomPattern(patchSize_, pattern_buf, npoints);
-    }
-
-    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);
-
-    Mat h_pattern;
-
-    if (WTA_K_ == 2)
-    {
-        h_pattern.create(2, npoints, CV_32SC1);
-
-        int* pattern_x_ptr = h_pattern.ptr<int>(0);
-        int* pattern_y_ptr = h_pattern.ptr<int>(1);
-
-        for (int i = 0; i < npoints; ++i)
+        n_features_per_level_.resize(nLevels_);
+        size_t sum_n_features = 0;
+        for (int level = 0; level < nLevels_ - 1; ++level)
         {
-            pattern_x_ptr[i] = pattern0[i].x;
-            pattern_y_ptr[i] = pattern0[i].y;
+            n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
+            sum_n_features += n_features_per_level_[level];
+            n_desired_features_per_scale *= factor;
         }
-    }
-    else
-    {
-        int ntuples = descriptorSize() * 4;
-        initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
-    }
+        n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
 
-    pattern_.upload(h_pattern);
-
-    blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
-
-    blurForDescriptor = false;
-}
-
-namespace
-{
-    inline float getScale(float scaleFactor, int firstLevel, int level)
-    {
-        return pow(scaleFactor, level - firstLevel);
-    }
-}
-
-void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
-{
-    CV_Assert(image.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
-
-    imagePyr_.resize(nLevels_);
-    maskPyr_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
-
-        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
-
-        ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
-        ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
-        maskPyr_[level].setTo(Scalar::all(255));
-
-        // Compute the resized image
-        if (level != firstLevel_)
+        // pre-compute the end of a row in a circular patch
+        int half_patch_size = patchSize_ / 2;
+        std::vector<int> u_max(half_patch_size + 2);
+        for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
         {
-            if (level < firstLevel_)
-            {
-                cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+            u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
+        }
 
-                if (!mask.empty())
-                    cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-            }
-            else
-            {
-                cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+        // Make sure we are symmetric
+        for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
+        {
+            while (u_max[v_0] == u_max[v_0 + 1])
+                ++v_0;
+            u_max[v] = v_0;
+            ++v_0;
+        }
+        CV_Assert( u_max.size() < 32 );
+        cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
 
-                if (!mask.empty())
-                {
-                    cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-                    cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
-                }
+        // Calc pattern
+        const int npoints = 512;
+        Point pattern_buf[npoints];
+        const Point* pattern0 = (const Point*)bit_pattern_31_;
+        if (patchSize_ != 31)
+        {
+            pattern0 = pattern_buf;
+            makeRandomPattern(patchSize_, pattern_buf, npoints);
+        }
+
+        Mat h_pattern;
+        if (WTA_K_ == 2)
+        {
+            h_pattern.create(2, npoints, CV_32SC1);
+
+            int* pattern_x_ptr = h_pattern.ptr<int>(0);
+            int* pattern_y_ptr = h_pattern.ptr<int>(1);
+
+            for (int i = 0; i < npoints; ++i)
+            {
+                pattern_x_ptr[i] = pattern0[i].x;
+                pattern_y_ptr[i] = pattern0[i].y;
             }
         }
         else
         {
-            image.copyTo(imagePyr_[level]);
-
-            if (!mask.empty())
-                mask.copyTo(maskPyr_[level]);
+            int ntuples = descriptorSize() * 4;
+            initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
         }
 
-        // Filter keypoints by image border
-        ensureSizeIsEnough(sz, CV_8UC1, buf_);
-        buf_.setTo(Scalar::all(0));
-        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
-        buf_(inner).setTo(Scalar::all(255));
+        pattern_.upload(h_pattern);
 
-        cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
+        blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
     }
-}
 
-namespace
-{
-    //takes keypoints and culls them by the response
-    void cull(GpuMat& keypoints, int& count, int n_points)
+    void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints)
+    { // Convenience overload: runs the async pipeline with Stream::Null() and converts the result into `keypoints`.
+        CV_Assert( useProvidedKeypoints == false ); // caller-supplied keypoints are rejected
+
+        detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null()); // d_keypoints_ (member GpuMat) receives the keypoint matrix
+        convert(d_keypoints_, keypoints);
+    }
+
+    void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& /*stream*/)
+    { // Full pipeline: scale pyramids -> per-level keypoints -> optional descriptors -> merged keypoint matrix.
+        CV_Assert( useProvidedKeypoints == false ); // caller-supplied keypoints are rejected
+
+        buildScalePyramids(_image, _mask); // none of the stages below accepts a stream, so the parameter is left unnamed
+        computeKeyPointsPyramid();
+        if (_descriptors.needed()) // descriptors are computed only when the caller asked for them
+        {
+            computeDescriptors(_descriptors);
+        }
+        mergeKeyPoints(_keypoints);
+    }
+
+    static float getScale(float scaleFactor, int firstLevel, int level) // returns scaleFactor raised to (level - firstLevel)
+    {
+        return pow(scaleFactor, level - firstLevel); // exponent is 0 at level == firstLevel, negative below it
+    }
+
+    void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask) // fills imagePyr_ and maskPyr_ with one entry per pyramid level
+    {
+        const GpuMat image = _image.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+
+        CV_Assert( image.type() == CV_8UC1 );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); // the mask is optional
+
+        imagePyr_.resize(nLevels_);
+        maskPyr_.resize(nLevels_);
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
+
+            Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
+
+            ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
+            ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
+            maskPyr_[level].setTo(Scalar::all(255)); // start fully enabled; stays that way when no mask was supplied
+
+            // Compute the resized image
+            if (level != firstLevel_)
+            {
+                if (level < firstLevel_) // levels below firstLevel_ are resized directly from the input image/mask
+                {
+                    cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+
+                    if (!mask.empty())
+                        cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                }
+                else // levels above firstLevel_ are resized from the previous level
+                {
+                    cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+
+                    if (!mask.empty())
+                    {
+                        cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                        cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); // zero every mask pixel that interpolation pulled down to 254 or less
+                    }
+                }
+            }
+            else // level == firstLevel_: image and mask are copied unchanged
+            {
+                image.copyTo(imagePyr_[level]);
+
+                if (!mask.empty())
+                    mask.copyTo(maskPyr_[level]);
+            }
+
+            // Filter keypoints by image border
+            ensureSizeIsEnough(sz, CV_8UC1, buf_);
+            buf_.setTo(Scalar::all(0));
+            Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); // NOTE(review): width/height turn negative once a level is smaller than 2 * edgeThreshold_ - confirm this cannot happen
+            buf_(inner).setTo(Scalar::all(255));
+
+            cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); // combine the user mask with the border mask
+        }
+    }
+
+    // takes keypoints and culls them by the response
+    static void cull(GpuMat& keypoints, int& count, int n_points)
     {
         using namespace cv::cuda::device::orb;
 
@@ -557,217 +673,196 @@ namespace
             count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
         }
     }
-}
 
-void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
-{
-    using namespace cv::cuda::device::orb;
-
-    int half_patch_size = patchSize_ / 2;
-
-    keyPointsPyr_.resize(nLevels_);
-    keyPointsCount_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
+    void ORB_Impl::computeKeyPointsPyramid()
     {
-        fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
+        using namespace cv::cuda::device::orb;
 
-        GpuMat fastKpRange;
-        fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
+        int half_patch_size = patchSize_ / 2;
 
-        keyPointsCount_[level] = fastKpRange.cols;
+        keyPointsPyr_.resize(nLevels_);
+        keyPointsCount_.resize(nLevels_);
 
-        if (keyPointsCount_[level] == 0)
-            continue;
+        fastDetector_->setThreshold(fastThreshold_);
 
-        ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
-        fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
-
-        const int n_features = static_cast<int>(n_features_per_level_[level]);
-
-        if (scoreType_ == ORB::HARRIS_SCORE)
+        for (int level = 0; level < nLevels_; ++level)
         {
-            // Keep more points than necessary as FAST does not give amazing corners
-            cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
+            fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
 
-            // Compute the Harris cornerness (better scoring than FAST)
-            HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
+            GpuMat fastKpRange;
+            fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
+
+            keyPointsCount_[level] = fastKpRange.cols;
+
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
+            fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
+
+            const int n_features = static_cast<int>(n_features_per_level_[level]);
+
+            if (scoreType_ == ORB::HARRIS_SCORE)
+            {
+                // Keep more points than necessary as FAST does not give amazing corners
+                cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
+
+                // Compute the Harris cornerness (better scoring than FAST)
+                HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
+            }
+
+            //cull to the final desired level, using the new Harris scores or the original FAST scores.
+            cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
+
+            // Compute orientation
+            IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
+        }
+    }
+
+    void ORB_Impl::computeDescriptors(OutputArray _descriptors)
+    {
+        using namespace cv::cuda::device::orb;
+
+        int nAllkeypoints = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+            nAllkeypoints += keyPointsCount_[level];
+
+        if (nAllkeypoints == 0)
+        {
+            _descriptors.release();
+            return;
         }
 
-        //cull to the final desired level, using the new Harris scores or the original FAST scores.
-        cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
+        ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors);
+        GpuMat descriptors = _descriptors.getGpuMat();
 
-        // Compute orientation
-        IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
-    }
-}
+        int offset = 0;
 
-void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors)
-{
-    using namespace cv::cuda::device::orb;
-
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
-    {
-        descriptors.release();
-        return;
-    }
-
-    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);
-
-    int offset = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
-
-        if (blurForDescriptor)
+        for (int level = 0; level < nLevels_; ++level)
         {
-            // preprocess the resized image
-            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
-            blurFilter->apply(imagePyr_[level], buf_);
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
+
+            if (blurForDescriptor_)
+            {
+                // preprocess the resized image
+                ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
+                blurFilter_->apply(imagePyr_[level], buf_);
+            }
+
+            computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
+                keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
+
+            offset += keyPointsCount_[level];
+        }
+    }
+
+    void ORB_Impl::mergeKeyPoints(OutputArray _keypoints)
+    {
+        using namespace cv::cuda::device::orb;
+
+        int nAllkeypoints = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+            nAllkeypoints += keyPointsCount_[level];
+
+        if (nAllkeypoints == 0)
+        {
+            _keypoints.release();
+            return;
         }
 
-        computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
-            keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
+        ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints);
+        GpuMat& keypoints = _keypoints.getGpuMatRef();
 
-        offset += keyPointsCount_[level];
+        int offset = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            float sf = getScale(scaleFactor_, firstLevel_, level);
+
+            GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
+
+            float locScale = level != firstLevel_ ? sf : 1.0f;
+
+            mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
+
+            GpuMat range = keyPointsRange.rowRange(2, 4);
+            keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
+
+            keyPointsRange.row(4).setTo(Scalar::all(level));
+            keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
+
+            offset += keyPointsCount_[level];
+        }
     }
-}
 
-void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints)
-{
-    using namespace cv::cuda::device::orb;
-
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
+    void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
     {
-        keypoints.release();
-        return;
-    }
+        if (_gpu_keypoints.empty())
+        {
+            keypoints.clear();
+            return;
+        }
 
-    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);
+        Mat h_keypoints;
+        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_keypoints.getGpuMat().download(h_keypoints);
+        }
+        else
+        {
+            h_keypoints = _gpu_keypoints.getMat();
+        }
 
-    int offset = 0;
+        CV_Assert( h_keypoints.rows == ROWS_COUNT );
+        CV_Assert( h_keypoints.type() == CV_32FC1 );
 
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
+        const int npoints = h_keypoints.cols;
 
-        float sf = getScale(scaleFactor_, firstLevel_, level);
+        keypoints.resize(npoints);
 
-        GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
+        const float* x_ptr = h_keypoints.ptr<float>(X_ROW);
+        const float* y_ptr = h_keypoints.ptr<float>(Y_ROW);
+        const float* response_ptr = h_keypoints.ptr<float>(RESPONSE_ROW);
+        const float* angle_ptr = h_keypoints.ptr<float>(ANGLE_ROW);
+        const float* octave_ptr = h_keypoints.ptr<float>(OCTAVE_ROW);
+        const float* size_ptr = h_keypoints.ptr<float>(SIZE_ROW);
 
-        float locScale = level != firstLevel_ ? sf : 1.0f;
+        for (int i = 0; i < npoints; ++i)
+        {
+            KeyPoint kp;
 
-        mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
+            kp.pt.x = x_ptr[i];
+            kp.pt.y = y_ptr[i];
+            kp.response = response_ptr[i];
+            kp.angle = angle_ptr[i];
+            kp.octave = static_cast<int>(octave_ptr[i]);
+            kp.size = size_ptr[i];
 
-        GpuMat range = keyPointsRange.rowRange(2, 4);
-        keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
-
-        keyPointsRange.row(4).setTo(Scalar::all(level));
-        keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
-
-        offset += keyPointsCount_[level];
+            keypoints[i] = kp;
+        }
     }
 }
 
-void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
+Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int nfeatures,
+                                         float scaleFactor,
+                                         int nlevels,
+                                         int edgeThreshold,
+                                         int firstLevel,
+                                         int WTA_K,
+                                         int scoreType,
+                                         int patchSize,
+                                         int fastThreshold,
+                                         bool blurForDescriptor)
 {
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    Mat h_keypoints(d_keypoints);
-
-    convertKeyPoints(h_keypoints, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);
-
-    const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
-    const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
-    const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
-    const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
-    const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
-    const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);
-
-    keypoints.resize(d_keypoints.cols);
-
-    for (int i = 0; i < d_keypoints.cols; ++i)
-    {
-        KeyPoint kp;
-
-        kp.pt.x = x_ptr[i];
-        kp.pt.y = y_ptr[i];
-        kp.response = response_ptr[i];
-        kp.angle = angle_ptr[i];
-        kp.octave = static_cast<int>(octave_ptr[i]);
-        kp.size = size_ptr[i];
-
-        keypoints[i] = kp;
-    }
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    mergeKeyPoints(keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    computeDescriptors(descriptors);
-    mergeKeyPoints(keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
-{
-    (*this)(image, mask, d_keypoints_);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
-{
-    (*this)(image, mask, d_keypoints_, descriptors);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::release()
-{
-    imagePyr_.clear();
-    maskPyr_.clear();
-
-    buf_.release();
-
-    keyPointsPyr_.clear();
-
-    d_keypoints_.release();
+    return makePtr<ORB_Impl>(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp
index 9a8d76ce31..25ba48faf8 100644
--- a/modules/cudafeatures2d/test/test_features2d.cpp
+++ b/modules/cudafeatures2d/test/test_features2d.cpp
@@ -122,7 +122,7 @@ namespace
     IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
 }
 
-CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
+CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)
 
 PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
 {
@@ -162,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
     cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
     mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
 
-    cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
-    orb.blurForDescriptor = blurForDescriptor;
+    cv::Ptr<cv::cuda::ORB> orb =
+            cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel,
+                                  WTA_K, scoreType, patchSize, 20, blurForDescriptor);
 
     if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
@@ -171,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
         {
             std::vector<cv::KeyPoint> keypoints;
             cv::cuda::GpuMat descriptors;
-            orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+            orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
         }
         catch (const cv::Exception& e)
         {
@@ -182,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy)
     {
         std::vector<cv::KeyPoint> keypoints;
         cv::cuda::GpuMat descriptors;
-        orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+        orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);
 
         cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
 
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 8869a1b664..0d083e5bda 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -350,15 +350,15 @@ TEST(ORB)
     orb->detectAndCompute(src, Mat(), keypoints, descriptors);
     CPU_OFF;
 
-    cuda::ORB_CUDA d_orb;
+    Ptr<cuda::ORB> d_orb = cuda::ORB::create();
     cuda::GpuMat d_src(src);
     cuda::GpuMat d_keypoints;
     cuda::GpuMat d_descriptors;
 
-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
 
     CUDA_ON;
-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     CUDA_OFF;
 }
 

From 764d55b81df438ff218c861d4e47459f89a9f467 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 Jan 2015 17:57:09 +0300
Subject: [PATCH 24/55] add extended documentation for Features2DAsync

---
 .../include/opencv2/cudafeatures2d.hpp        | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index 4a78d50e68..c7ab6e3924 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -233,28 +233,47 @@ private:
 // Feature2DAsync
 //
 
+/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor extractors.
+ */
 class CV_EXPORTS Feature2DAsync
 {
 public:
     virtual ~Feature2DAsync();
 
+    /** @brief Detects keypoints in an image.
+
+    @param image Image.
+    @param keypoints The detected keypoints.
+    @param mask Mask specifying where to look for keypoints (optional). It must be an 8-bit integer
+    matrix with non-zero values in the region of interest.
+    @param stream CUDA stream.
+     */
     virtual void detectAsync(InputArray image,
                              OutputArray keypoints,
                              InputArray mask = noArray(),
                              Stream& stream = Stream::Null());
 
+    /** @brief Computes the descriptors for a set of keypoints detected in an image.
+
+    @param image Image.
+    @param keypoints Input collection of keypoints.
+    @param descriptors Computed descriptors. Row j is the descriptor for j-th keypoint.
+    @param stream CUDA stream.
+     */
     virtual void computeAsync(InputArray image,
                               OutputArray keypoints,
                               OutputArray descriptors,
                               Stream& stream = Stream::Null());
 
+    /** Detects keypoints and computes the descriptors. */
     virtual void detectAndComputeAsync(InputArray image,
                                        InputArray mask,
                                        OutputArray keypoints,
                                        OutputArray descriptors,
-                                       bool useProvidedKeypoints=false,
+                                       bool useProvidedKeypoints = false,
                                        Stream& stream = Stream::Null());
 
+    /** Converts keypoints array from internal representation to standard vector. */
     virtual void convert(InputArray gpu_keypoints,
                          std::vector<KeyPoint>& keypoints) = 0;
 };
@@ -263,6 +282,8 @@ public:
 // FastFeatureDetector
 //
 
+/** @brief Wrapping class for feature detection using the FAST method.
+ */
 class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
 {
 public:
@@ -288,6 +309,10 @@ public:
 // ORB
 //
 
+/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
+ *
+ * @sa cv::ORB
+ */
 class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
 {
 public:

From 8a178da1a42d6ec2a26eed1c998889377d1576ae Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 Jan 2015 17:57:30 +0300
Subject: [PATCH 25/55] refactor CUDA BFMatcher algorithm:

use new abstract interface and hidden implementation
---
 .../include/opencv2/cudafeatures2d.hpp        |  411 ++--
 .../cudafeatures2d/perf/perf_features2d.cpp   |   24 +-
 .../src/brute_force_matcher.cpp               | 1790 +++++++++--------
 .../cudafeatures2d/test/test_features2d.cpp   |   68 +-
 modules/stitching/src/matchers.cpp            |   11 +-
 samples/gpu/performance/tests.cpp             |   18 +-
 6 files changed, 1269 insertions(+), 1053 deletions(-)

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index c7ab6e3924..975726973f 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -63,170 +63,315 @@ namespace cv { namespace cuda {
 //! @addtogroup cudafeatures2d
 //! @{
 
-/** @brief Brute-force descriptor matcher.
+//
+// DescriptorMatcher
+//
 
-For each descriptor in the first set, this matcher finds the closest descriptor in the second set
-by trying each one. This descriptor matcher supports masking permissible matches between descriptor
-sets.
+/** @brief Abstract base class for matching keypoint descriptors.
 
-The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups
-of match methods: for matching descriptors of one image with another image or with an image set.
-Also, all functions have an alternative to save results either to the GPU memory or to the CPU
-memory.
-
-@sa DescriptorMatcher, BFMatcher
+It has two groups of match methods: for matching descriptors of an image with another image or with
+an image set.
  */
-class CV_EXPORTS BFMatcher_CUDA
+class CV_EXPORTS DescriptorMatcher : public cv::Algorithm
 {
 public:
-    explicit BFMatcher_CUDA(int norm = cv::NORM_L2);
+    //
+    // Factories
+    //
 
-    //! Add descriptors to train descriptor collection
-    void add(const std::vector<GpuMat>& descCollection);
+    /** @brief Brute-force descriptor matcher.
 
-    //! Get train descriptors collection
-    const std::vector<GpuMat>& getTrainDescriptors() const;
+    For each descriptor in the first set, this matcher finds the closest descriptor in the second set
+    by trying each one. This descriptor matcher supports masking permissible matches of descriptor
+    sets.
 
-    //! Clear train descriptors collection
-    void clear();
+    @param norm One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are
+    preferable choices for SIFT and SURF descriptors; NORM_HAMMING should be used with ORB, BRISK and
+    BRIEF.
+     */
+    static Ptr<DescriptorMatcher> createBFMatcher(int norm = cv::NORM_L2);
 
-    //! Return true if there are not train descriptors in collection
-    bool empty() const;
+    //
+    // Utility
+    //
 
-    //! Return true if the matcher supports mask in match methods
-    bool isMaskSupported() const;
+    /** @brief Returns true if the descriptor matcher supports masking permissible matches.
+     */
+    virtual bool isMaskSupported() const = 0;
 
-    //! Find one best match for each query descriptor
-    void matchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+    //
+    // Descriptor collection
+    //
 
-    //! Download trainIdx and distance and convert it to CPU vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
-    //! Convert trainIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
+    /** @brief Adds descriptors to the train descriptor collection.
 
-    //! Find one best match for each query descriptor
-    void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
+    If the collection is not empty, the new descriptors are added to existing train descriptors.
 
-    //! Make gpu collection of trains and masks in suitable format for matchCollection function
-    void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+    @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
+    train image.
+     */
+    virtual void add(const std::vector<GpuMat>& descriptors) = 0;
 
-    //! Find one best match from train collection for each query descriptor
-    void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
-        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-        const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
+    /** @brief Returns a constant reference to the train descriptor collection.
+     */
+    virtual const std::vector<GpuMat>& getTrainDescriptors() const = 0;
 
-    //! Download trainIdx, imgIdx and distance and convert it to vector with DMatch
-    static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
-    //! Convert trainIdx, imgIdx and distance to vector with DMatch
-    static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
+    /** @brief Clears the train descriptor collection.
+     */
+    virtual void clear() = 0;
 
-    //! Find one best match from train collection for each query descriptor.
-    void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+    /** @brief Returns true if there are no train descriptors in the collection.
+     */
+    virtual bool empty() const = 0;
 
-    //! Find k best matches for each query descriptor (in increasing order of distances)
-    void knnMatchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+    /** @brief Trains a descriptor matcher.
 
-    //! Download trainIdx and distance and convert it to vector with DMatch
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx and distance to vector with DMatch
-    static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    Trains a descriptor matcher (for example, the flann index). In all methods to match, the method
+    train() is run every time before matching.
+     */
+    virtual void train() = 0;
 
-    //! Find k best matches for each query descriptor (in increasing order of distances).
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    void knnMatch(const GpuMat& query, const GpuMat& train,
-        std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
-        bool compactResult = false);
+    //
+    // 1 to 1 match
+    //
 
-    //! Find k best matches from train collection for each query descriptor (in increasing order of distances)
-    void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
-        GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-        const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
+    /** @brief Finds the best match for each descriptor from a query set (blocking version).
 
-    //! Download trainIdx and distance and convert it to vector with DMatch
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    //! @see BFMatcher_CUDA::knnMatchDownload
-    static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx and distance to vector with DMatch
-    //! @see BFMatcher_CUDA::knnMatchConvert
-    static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches. If a query descriptor is masked out in mask, no match is added for this
+    descriptor. So, matches size may be smaller than the query descriptors count.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
 
-    //! Find k best matches  for each query descriptor (in increasing order of distances).
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+    In the first variant of this method, the train descriptors are passed as an input argument. In the
+    second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
+    used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
+    matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
+    mask.at\<uchar\>(i,j) is non-zero.
+     */
+    virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
+                       std::vector<DMatch>& matches,
+                       InputArray mask = noArray()) = 0;
 
-    //! Find best matches for each query descriptor which have distance less than maxDistance.
-    //! nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
-    //! carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
-    //! because it didn't have enough memory.
-    //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
-    //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-    //! Matches doesn't sorted.
-    void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
-        GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
+    /** @overload
+     */
+    virtual void match(InputArray queryDescriptors,
+                       std::vector<DMatch>& matches,
+                       const std::vector<GpuMat>& masks = std::vector<GpuMat>()) = 0;
 
-    //! Download trainIdx, nMatches and distance and convert it to vector with DMatch.
-    //! matches will be sorted in increasing order of distances.
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    /** @brief Finds the best match for each descriptor from a query set (asynchronous version).
 
-    //! Find best matches for each query descriptor which have distance less than maxDistance
-    //! in increasing order of distances).
-    void radiusMatch(const GpuMat& query, const GpuMat& train,
-        std::vector< std::vector<DMatch> >& matches, float maxDistance,
-        const GpuMat& mask = GpuMat(), bool compactResult = false);
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::matchConvert method to retrieve results in standard representation.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param stream CUDA stream.
 
-    //! Find best matches for each query descriptor which have distance less than maxDistance.
-    //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
-    //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
-    //! Matches doesn't sorted.
-    void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
+    In the first variant of this method, the train descriptors are passed as an input argument. In the
+    second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is
+    used. Optional mask (or masks) can be passed to specify which query and training descriptors can be
+    matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
+    mask.at\<uchar\>(i,j) is non-zero.
+     */
+    virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                            OutputArray matches,
+                            InputArray mask = noArray(),
+                            Stream& stream = Stream::Null()) = 0;
 
-    //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
-    //! matches will be sorted in increasing order of distances.
-    //! compactResult is used when mask is not empty. If compactResult is false matches
-    //! vector will have the same size as queryDescriptors rows. If compactResult is true
-    //! matches vector will not contain matches for fully masked out query descriptors.
-    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
-    //! Convert trainIdx, nMatches and distance to vector with DMatch.
-    static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
-        std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
+    /** @overload
+     */
+    virtual void matchAsync(InputArray queryDescriptors,
+                            OutputArray matches,
+                            const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                            Stream& stream = Stream::Null()) = 0;
 
-    //! Find best matches from train collection for each query descriptor which have distance less than
-    //! maxDistance (in increasing order of distances).
-    void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
-        const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
+    /** @brief Converts matches array from internal representation to standard matches vector.
 
-    int norm;
+    The method is supposed to be used with DescriptorMatcher::matchAsync to get final result.
+    Call this method only after DescriptorMatcher::matchAsync is completed (i.e. after synchronization).
 
-private:
-    std::vector<GpuMat> trainDescCollection;
+    @param gpu_matches Matches, returned from DescriptorMatcher::matchAsync.
+    @param matches Vector of DMatch objects.
+     */
+    virtual void matchConvert(InputArray gpu_matches,
+                              std::vector<DMatch>& matches) = 0;
+
+    //
+    // knn match
+    //
+
+    /** @brief Finds the k best matches for each descriptor from a query set (blocking version).
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches. Each matches[i] is k or less matches for the same query descriptor.
+    @param k Count of best matches found per each query descriptor or less if a query descriptor has
+    less than k possible matches in total.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+
+    These extended variants of DescriptorMatcher::match methods find several best matches for each query
+    descriptor. The matches are returned in increasing order of distance. See DescriptorMatcher::match
+    for the details about query and train descriptors.
+     */
+    virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                          std::vector<std::vector<DMatch> >& matches,
+                          int k,
+                          InputArray mask = noArray(),
+                          bool compactResult = false) = 0;
+
+    /** @overload
+     */
+    virtual void knnMatch(InputArray queryDescriptors,
+                          std::vector<std::vector<DMatch> >& matches,
+                          int k,
+                          const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                          bool compactResult = false) = 0;
+
+    /** @brief Finds the k best matches for each descriptor from a query set (asynchronous version).
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::knnMatchConvert method to retrieve results in standard representation.
+    @param k Number of best matches to find for each query descriptor; fewer are returned if a
+    query descriptor has fewer than k possible matches in total.
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param stream CUDA stream.
+    @note This method takes no compactResult parameter. To drop fully masked-out query descriptors
+    from the result, pass compactResult = true to DescriptorMatcher::knnMatchConvert when converting
+    the matches to the standard representation.
+
+    These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query
+    descriptor. The matches are returned in order of increasing distance. See DescriptorMatcher::matchAsync
+    for the details about query and train descriptors.
+     */
+    virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                               OutputArray matches,
+                               int k,
+                               InputArray mask = noArray(),
+                               Stream& stream = Stream::Null()) = 0;
+
+    /** @overload
+     */
+    virtual void knnMatchAsync(InputArray queryDescriptors,
+                               OutputArray matches,
+                               int k,
+                               const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                               Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Converts matches array from internal representation to standard matches vector.
+
+    The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get final result.
+    Call this method only after DescriptorMatcher::knnMatchAsync is completed (i.e. after synchronization).
+
+    @param gpu_matches Matches, returned from DescriptorMatcher::knnMatchAsync.
+    @param matches Vector of DMatch objects.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+     */
+    virtual void knnMatchConvert(InputArray gpu_matches,
+                                 std::vector< std::vector<DMatch> >& matches,
+                                 bool compactResult = false) = 0;
+
+    //
+    // radius match
+    //
+
+    /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version).
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Found matches.
+    @param maxDistance Threshold for the distance between matched descriptors. Distance here means
+    metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
+    in pixels).
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+
+    For each query descriptor, the methods find such training descriptors that the distance between the
+    query descriptor and the training descriptor is equal to or smaller than maxDistance. Found matches are
+    returned in order of increasing distance.
+     */
+    virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                             std::vector<std::vector<DMatch> >& matches,
+                             float maxDistance,
+                             InputArray mask = noArray(),
+                             bool compactResult = false) = 0;
+
+    /** @overload
+     */
+    virtual void radiusMatch(InputArray queryDescriptors,
+                             std::vector<std::vector<DMatch> >& matches,
+                             float maxDistance,
+                             const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                             bool compactResult = false) = 0;
+
+    /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version).
+
+    @param queryDescriptors Query set of descriptors.
+    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
+    collection stored in the class object.
+    @param matches Matches array stored in GPU memory. Internal representation is not defined.
+    Use DescriptorMatcher::radiusMatchConvert method to retrieve results in standard representation.
+    @param maxDistance Threshold for the distance between matched descriptors. Distance here means
+    metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
+    in pixels).
+    @param mask Mask specifying permissible matches between an input query and train matrices of
+    descriptors.
+    @param stream CUDA stream.
+    @note This method takes no compactResult parameter. To drop fully masked-out query descriptors
+    from the result, pass compactResult = true to DescriptorMatcher::radiusMatchConvert when converting
+    the matches to the standard representation.
+
+    For each query descriptor, the methods find such training descriptors that the distance between the
+    query descriptor and the training descriptor is equal to or smaller than maxDistance. Found matches are
+    returned in order of increasing distance.
+     */
+    virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                                  OutputArray matches,
+                                  float maxDistance,
+                                  InputArray mask = noArray(),
+                                  Stream& stream = Stream::Null()) = 0;
+
+    /** @overload
+     */
+    virtual void radiusMatchAsync(InputArray queryDescriptors,
+                                  OutputArray matches,
+                                  float maxDistance,
+                                  const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                  Stream& stream = Stream::Null()) = 0;
+
+    /** @brief Converts matches array from internal representation to standard matches vector.
+
+    The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get final result.
+    Call this method only after DescriptorMatcher::radiusMatchAsync is completed (i.e. after synchronization).
+
+    @param gpu_matches Matches, returned from DescriptorMatcher::radiusMatchAsync.
+    @param matches Vector of DMatch objects.
+    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
+    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
+    the matches vector does not contain matches for fully masked-out query descriptors.
+     */
+    virtual void radiusMatchConvert(InputArray gpu_matches,
+                                    std::vector< std::vector<DMatch> >& matches,
+                                    bool compactResult = false) = 0;
 };
 
 //
diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp
index 0dcb0434f5..9d81348164 100644
--- a/modules/cudafeatures2d/perf/perf_features2d.cpp
+++ b/modules/cudafeatures2d/perf/perf_features2d.cpp
@@ -167,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
 
         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_distance;
+        cv::cuda::GpuMat d_matches;
 
-        TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+        TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);
 
         std::vector<cv::DMatch> gpu_matches;
-        d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
+        d_matcher->matchConvert(d_matches, gpu_matches);
 
         SANITY_CHECK_MATCHES(gpu_matches);
     }
@@ -226,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
 
         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
+        cv::cuda::GpuMat d_matches;
 
-        TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
+        TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);
 
         std::vector< std::vector<cv::DMatch> > matchesTbl;
-        d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
+        d_matcher->knnMatchConvert(d_matches, matchesTbl);
 
         std::vector<cv::DMatch> gpu_matches;
         toOneRowMatches(matchesTbl, gpu_matches);
@@ -280,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::BFMatcher_CUDA d_matcher(normType);
+        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);
 
         const cv::cuda::GpuMat d_query(query);
         const cv::cuda::GpuMat d_train(train);
-        cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
+        cv::cuda::GpuMat d_matches;
 
-        TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
+        TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);
 
         std::vector< std::vector<cv::DMatch> > matchesTbl;
-        d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
+        d_matcher->radiusMatchConvert(d_matches, matchesTbl);
 
         std::vector<cv::DMatch> gpu_matches;
         toOneRowMatches(matchesTbl, gpu_matches);
diff --git a/modules/cudafeatures2d/src/brute_force_matcher.cpp b/modules/cudafeatures2d/src/brute_force_matcher.cpp
index 5de0b06e32..a00537c8eb 100644
--- a/modules/cudafeatures2d/src/brute_force_matcher.cpp
+++ b/modules/cudafeatures2d/src/brute_force_matcher.cpp
@@ -47,37 +47,7 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::add(const std::vector<GpuMat>&) { throw_no_cuda(); }
-const std::vector<GpuMat>& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; }
-void cv::cuda::BFMatcher_CUDA::clear() { throw_no_cuda(); }
-bool cv::cuda::BFMatcher_CUDA::empty() const { throw_no_cuda(); return true; }
-bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const { throw_no_cuda(); return true; }
-void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, const GpuMat&, std::vector<DMatch>&, const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat&, GpuMat&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
+Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -155,857 +125,953 @@ namespace cv { namespace cuda { namespace device
     }
 }}}
 
-////////////////////////////////////////////////////////////////////
-// Train collection
-
-cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int norm_) : norm(norm_)
-{
-}
-
-void cv::cuda::BFMatcher_CUDA::add(const std::vector<GpuMat>& descCollection)
-{
-    trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
-}
-
-const std::vector<GpuMat>& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const
-{
-    return trainDescCollection;
-}
-
-void cv::cuda::BFMatcher_CUDA::clear()
-{
-    trainDescCollection.clear();
-}
-
-bool cv::cuda::BFMatcher_CUDA::empty() const
-{
-    return trainDescCollection.empty();
-}
-
-bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const
-{
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////
-// Match
-
-void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& train,
-    GpuMat& trainIdx, GpuMat& distance,
-    const GpuMat& mask, Stream& stream)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_match;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
-                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-        matchL1_gpu<int>, matchL1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-    };
-
-    static const caller_t callersHamming[] =
-    {
-        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-    };
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.cols == query.cols && train.type() == query.type());
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    const int nQuery = query.rows;
-
-    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32F, distance);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-
-    matchConvert(trainIdxCPU, distanceCPU, matches);
-}
-
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* trainIdx_ptr = trainIdx.ptr<int>();
-    const float* distance_ptr =  distance.ptr<float>();
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
-    {
-        int train_idx = *trainIdx_ptr;
-
-        if (train_idx == -1)
-            continue;
-
-        float distance_local = *distance_ptr;
-
-        DMatch m(queryIdx, train_idx, 0, distance_local);
-
-        matches.push_back(m);
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, const GpuMat& train,
-    std::vector<DMatch>& matches, const GpuMat& mask)
-{
-    GpuMat trainIdx, distance;
-    matchSingle(query, train, trainIdx, distance, mask);
-    matchDownload(trainIdx, distance, matches);
-}
-
-void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
-    const std::vector<GpuMat>& masks)
-{
-    if (empty())
-        return;
-
-    if (masks.empty())
-    {
-        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
-
-        PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
-
-        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
-            *trainCollectionCPU_ptr = trainDescCollection[i];
-
-        trainCollection.upload(trainCollectionCPU);
-        maskCollection.release();
-    }
-    else
-    {
-        CV_Assert(masks.size() == trainDescCollection.size());
-
-        Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
-        Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
-
-        PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
-        PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
-
-        for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
-        {
-            const GpuMat& train = trainDescCollection[i];
-            const GpuMat& mask = masks[i];
-
-            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));
-
-            *trainCollectionCPU_ptr = train;
-            *maskCollectionCPU_ptr = mask;
-        }
-
-        trainCollection.upload(trainCollectionCPU);
-        maskCollection.upload(maskCollectionCPU);
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
-    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-    const GpuMat& masks, Stream& stream)
-{
-    if (query.empty() || trainCollection.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_match;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
-                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-        matchL1_gpu<int>, matchL1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-    };
-    static const caller_t callersHamming[] =
-    {
-        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-    };
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    const int nQuery = query.rows;
-
-    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32F, distance);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-
-    matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
-}
-
-void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.cols == trainIdx.cols);
-    CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* trainIdx_ptr = trainIdx.ptr<int>();
-    const int* imgIdx_ptr = imgIdx.ptr<int>();
-    const float* distance_ptr =  distance.ptr<float>();
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-    {
-        int _trainIdx = *trainIdx_ptr;
-
-        if (_trainIdx == -1)
-            continue;
-
-        int _imgIdx = *imgIdx_ptr;
-
-        float _distance = *distance_ptr;
-
-        DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
-
-        matches.push_back(m);
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks)
-{
-    GpuMat trainCollection;
-    GpuMat maskCollection;
-
-    makeGpuCollection(trainCollection, maskCollection, masks);
-
-    GpuMat trainIdx, imgIdx, distance;
-
-    matchCollection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
-    matchDownload(trainIdx, imgIdx, distance, matches);
-}
-
-////////////////////////////////////////////////////////////////////
-// KnnMatch
-
-void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat& query, const GpuMat& train,
-    GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
-    const GpuMat& mask, Stream& stream)
-{
-    if (query.empty() || train.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_knnmatch;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
-                             const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-        matchL1_gpu<int>, matchL1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-    };
-    static const caller_t callersHamming[] =
-    {
-        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-    };
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.type() == query.type() && train.cols == query.cols);
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    const int nQuery = query.rows;
-    const int nTrain = train.rows;
-
-    if (k == 2)
-    {
-        ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
-        ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
-    }
-    else
-    {
-        ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
-        ensureSizeIsEnough(nQuery, k, CV_32F, distance);
-        ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
-    }
-
-    trainIdx.setTo(Scalar::all(-1), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-
-    knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1);
-    CV_Assert(distance.size() == trainIdx.size());
-    CV_Assert(trainIdx.isContinuous() && distance.isContinuous());
-
-    const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
-    const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* trainIdx_ptr = trainIdx.ptr<int>();
-    const float* distance_ptr = distance.ptr<float>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch>& curMatches = matches.back();
-        curMatches.reserve(k);
-
-        for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-
-            if (_trainIdx != -1)
-            {
-                float _distance = *distance_ptr;
-
-                DMatch m(queryIdx, _trainIdx, 0, _distance);
-
-                curMatches.push_back(m);
-            }
-        }
-
-        if (compactResult && curMatches.empty())
-            matches.pop_back();
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, const GpuMat& train,
-    std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask, bool compactResult)
-{
-    GpuMat trainIdx, distance, allDist;
-    knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask);
-    knnMatchDownload(trainIdx, distance, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
-    GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
-    const GpuMat& maskCollection, Stream& stream)
-{
-    if (query.empty() || trainCollection.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_knnmatch;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
-                             const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
-        match2L1_gpu<unsigned short>, match2L1_gpu<short>,
-        match2L1_gpu<int>, match2L1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
-        0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
-        0/*match2L2_gpu<int>*/, match2L2_gpu<float>
-    };
-    static const caller_t callersHamming[] =
-    {
-        match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
-        match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
-        match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
-    };
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    const int nQuery = query.rows;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx);
-    ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
-
-    trainIdx.setTo(Scalar::all(-1), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-
-    knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC2);
-    CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols);
-    CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols);
-
-    const int nQuery = trainIdx.cols;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* trainIdx_ptr = trainIdx.ptr<int>();
-    const int* imgIdx_ptr = imgIdx.ptr<int>();
-    const float* distance_ptr = distance.ptr<float>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch>& curMatches = matches.back();
-        curMatches.reserve(2);
-
-        for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-
-            if (_trainIdx != -1)
-            {
-                int _imgIdx = *imgIdx_ptr;
-
-                float _distance = *distance_ptr;
-
-                DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
-
-                curMatches.push_back(m);
-            }
-        }
-
-        if (compactResult && curMatches.empty())
-            matches.pop_back();
-    }
-}
-
 namespace
 {
-    struct ImgIdxSetter
+    static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection, // in: train descriptor sets
+                                  const std::vector<GpuMat>& masks,               // in: one mask per set, or empty
+                                  GpuMat& trainCollection,                        // out: uploaded array of PtrStepSzb headers
+                                  GpuMat& maskCollection)                         // out: uploaded array of PtrStepb headers
     {
-        explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
-        inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}
-        int imgIdx;
-    };
-}
-
-void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
-    const std::vector<GpuMat>& masks, bool compactResult)
-{
-    if (k == 2)
-    {
-        GpuMat trainCollection;
-        GpuMat maskCollection;
-
-        makeGpuCollection(trainCollection, maskCollection, masks);
-
-        GpuMat trainIdx, imgIdx, distance;
-
-        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
-        knnMatch2Download(trainIdx, imgIdx, distance, matches);
-    }
-    else
-    {
-        if (query.empty() || empty())
+        if (trainDescCollection.empty())
             return;
 
-        std::vector< std::vector<DMatch> > curMatches;
-        std::vector<DMatch> temp;
-        temp.reserve(2 * k);
-
-        matches.resize(query.rows);
-        for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&std::vector<DMatch>::reserve), k));
-
-        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
+        if (masks.empty())
         {
-            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
+            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
 
-            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
+            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
+            // Store one PtrStepSzb header per train matrix in the host staging row.
+            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
+                *trainCollectionCPU_ptr = trainDescCollection[i];
+            // Upload the header array; with no masks given the mask collection is released.
+            trainCollection.upload(trainCollectionCPU);
+            maskCollection.release();
+        }
+        else
+        {
+            CV_Assert( masks.size() == trainDescCollection.size() );
+            // Host staging rows: one element per train matrix, each sized to hold the raw header struct.
+            Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
+            Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
+            // Typed cursors over the two staging rows.
+            PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
+            PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
+            // Both cursors advance together so entry i of each array refers to the same train set.
+            for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
             {
-                std::vector<DMatch>& localMatch = curMatches[queryIdx];
-                std::vector<DMatch>& globalMatch = matches[queryIdx];
+                const GpuMat& train = trainDescCollection[i];
+                const GpuMat& mask = masks[i];
 
-                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));
+                CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );
 
-                temp.clear();
-                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
+                *trainCollectionCPU_ptr = train;
+                *maskCollectionCPU_ptr = mask;
+            }
 
-                globalMatch.clear();
-                const size_t count = std::min((size_t)k, temp.size());
-                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
+            trainCollection.upload(trainCollectionCPU);
+            maskCollection.upload(maskCollectionCPU);
+        }
+    }
+
+    class BFMatcher_Impl : public cv::cuda::DescriptorMatcher // matcher implementation; the norm is fixed at construction
+    {
+    public:
+        explicit BFMatcher_Impl(int norm) : norm_(norm)
+        {
+            CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
+        }
+        // Always reports that masks are supported.
+        virtual bool isMaskSupported() const { return true; }
+        // Appends the given descriptor matrices to the stored train collection.
+        virtual void add(const std::vector<GpuMat>& descriptors)
+        {
+            trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
+        }
+        // Read-only access to the stored train collection.
+        virtual const std::vector<GpuMat>& getTrainDescriptors() const
+        {
+            return trainDescCollection_;
+        }
+        // Removes every stored train descriptor matrix.
+        virtual void clear()
+        {
+            trainDescCollection_.clear();
+        }
+        // True when no train descriptors are stored.
+        virtual bool empty() const
+        {
+            return trainDescCollection_.empty();
+        }
+        // Intentionally empty for this implementation.
+        virtual void train()
+        {
+        }
+        // At most one match per query against an explicit train matrix (matchAsync + matchConvert).
+        virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
+                           std::vector<DMatch>& matches,
+                           InputArray mask = noArray());
+        // Same, against the stored train collection.
+        virtual void match(InputArray queryDescriptors,
+                           std::vector<DMatch>& matches,
+                           const std::vector<GpuMat>& masks = std::vector<GpuMat>());
+        // Fills 'matches' with a packed 2-row CV_32SC1 matrix (train indices, distances); work goes to 'stream'.
+        virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                                OutputArray matches,
+                                InputArray mask = noArray(),
+                                Stream& stream = Stream::Null());
+        // Collection variant: packed 3-row matrix (train indices, image indices, distances).
+        virtual void matchAsync(InputArray queryDescriptors,
+                                OutputArray matches,
+                                const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                Stream& stream = Stream::Null());
+        // Decodes either packed layout into DMatch entries; accepts a device or a host matrix.
+        virtual void matchConvert(InputArray gpu_matches,
+                                  std::vector<DMatch>& matches);
+        // Up to k matches per query against an explicit train matrix (knnMatchAsync + knnMatchConvert).
+        virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                              std::vector<std::vector<DMatch> >& matches,
+                              int k,
+                              InputArray mask = noArray(),
+                              bool compactResult = false);
+        // Collection variant; k == 2 goes through knnMatchAsync, other k merge per-image results.
+        virtual void knnMatch(InputArray queryDescriptors,
+                              std::vector<std::vector<DMatch> >& matches,
+                              int k,
+                              const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                              bool compactResult = false);
+        // Packed k-NN result against an explicit train matrix; the layout depends on whether k == 2.
+        virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                                   OutputArray matches,
+                                   int k,
+                                   InputArray mask = noArray(),
+                                   Stream& stream = Stream::Null());
+        // Collection variant; raises StsNotImplemented unless k == 2.
+        virtual void knnMatchAsync(InputArray queryDescriptors,
+                                   OutputArray matches,
+                                   int k,
+                                   const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                   Stream& stream = Stream::Null());
+        // Decodes a packed k-NN result; compactResult drops queries that end up without matches.
+        virtual void knnMatchConvert(InputArray gpu_matches,
+                                     std::vector< std::vector<DMatch> >& matches,
+                                     bool compactResult = false);
+        // Radius search against an explicit train matrix (radiusMatchAsync + radiusMatchConvert).
+        virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
+                                 std::vector<std::vector<DMatch> >& matches,
+                                 float maxDistance,
+                                 InputArray mask = noArray(),
+                                 bool compactResult = false);
+        // Same, against the stored train collection.
+        virtual void radiusMatch(InputArray queryDescriptors,
+                                 std::vector<std::vector<DMatch> >& matches,
+                                 float maxDistance,
+                                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                 bool compactResult = false);
+        // Stream-taking radius search against an explicit train matrix.
+        virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
+                                      OutputArray matches,
+                                      float maxDistance,
+                                      InputArray mask = noArray(),
+                                      Stream& stream = Stream::Null());
+        // Stream-taking radius search against the stored train collection.
+        virtual void radiusMatchAsync(InputArray queryDescriptors,
+                                      OutputArray matches,
+                                      float maxDistance,
+                                      const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
+                                      Stream& stream = Stream::Null());
+        // Decoding step called by both radiusMatch overloads.
+        virtual void radiusMatchConvert(InputArray gpu_matches,
+                                        std::vector< std::vector<DMatch> >& matches,
+                                        bool compactResult = false);
+        // Configuration and stored train data.
+    private:
+        int norm_;
+        std::vector<GpuMat> trainDescCollection_;
+    };
+
+    //
+    // 1 to 1 match
+    //
+
+    // Wrapper: matchAsync against an explicit train matrix, then matchConvert on the packed result.
+    void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                               std::vector<DMatch>& matches, InputArray _mask)
+    {
+        GpuMat packedResult;
+        matchAsync(_queryDescriptors, _trainDescriptors, packedResult, _mask);
+        matchConvert(packedResult, matches);
+    }
+
+    // Wrapper: matchAsync against the stored train sets, then matchConvert on the packed result.
+    void BFMatcher_Impl::match(InputArray _queryDescriptors, std::vector<DMatch>& matches,
+                               const std::vector<GpuMat>& masks)
+    {
+        GpuMat packedResult;
+        matchAsync(_queryDescriptors, packedResult, masks);
+        matchConvert(packedResult, matches);
+    }
+
+    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                    OutputArray _matches,
+                                    InputArray _mask,
+                                    Stream& stream) // work is issued on 'stream'; the packed result goes to _matches
+    {
+        using namespace cv::cuda::device::bf_match;
+        // GpuMat headers for the query, the train set and the optional mask.
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        const GpuMat train = _trainDescriptors.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+        // Nothing to match: hand back an empty result.
+        if (query.empty() || train.empty())
+        {
+            _matches.release();
+            return;
+        }
+        // Input contract: single-channel data below CV_64F, same width and type, mask sized query.rows x train.rows.
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+        CV_Assert( train.cols == query.cols && train.type() == query.type() );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
+        // Common signature of the entry points listed in the tables below.
+        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
+                                 const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
+                                 cudaStream_t stream);
+        // Tables are indexed by query.depth(); a 0 entry means no entry point for that depth/norm pair.
+        static const caller_t callersL1[] =
+        {
+            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+            matchL1_gpu<int>, matchL1_gpu<float>
+        };
+        static const caller_t callersL2[] =
+        {
+            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+        };
+        static const caller_t callersHamming[] =
+        {
+            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
+        };
+        // The constructor restricts norm_ to L1, L2 or Hamming, so the final branch is Hamming.
+        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+        const caller_t func = callers[query.depth()];
+        if (func == 0)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        // Packed output: row 0 = train indices, row 1 = distances (float data inside the CV_32SC1 matrix).
+        _matches.create(2, nQuery, CV_32SC1);
+        GpuMat matches = _matches.getGpuMat();
+        // Row headers constructed over the output matrix's own rows.
+        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
+        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));
+
+        func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
+    }
+
+    void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
+                                    OutputArray _matches,
+                                    const std::vector<GpuMat>& masks,
+                                    Stream& stream) // work is issued on 'stream'; the packed result goes to _matches
+    {
+        using namespace cv::cuda::device::bf_match;
+
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        // Without a query or stored train sets there is nothing to match.
+        if (query.empty() || trainDescCollection_.empty())
+        {
+            _matches.release();
+            return;
+        }
+
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+        // Upload the per-set headers (and masks, when given) as arrays the entry point can index.
+        GpuMat trainCollection, maskCollection;
+        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
+        // Collection signature: compared with the pair overload there is an extra imgIdx output.
+        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
+                                 const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
+                                 cudaStream_t stream);
+        // Tables are indexed by query.depth(); a 0 entry means no entry point for that depth/norm pair.
+        static const caller_t callersL1[] =
+        {
+            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+            matchL1_gpu<int>, matchL1_gpu<float>
+        };
+        static const caller_t callersL2[] =
+        {
+            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+        };
+        static const caller_t callersHamming[] =
+        {
+            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
+        };
+
+        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+        const caller_t func = callers[query.depth()];
+        if (func == 0)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        // Packed output: row 0 = train indices, row 1 = image indices, row 2 = distances (float data).
+        _matches.create(3, nQuery, CV_32SC1);
+        GpuMat matches = _matches.getGpuMat();
+        // Row headers constructed over the output matrix's own rows.
+        GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
+        GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
+        GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));
+
+        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
+    }
+
+    // Unpacks a matchAsync result into DMatch entries.
+    // A 2-row input holds (trainIdx, distance); a 3-row input holds (trainIdx, imgIdx, distance).
+    // Queries whose train index is -1 produce no entry; imgIdx is 0 for 2-row input.
+    void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
+                                      std::vector<DMatch>& matches)
+    {
+        // Device input is downloaded; anything else is read as a host matrix.
+        Mat gpu_matches;
+        if (_gpu_matches.kind() != _InputArray::CUDA_GPU_MAT)
+        {
+            gpu_matches = _gpu_matches.getMat();
+        }
+        else
+        {
+            _gpu_matches.getGpuMat().download(gpu_matches);
+        }
+
+        // An empty packed matrix maps to an empty match list.
+        if (gpu_matches.empty())
+        {
+            matches.clear();
+            return;
+        }
+
+        CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );
+
+        const int queryCount = gpu_matches.cols;
+        const bool hasImgIdxRow = (gpu_matches.rows == 3);
+
+        // Row 0 always carries the train index and the last row the distance;
+        // the image index row exists only in the 3-row layout.
+        const int* const trainIdxRow = gpu_matches.ptr<int>(0);
+        const float* const distanceRow = gpu_matches.ptr<float>(hasImgIdxRow ? 2 : 1);
+        const int* imgIdxRow = NULL;
+        if (hasImgIdxRow)
+            imgIdxRow = gpu_matches.ptr<int>(1);
+
+        // Rebuild the output from scratch, reserving room for one match per query.
+        matches.clear();
+        matches.reserve(queryCount);
+
+        for (int q = 0; q < queryCount; ++q)
+        {
+            const int trainIdx = trainIdxRow[q];
+
+            // -1 is skipped: no DMatch is emitted for that query.
+            if (trainIdx != -1)
+            {
+                const int imgIdx = hasImgIdxRow ? imgIdxRow[q] : 0;
+                const float distance = distanceRow[q];
+
+                const DMatch found(q, trainIdx, imgIdx, distance);
+                matches.push_back(found);
+            }
+        }
+    }
+
+    //
+    // knn match
+    //
+
+    // Wrapper: knnMatchAsync against an explicit train matrix, then knnMatchConvert.
+    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                  std::vector<std::vector<DMatch> >& matches,
+                                  int k, InputArray _mask, bool compactResult)
+    {
+        // Temporary device matrix carrying the packed result between the two steps.
+        GpuMat packedResult;
+        knnMatchAsync(_queryDescriptors, _trainDescriptors, packedResult, k, _mask);
+        knnMatchConvert(packedResult, matches, compactResult);
+    }
+
+    // Finds up to k matches per query row across every stored train set.
+    // k == 2 uses knnMatchAsync on the whole collection; other k merge per-image results on the host.
+    void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
+                                  std::vector<std::vector<DMatch> >& matches,
+                                  int k, const std::vector<GpuMat>& masks,
+                                  bool compactResult)
+    {
+        if (k == 2)
+        {
+            GpuMat d_matches;
+            knnMatchAsync(_queryDescriptors, d_matches, k, masks);
+            knnMatchConvert(d_matches, matches, compactResult);
+        }
+        else
+        {
+            const GpuMat query = _queryDescriptors.getGpuMat();
+            if (query.empty() || trainDescCollection_.empty())
+            {
+                matches.clear();
+                return;
+            }
+
+            CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+            // masks[imgIdx] is read below, so a non-empty mask list must cover every train set.
+            CV_Assert( masks.empty() || masks.size() == trainDescCollection_.size() );
+            std::vector< std::vector<DMatch> > curMatches;
+            std::vector<DMatch> temp;
+            temp.reserve(2 * k);
+
+            // Discard whatever the caller left in 'matches' so it cannot be merged into the result.
+            matches.clear();
+            matches.resize(query.rows);
+            for (size_t i = 0; i < matches.size(); ++i)
+                matches[i].reserve(k);
+
+            for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
+            {
+                knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
+                // curMatches comes back empty for an empty train matrix, hence the size-based bound.
+                for (size_t queryIdx = 0; queryIdx < curMatches.size(); ++queryIdx)
+                {
+                    std::vector<DMatch>& localMatch = curMatches[queryIdx];
+                    std::vector<DMatch>& globalMatch = matches[queryIdx];
+                    for (size_t i = 0; i < localMatch.size(); ++i)
+                        localMatch[i].imgIdx = static_cast<int>(imgIdx);
+                    // std::merge expects both ranges ordered by DMatch::operator<; keep the first k.
+                    temp.clear();
+                    std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));
+                    globalMatch.clear();
+                    const size_t count = std::min(static_cast<size_t>(k), temp.size());
+                    std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
+                }
+            }
+            if (compactResult)
+            {
+                std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
+                matches.erase(new_end, matches.end());
             }
         }
+    }
 
-        if (compactResult)
+    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                       OutputArray _matches,
+                                       int k,
+                                       InputArray _mask,
+                                       Stream& stream) // work is issued on 'stream'; the packed result goes to _matches
+    {
+        using namespace cv::cuda::device::bf_knnmatch;
+
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        const GpuMat train = _trainDescriptors.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+        // Nothing to match: hand back an empty result.
+        if (query.empty() || train.empty())
         {
-            std::vector< std::vector<DMatch> >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&std::vector<DMatch>::empty));
-            matches.erase(new_end, matches.end());
+            _matches.release();
+            return;
+        }
+
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+        CV_Assert( train.cols == query.cols && train.type() == query.type() );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
+        // Signature of the k-NN entry points; allDist is only allocated on the k != 2 path below.
+        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
+                                 const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
+                                 cudaStream_t stream);
+        // Tables are indexed by query.depth(); a 0 entry means no entry point for that depth/norm pair.
+        static const caller_t callersL1[] =
+        {
+            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+            matchL1_gpu<int>, matchL1_gpu<float>
+        };
+        static const caller_t callersL2[] =
+        {
+            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+        };
+        static const caller_t callersHamming[] =
+        {
+            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
+        };
+
+        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+        const caller_t func = callers[query.depth()];
+        if (func == 0)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        const int nTrain = train.rows;
+        // Output layout depends on k: two 2-channel rows for k == 2, otherwise a (2 * nQuery) x k matrix.
+        GpuMat trainIdx, distance, allDist;
+        if (k == 2)
+        {
+            _matches.create(2, nQuery, CV_32SC2);
+            GpuMat matches = _matches.getGpuMat();
+
+            trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
+            distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
+        }
+        else
+        {
+            _matches.create(2 * nQuery, k, CV_32SC1);
+            GpuMat matches = _matches.getGpuMat();
+
+            trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
+            distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);
+            // nQuery x nTrain float scratch buffer taken from a BufferPool bound to 'stream'.
+            BufferPool pool(stream);
+            allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
+        }
+        // Pre-fill the indices with -1; knnMatchConvert treats -1 as "no match in this slot".
+        trainIdx.setTo(Scalar::all(-1), stream);
+
+        func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
+    }
+
+    void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
+                                       OutputArray _matches,
+                                       int k,
+                                       const std::vector<GpuMat>& masks,
+                                       Stream& stream) // work is issued on 'stream'; the packed result goes to _matches
+    {
+        using namespace cv::cuda::device::bf_knnmatch;
+        // This overload only handles k == 2; any other value is rejected up front.
+        if (k != 2)
+        {
+            CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
+        }
+
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        // Without a query or stored train sets there is nothing to match.
+        if (query.empty() || trainDescCollection_.empty())
+        {
+            _matches.release();
+            return;
+        }
+
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+        // Upload the per-set headers (and masks, when given) as arrays the entry point can index.
+        GpuMat trainCollection, maskCollection;
+        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
+        // Signature of the two-neighbour collection entry points.
+        typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
+                                 const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
+                                 cudaStream_t stream);
+        // Tables are indexed by query.depth(); a 0 entry means no entry point for that depth/norm pair.
+        static const caller_t callersL1[] =
+        {
+            match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
+            match2L1_gpu<unsigned short>, match2L1_gpu<short>,
+            match2L1_gpu<int>, match2L1_gpu<float>
+        };
+        static const caller_t callersL2[] =
+        {
+            0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
+            0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
+            0/*match2L2_gpu<int>*/, match2L2_gpu<float>
+        };
+        static const caller_t callersHamming[] =
+        {
+            match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
+            match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
+            match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
+        };
+
+        const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
+
+        const caller_t func = callers[query.depth()];
+        if (func == 0)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        // Packed output: three 2-channel rows (train indices, image indices, distances).
+        _matches.create(3, nQuery, CV_32SC2);
+        GpuMat matches = _matches.getGpuMat();
+        // Row headers constructed over the output matrix's own rows.
+        GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
+        GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
+        GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));
+        // Pre-fill the indices with -1; knnMatchConvert treats -1 as "no match in this slot".
+        trainIdx.setTo(Scalar::all(-1), stream);
+
+        func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
+    }
+
+    // Converts the packed result of knnMatchAsync (device or host matrix) into per-query DMatch lists.
+    // Accepted layouts: CV_32SC2 with 2 rows (trainIdx, distance) or 3 rows (trainIdx, imgIdx, distance),
+    // two slots per query; or CV_32SC1 holding rows/2 index rows followed by rows/2 distance rows.
+    // Slots whose trainIdx is -1 are skipped; compactResult drops queries left without any match.
+    void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
+                                         std::vector< std::vector<DMatch> >& matches,
+                                         bool compactResult)
+    {
+        Mat gpu_matches;
+        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_matches.getGpuMat().download(gpu_matches);
+        }
+        else
+        {
+            gpu_matches = _gpu_matches.getMat();
+        }
+
+        if (gpu_matches.empty())
+        {
+            matches.clear();
+            return;
+        }
+
+        CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
+                   (gpu_matches.type() == CV_32SC1) );
+
+        int nQuery = -1, k = -1;
+
+        const int* trainIdxPtr = NULL;
+        const int* imgIdxPtr = NULL;
+        const float* distancePtr = NULL;
+
+        if (gpu_matches.type() == CV_32SC2)
+        {
+            nQuery = gpu_matches.cols;
+            k = 2;
+
+            if (gpu_matches.rows == 2)
+            {
+                trainIdxPtr = gpu_matches.ptr<int>(0);
+                distancePtr =  gpu_matches.ptr<float>(1);
+            }
+            else
+            {
+                trainIdxPtr = gpu_matches.ptr<int>(0);
+                imgIdxPtr =  gpu_matches.ptr<int>(1);
+                distancePtr =  gpu_matches.ptr<float>(2);
+            }
+        }
+        else
+        {
+            nQuery = gpu_matches.rows / 2;
+            k = gpu_matches.cols;
+
+            trainIdxPtr = gpu_matches.ptr<int>(0);
+            distancePtr =  gpu_matches.ptr<float>(nQuery);
+        }
+
+        matches.clear();
+        matches.reserve(nQuery);
+
+        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
+        {
+            matches.push_back(std::vector<DMatch>());
+            std::vector<DMatch>& curMatches = matches.back();
+            curMatches.reserve(k);
+
+            for (int i = 0; i < k; ++i)
+            {
+                // Address each slot directly: a skipped (-1) slot must still be stepped over, otherwise
+                // every later slot is lost. Like the former pointer walk, this assumes continuous data.
+                const size_t slot = static_cast<size_t>(queryIdx) * k + i;
+                const int trainIdx = trainIdxPtr[slot];
+                if (trainIdx == -1)
+                    continue;
+
+                const int imgIdx = imgIdxPtr ? imgIdxPtr[slot] : 0;
+                const float distance = distancePtr[slot];
+
+                curMatches.push_back(DMatch(queryIdx, trainIdx, imgIdx, distance));
+            }
+
+            if (compactResult && curMatches.empty())
+            {
+                matches.pop_back();
+            }
+        }
+    }
+
+    //
+    // radius match
+    //
+
+    // Wrapper: radiusMatchAsync against an explicit train matrix, then radiusMatchConvert.
+    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                     std::vector<std::vector<DMatch> >& matches,
+                                     float maxDistance, InputArray _mask, bool compactResult)
+    {
+        // Temporary device matrix carrying the packed result between the two steps.
+        GpuMat packedResult;
+        radiusMatchAsync(_queryDescriptors, _trainDescriptors, packedResult, maxDistance, _mask);
+        radiusMatchConvert(packedResult, matches, compactResult);
+    }
+
+    // Wrapper: radiusMatchAsync against the stored train sets, then radiusMatchConvert.
+    void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
+                                     std::vector<std::vector<DMatch> >& matches,
+                                     float maxDistance, const std::vector<GpuMat>& masks, bool compactResult)
+    {
+        // Temporary device matrix carrying the packed result between the two steps.
+        GpuMat packedResult;
+        radiusMatchAsync(_queryDescriptors, packedResult, maxDistance, masks);
+        radiusMatchConvert(packedResult, matches, compactResult);
+    }
+
+    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
+                                          OutputArray _matches,
+                                          float maxDistance,
+                                          InputArray _mask,
+                                          Stream& stream)
+    {
+        // Radius search of each query row against one train set. The result is written to
+        // _matches as a single CV_32SC1 matrix of (2 * nQuery + 1) rows: train indices, then
+        // distances, then one row of per-query counts. Empty query/train releases _matches.
+        using namespace cv::cuda::device::bf_radius_match;
+
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        const GpuMat train = _trainDescriptors.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+
+        if (query.empty() || train.empty())
+        {
+            _matches.release();
+            return;
+        }
+
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+        CV_Assert( train.cols == query.cols && train.type() == query.type() );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
+
+        // Launcher tables, one per norm, indexed by descriptor depth; 0 = unsupported depth.
+        typedef void (*launcher_t)(const PtrStepSzb&, const PtrStepSzb&, float, const PtrStepSzb&,
+                                   const PtrStepSzi&, const PtrStepSzf&, const PtrStepSz<unsigned int>&,
+                                   cudaStream_t);
+
+        static const launcher_t launchersL1[] =
+        {
+            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+            matchL1_gpu<int>, matchL1_gpu<float>
+        };
+        static const launcher_t launchersL2[] =
+        {
+            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+        };
+        static const launcher_t launchersHamming[] =
+        {
+            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
+        };
+
+        const launcher_t* table = norm_ == NORM_L1 ? launchersL1
+                                : norm_ == NORM_L2 ? launchersL2 : launchersHamming;
+        const launcher_t launch = table[query.depth()];
+        if (!launch)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        const int width = std::max(train.rows / 100, nQuery); // count row below spans nQuery columns
+        _matches.create(2 * nQuery + 1, width, CV_32SC1);
+        GpuMat packed = _matches.getGpuMat();
+
+        GpuMat trainIdx(nQuery, width, CV_32SC1, packed.ptr(0), packed.step);
+        GpuMat distance(nQuery, width, CV_32FC1, packed.ptr(nQuery), packed.step);
+        GpuMat nMatches(1, nQuery, CV_32SC1, packed.ptr(2 * nQuery));
+
+        nMatches.setTo(Scalar::all(0), stream);
+        launch(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
+    }
+
+    void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
+                                          OutputArray _matches,
+                                          float maxDistance,
+                                          const std::vector<GpuMat>& masks,
+                                          Stream& stream)
+    {
+        // Radius search of each query row against every matrix in trainDescCollection_.
+        // _matches becomes one CV_32FC1 matrix of (3 * nQuery + 1) rows and nQuery columns:
+        // train indices, image indices, distances, then one row of per-query counts.
+        using namespace cv::cuda::device::bf_radius_match;
+
+        const GpuMat query = _queryDescriptors.getGpuMat();
+        if (query.empty() || trainDescCollection_.empty())
+        {
+            _matches.release();
+            return;
+        }
+
+        CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
+
+        // NOTE(review): outputs unused below; presumably kept for makeGpuCollection's input checks - confirm.
+        GpuMat trainCollection, maskCollection;
+        makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
+
+        // Launcher tables, one per norm, indexed by descriptor depth; 0 = unsupported depth.
+        typedef void (*launcher_t)(const PtrStepSzb&, const PtrStepSzb*, int, float, const PtrStepSzb*,
+                                   const PtrStepSzi&, const PtrStepSzi&, const PtrStepSzf&, const PtrStepSz<unsigned int>&,
+                                   cudaStream_t);
+        static const launcher_t launchersL1[] =
+        {
+            matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
+            matchL1_gpu<unsigned short>, matchL1_gpu<short>,
+            matchL1_gpu<int>, matchL1_gpu<float>
+        };
+        static const launcher_t launchersL2[] =
+        {
+            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
+            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
+            0/*matchL2_gpu<int>*/, matchL2_gpu<float>
+        };
+        static const launcher_t launchersHamming[] =
+        {
+            matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
+            matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
+            matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
+        };
+
+        const launcher_t* table = norm_ == NORM_L1 ? launchersL1
+                                : norm_ == NORM_L2 ? launchersL2 : launchersHamming;
+        const launcher_t launch = table[query.depth()];
+        if (!launch)
+        {
+            CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
+        }
+
+        const int nQuery = query.rows;
+        _matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
+        GpuMat packed = _matches.getGpuMat();
+
+        GpuMat trainIdx(nQuery, nQuery, CV_32SC1, packed.ptr(0), packed.step);
+        GpuMat imgIdx(nQuery, nQuery, CV_32SC1, packed.ptr(nQuery), packed.step);
+        GpuMat distance(nQuery, nQuery, CV_32FC1, packed.ptr(2 * nQuery), packed.step);
+        GpuMat nMatches(1, nQuery, CV_32SC1, packed.ptr(3 * nQuery));
+        nMatches.setTo(Scalar::all(0), stream);
+
+        std::vector<PtrStepSzb> trainHeaders(trainDescCollection_.begin(), trainDescCollection_.end());
+        std::vector<PtrStepSzb> maskHeaders(masks.begin(), masks.end());
+        launch(query, &trainHeaders[0], static_cast<int>(trainHeaders.size()), maxDistance, maskHeaders.size() == 0 ? 0 : &maskHeaders[0],
+            trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
+    }
+
+    void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
+                                            std::vector< std::vector<DMatch> >& matches,
+                                            bool compactResult)
+    {
+        // Unpacks the single matrix produced by radiusMatchAsync into per-query DMatch lists,
+        // each sorted with DMatch's operator<.
+        //
+        //   _gpu_matches  - packed result; a device matrix is downloaded first, anything else
+        //                   is read through getMat().
+        //   matches       - output, cleared on entry; receives one vector per query row.
+        //   compactResult - if true, queries without any match get no entry at all.
+        //
+        // Row layout, selected by the element type:
+        //   CV_32SC1: nQuery x trainIdx, nQuery x distance, 1 x nMatches
+        //   CV_32FC1: nQuery x trainIdx, nQuery x imgIdx, nQuery x distance, 1 x nMatches
+        Mat gpu_matches;
+        if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_matches.getGpuMat().download(gpu_matches);
+        }
+        else
+        {
+            gpu_matches = _gpu_matches.getMat();
+        }
+
+        if (gpu_matches.empty())
+        {
+            matches.clear();
+            return;
+        }
+
+        CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );
+
+        // Only the CV_32FC1 (collection) layout carries an imgIdx plane.
+        const bool hasImgIdx = gpu_matches.type() == CV_32FC1;
+        const int nPlanes = hasImgIdx ? 3 : 2;
+        const int nQuery = (gpu_matches.rows - 1) / nPlanes;
+
+        // First row of the distance plane and the single row holding the match counts.
+        const int distanceRow0 = (nPlanes - 1) * nQuery;
+        const int* nMatchesPtr = gpu_matches.ptr<int>(nPlanes * nQuery);
+
+        matches.clear();
+        matches.reserve(nQuery);
+
+        for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
+        {
+            // Clamp the reported count to the number of slots a row can actually hold.
+            const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);
+
+            if (nMatched == 0)
+            {
+                if (!compactResult)
+                {
+                    matches.push_back(std::vector<DMatch>());
+                }
+                continue;
+            }
+
+            // Fetch every row through Mat::ptr (which applies the row step) instead of
+            // advancing raw pointers by `cols`; the latter is only valid for a continuous
+            // matrix, which is not checked here for a caller-supplied host Mat.
+            const int* trainIdxPtr = gpu_matches.ptr<int>(queryIdx);
+            const float* distancePtr = gpu_matches.ptr<float>(distanceRow0 + queryIdx);
+            const int* imgIdxPtr = NULL;
+            if (hasImgIdx)
+            {
+                imgIdxPtr = gpu_matches.ptr<int>(nQuery + queryIdx);
+            }
+
+            matches.push_back(std::vector<DMatch>(nMatched));
+            std::vector<DMatch>& curMatches = matches.back();
+
+            for (int i = 0; i < nMatched; ++i)
+            {
+                const int trainIdx = trainIdxPtr[i];
+                // Without an imgIdx plane every match is attributed to image 0.
+                const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
+                const float distance = distancePtr[i];
+
+                curMatches[i] = DMatch(queryIdx, trainIdx, imgIdx, distance);
+            }
+
+            // Order this query's matches using DMatch's operator<.
+            std::sort(curMatches.begin(), curMatches.end());
         }
     }
 }
 
-////////////////////////////////////////////////////////////////////
-// RadiusMatch
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
-    GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
-    const GpuMat& mask, Stream& stream)
+Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
 {
-    if (query.empty() || train.empty())
-        return;
-
-    using namespace cv::cuda::device::bf_radius_match;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
-                             const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-        matchL1_gpu<int>, matchL1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-    };
-    static const caller_t callersHamming[] =
-    {
-        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-    };
-
-    const int nQuery = query.rows;
-    const int nTrain = train.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(train.type() == query.type() && train.cols == query.cols);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size()));
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    if (trainIdx.empty())
-    {
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
-        ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
-    }
-
-    nMatches.setTo(Scalar::all(0), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const float* distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatched == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>(nMatched));
-        std::vector<DMatch>& curMatches = matches.back();
-
-        for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-
-            float _distance = *distance_ptr;
-
-            DMatch m(queryIdx, _trainIdx, 0, _distance);
-
-            curMatches[i] = m;
-        }
-
-        sort(curMatches.begin(), curMatches.end());
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, const GpuMat& train,
-    std::vector< std::vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)
-{
-    GpuMat trainIdx, distance, nMatches;
-    radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask);
-    radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
-    float maxDistance, const std::vector<GpuMat>& masks, Stream& stream)
-{
-    if (query.empty() || empty())
-        return;
-
-    using namespace cv::cuda::device::bf_radius_match;
-
-    typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
-                             const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
-                             cudaStream_t stream);
-
-    static const caller_t callersL1[] =
-    {
-        matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
-        matchL1_gpu<unsigned short>, matchL1_gpu<short>,
-        matchL1_gpu<int>, matchL1_gpu<float>
-    };
-    static const caller_t callersL2[] =
-    {
-        0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
-        0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
-        0/*matchL2_gpu<int>*/, matchL2_gpu<float>
-    };
-    static const caller_t callersHamming[] =
-    {
-        matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
-        matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
-        matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
-    };
-
-    const int nQuery = query.rows;
-
-    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
-    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));
-    CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
-
-    const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
-
-    ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
-    if (trainIdx.empty())
-    {
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, trainIdx);
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx);
-        ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance);
-    }
-
-    nMatches.setTo(Scalar::all(0), stream);
-
-    caller_t func = callers[query.depth()];
-    CV_Assert(func != 0);
-
-    std::vector<PtrStepSzb> trains_(trainDescCollection.begin(), trainDescCollection.end());
-    std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());
-
-    func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
-        trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    Mat trainIdxCPU(trainIdx);
-    Mat imgIdxCPU(imgIdx);
-    Mat distanceCPU(distance);
-    Mat nMatchesCPU(nMatches);
-
-    radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
-    std::vector< std::vector<DMatch> >& matches, bool compactResult)
-{
-    if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
-        return;
-
-    CV_Assert(trainIdx.type() == CV_32SC1);
-    CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size());
-    CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size());
-    CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows);
-
-    const int nQuery = trainIdx.rows;
-
-    matches.clear();
-    matches.reserve(nQuery);
-
-    const int* nMatches_ptr = nMatches.ptr<int>();
-
-    for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
-    {
-        const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
-        const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
-        const float* distance_ptr = distance.ptr<float>(queryIdx);
-
-        const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
-
-        if (nMatched == 0)
-        {
-            if (!compactResult)
-                matches.push_back(std::vector<DMatch>());
-            continue;
-        }
-
-        matches.push_back(std::vector<DMatch>());
-        std::vector<DMatch>& curMatches = matches.back();
-        curMatches.reserve(nMatched);
-
-        for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
-        {
-            int _trainIdx = *trainIdx_ptr;
-            int _imgIdx = *imgIdx_ptr;
-            float _distance = *distance_ptr;
-
-            DMatch m(queryIdx, _trainIdx, _imgIdx, _distance);
-
-            curMatches.push_back(m);
-        }
-
-        sort(curMatches.begin(), curMatches.end());
-    }
-}
-
-void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches,
-    float maxDistance, const std::vector<GpuMat>& masks, bool compactResult)
-{
-    GpuMat trainIdx, imgIdx, distance, nMatches;
-    radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
-    radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
+    return makePtr<BFMatcher_Impl>(norm);
 }
 
 #endif /* !defined (HAVE_CUDA) */
diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp
index 25ba48faf8..3046a604b3 100644
--- a/modules/cudafeatures2d/test/test_features2d.cpp
+++ b/modules/cudafeatures2d/test/test_features2d.cpp
@@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz
 
 CUDA_TEST_P(BruteForceMatcher, Match_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     cv::cuda::GpuMat mask;
     if (useMask)
@@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
     }
 
     std::vector<cv::DMatch> matches;
-    matcher.match(loadMat(query), loadMat(train), matches, mask);
+    matcher->match(loadMat(query), loadMat(train), matches, mask);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
 
 CUDA_TEST_P(BruteForceMatcher, Match_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
 
     std::vector<cv::DMatch> matches;
     if (useMask)
-        matcher.match(cv::cuda::GpuMat(query), matches, masks);
+        matcher->match(cv::cuda::GpuMat(query), matches, masks);
     else
-        matcher.match(cv::cuda::GpuMat(query), matches);
+        matcher->match(cv::cuda::GpuMat(query), matches);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)
 
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const int knn = 2;
 
@@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
     }
 
     std::vector< std::vector<cv::DMatch> > matches;
-    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
+    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const int knn = 3;
 
@@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
     }
 
     std::vector< std::vector<cv::DMatch> > matches;
-    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
+    matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const int knn = 2;
 
     cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
     std::vector< std::vector<cv::DMatch> > matches;
 
     if (useMask)
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 
 CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const int knn = 3;
 
     cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
     std::vector< std::vector<cv::DMatch> > matches;
 
     if (useMask)
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
+        matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 
 CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const float radius = 1.f / countFactor;
 
@@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
         try
         {
             std::vector< std::vector<cv::DMatch> > matches;
-            matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
+            matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius);
         }
         catch (const cv::Exception& e)
         {
@@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
         }
 
         std::vector< std::vector<cv::DMatch> > matches;
-        matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
+        matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
 
         ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 
 CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
 {
-    cv::cuda::BFMatcher_CUDA matcher(normCode);
+    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
+            cv::cuda::DescriptorMatcher::createBFMatcher(normCode);
 
     const int n = 3;
     const float radius = 1.f / countFactor * n;
@@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
     cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
     std::vector<cv::cuda::GpuMat> masks(2);
@@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
         try
         {
             std::vector< std::vector<cv::DMatch> > matches;
-            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
+            matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
         }
         catch (const cv::Exception& e)
         {
@@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
         std::vector< std::vector<cv::DMatch> > matches;
 
         if (useMask)
-            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
+            matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
         else
-            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
+            matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius);
 
         ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp
index 49ee0f4744..ee05268d78 100644
--- a/modules/stitching/src/matchers.cpp
+++ b/modules/stitching/src/matchers.cpp
@@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
 
     matches_info.matches.clear();
 
-    Ptr<DescriptorMatcher> matcher;
+    Ptr<cv::DescriptorMatcher> matcher;
 #if 0 // TODO check this
     if (ocl::useOpenCL())
     {
@@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
     descriptors1_.upload(features1.descriptors);
     descriptors2_.upload(features2.descriptors);
 
-    BFMatcher_CUDA matcher(NORM_L2);
+    Ptr<cuda::DescriptorMatcher> matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
+
     MatchesSet matches;
 
     // Find 1->2 matches
     pair_matches.clear();
-    matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2);
-    matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
+    matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2);
     for (size_t i = 0; i < pair_matches.size(); ++i)
     {
         if (pair_matches[i].size() < 2)
@@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
 
     // Find 2->1 matches
     pair_matches.clear();
-    matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2);
-    matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
+    matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2);
     for (size_t i = 0; i < pair_matches.size(); ++i)
     {
         if (pair_matches[i].size() < 2)
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 0d083e5bda..09094282f2 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -379,14 +379,14 @@ TEST(BruteForceMatcher)
 
     // Init CUDA matcher
 
-    cuda::BFMatcher_CUDA d_matcher(NORM_L2);
+    Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
 
     cuda::GpuMat d_query(query);
     cuda::GpuMat d_train(train);
 
     // Output
     vector< vector<DMatch> > matches(2);
-    cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
+    cuda::GpuMat d_matches;
 
     SUBTEST << "match";
 
@@ -396,10 +396,10 @@ TEST(BruteForceMatcher)
     matcher.match(query, train, matches[0]);
     CPU_OFF;
 
-    d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+    d_matcher->matchAsync(d_query, d_train, d_matches);
 
     CUDA_ON;
-    d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
+    d_matcher->matchAsync(d_query, d_train, d_matches);
     CUDA_OFF;
 
     SUBTEST << "knnMatch";
@@ -410,10 +410,10 @@ TEST(BruteForceMatcher)
     matcher.knnMatch(query, train, matches, 2);
     CPU_OFF;
 
-    d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
 
     CUDA_ON;
-    d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
+    d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
     CUDA_OFF;
 
     SUBTEST << "radiusMatch";
@@ -426,12 +426,10 @@ TEST(BruteForceMatcher)
     matcher.radiusMatch(query, train, matches, max_distance);
     CPU_OFF;
 
-    d_trainIdx.release();
-
-    d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
 
     CUDA_ON;
-    d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
+    d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
     CUDA_OFF;
 }
 

From 5f1282afdb0b3d137b0cf2161d5691bc877fc3f3 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Tue, 13 Jan 2015 18:23:59 +0300
Subject: [PATCH 26/55] fix documentation warnings

---
 modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index 975726973f..1d7f4e4e43 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -89,7 +89,7 @@ public:
     preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and
     BRIEF).
      */
-    static Ptr<DescriptorMatcher> createBFMatcher(int norm = cv::NORM_L2);
+    static Ptr<DescriptorMatcher> createBFMatcher(int normType = cv::NORM_L2);
 
     //
     // Utility
@@ -248,9 +248,6 @@ public:
     less than k possible matches in total.
     @param mask Mask specifying permissible matches between an input query and train matrices of
     descriptors.
-    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
-    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
-    the matches vector does not contain matches for fully masked-out query descriptors.
     @param stream CUDA stream.
 
     These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query
@@ -335,9 +332,6 @@ public:
     in Pixels)!
     @param mask Mask specifying permissible matches between an input query and train matrices of
     descriptors.
-    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
-    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
-    the matches vector does not contain matches for fully masked-out query descriptors.
     @param stream CUDA stream.
 
     For each query descriptor, the methods find such training descriptors that the distance between the

From efc7dbaae235a53fc2cc975a4e80fc93d306f4fa Mon Sep 17 00:00:00 2001
From: Claudio Caraffi <caraffi@ce.unipr.it>
Date: Wed, 14 Jan 2015 11:55:41 +0100
Subject: [PATCH 27/55] Prevent linking failure, bug 4108

---
 cmake/OpenCVModule.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index c5325e20f1..109a65cd75 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -808,7 +808,7 @@ function(ocv_add_perf_tests)
     __ocv_parse_test_sources(PERF ${ARGN})
 
     # opencv_imgcodecs is required for imread/imwrite
-    set(perf_deps ${the_module} opencv_ts opencv_imgcodecs ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
+    set(perf_deps opencv_ts ${the_module} opencv_imgcodecs ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
     ocv_check_dependencies(${perf_deps})
 
     if(OCV_DEPENDENCIES_FOUND)
@@ -829,7 +829,7 @@ function(ocv_add_perf_tests)
 
       ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
       ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}")
-      ocv_target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
+      ocv_target_link_libraries(${the_target} ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
       add_dependencies(opencv_perf_tests ${the_target})
 
       # Additional target properties
@@ -864,7 +864,7 @@ function(ocv_add_accuracy_tests)
     __ocv_parse_test_sources(TEST ${ARGN})
 
     # opencv_imgcodecs is required for imread/imwrite
-    set(test_deps ${the_module} opencv_ts opencv_imgcodecs opencv_videoio ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
+    set(test_deps opencv_ts ${the_module} opencv_imgcodecs opencv_videoio ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_opencv_ts_DEPS})
     ocv_check_dependencies(${test_deps})
     if(OCV_DEPENDENCIES_FOUND)
       set(the_target "opencv_test_${name}")
@@ -884,7 +884,7 @@ function(ocv_add_accuracy_tests)
 
       ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
       ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
-      ocv_target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${test_deps} ${OPENCV_LINKER_LIBS})
+      ocv_target_link_libraries(${the_target} ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
       add_dependencies(opencv_tests ${the_target})
 
       # Additional target properties

From 07ab0c1c53e82e7589d7555a04b30dd5334de914 Mon Sep 17 00:00:00 2001
From: Yan Wang <yan.wang@linux.intel.com>
Date: Wed, 14 Jan 2015 19:33:12 +0800
Subject: [PATCH 28/55] Avoid compiling unnecessary OpenCL kernels.

Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
---
 modules/objdetect/src/cascadedetect.cpp       | 8 ++++----
 modules/objdetect/src/opencl/cascadedetect.cl | 9 ++++++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp
index 4e25a5ccf1..6ddc0c878f 100644
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@@ -1072,10 +1072,10 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
         {
             String opts;
             if (lbufSize.area())
-                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
+                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D HAAR",
                               localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
             else
-                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
+                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D HAAR",
                               localsz.width, localsz.height, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
             haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
             if( haarKernel.empty() )
@@ -1112,10 +1112,10 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
         {
             String opts;
             if (lbufSize.area())
-                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
+                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D LBP",
                               localsz.width, localsz.height, lbufSize.area(), lbufSize.width, splitstage_ocl, nstages, MAX_FACES);
             else
-                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
+                opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d -D LBP",
                               localsz.width, localsz.height, splitstage_ocl, nstages, MAX_FACES);
             lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
             if( lbpKernel.empty() )
diff --git a/modules/objdetect/src/opencl/cascadedetect.cl b/modules/objdetect/src/opencl/cascadedetect.cl
index dfebc28dd3..13cb1aa389 100644
--- a/modules/objdetect/src/opencl/cascadedetect.cl
+++ b/modules/objdetect/src/opencl/cascadedetect.cl
@@ -12,19 +12,22 @@
 //    Erping Pang, erping@multicorewareinc.com
 //
 
-
+#ifdef HAAR
 typedef struct __attribute__((aligned(4))) OptHaarFeature
 {
     int4 ofs[3] __attribute__((aligned (4)));
     float4 weight __attribute__((aligned (4)));
 }
 OptHaarFeature;
+#endif
 
+#ifdef LBP
 typedef struct __attribute__((aligned(4))) OptLBPFeature
 {
     int16 ofs __attribute__((aligned (4)));
 }
 OptLBPFeature;
+#endif
 
 typedef struct __attribute__((aligned(4))) Stump
 {
@@ -64,6 +67,7 @@ ScaleData;
 #define NODE_COUNT 1
 #endif
 
+#ifdef HAAR
 __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
 void runHaarClassifier(
     int nscales, __global const ScaleData* scaleData,
@@ -352,7 +356,9 @@ void runHaarClassifier(
         }
     }
 }
+#endif
 
+#ifdef LBP
 #undef CALC_SUM_OFS_
 #define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
     ((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
@@ -651,3 +657,4 @@ void runLBPClassifierStump(
         }
     }
 }
+#endif

From 01717c83d720aa792ddb5eef7e1079350d91f9dc Mon Sep 17 00:00:00 2001
From: Artur Wieczorek <artwik@wp.pl>
Date: Wed, 14 Jan 2015 14:06:42 +0100
Subject: [PATCH 29/55] Don't define destructors for COM-based interfaces.

---
 modules/videoio/src/cap_dshow.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp
index 82e74878be..013d08e54a 100644
--- a/modules/videoio/src/cap_dshow.cpp
+++ b/modules/videoio/src/cap_dshow.cpp
@@ -134,8 +134,6 @@ public:
 
     virtual HRESULT STDMETHODCALLTYPE Clone(
         /* [out] */ IEnumPIDMap **ppIEnumPIDMap) = 0;
-
-    virtual ~IEnumPIDMap() {}
 };
 
 interface IMPEG2PIDMap : public IUnknown
@@ -151,8 +149,6 @@ interface IMPEG2PIDMap : public IUnknown
 
     virtual HRESULT STDMETHODCALLTYPE EnumPIDMap(
         /* [out] */ IEnumPIDMap **pIEnumPIDMap) = 0;
-
-    virtual ~IMPEG2PIDMap() {}
 };
 
 #endif
@@ -238,8 +234,6 @@ interface ISampleGrabberCB : public IUnknown
         double SampleTime,
         BYTE *pBuffer,
         LONG BufferLen) = 0;
-
-    virtual ~ISampleGrabberCB() {}
 };
 
 interface ISampleGrabber : public IUnknown
@@ -266,8 +260,6 @@ interface ISampleGrabber : public IUnknown
     virtual HRESULT STDMETHODCALLTYPE SetCallback(
         ISampleGrabberCB *pCallback,
         LONG WhichMethodToCallback) = 0;
-
-    virtual ~ISampleGrabber() {}
 };
 
 #ifndef HEADER

From 1264be878547a2f7194f546f9e07589d1175a431 Mon Sep 17 00:00:00 2001
From: Artur Wieczorek <artwik@wp.pl>
Date: Wed, 31 Dec 2014 10:15:48 +0100
Subject: [PATCH 30/55] Check if _WIN32_IE is defined with proper value in
 window_w32.cpp

The TBBUTTONINFO struct and the BTNS_xxx symbols used in the code are only declared by commctrl.h when _WIN32_IE is defined as at least 0x0500 (_WIN32_IE_IE50).
---
 modules/highgui/src/window_w32.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp
index 66e6846573..0878bff60b 100644
--- a/modules/highgui/src/window_w32.cpp
+++ b/modules/highgui/src/window_w32.cpp
@@ -48,6 +48,11 @@
 #  pragma GCC diagnostic ignored "-Wmissing-declarations"
 #endif
 
+#if (_WIN32_IE < 0x0500)
+#pragma message("WARNING: Win32 UI needs to be compiled with _WIN32_IE >= 0x0500 (_WIN32_IE_IE50)")
+#define _WIN32_IE 0x0500
+#endif
+
 #include <commctrl.h>
 #include <stdlib.h>
 #include <string.h>

From f126f371b2909bc2e9bf5c8b207efbdb0abd1bca Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 14 Jan 2015 12:55:44 +0300
Subject: [PATCH 31/55] move CUDA object detection algorithms to separate
 module

---
 modules/cuda/CMakeLists.txt                   |   2 +-
 modules/cuda/include/opencv2/cuda.hpp         | 263 --------------
 modules/cuda/perf/perf_precomp.hpp            |   1 -
 modules/cuda/src/precomp.hpp                  |   1 -
 modules/cuda/test/test_precomp.hpp            |   1 -
 modules/cudaobjdetect/CMakeLists.txt          |   9 +
 .../include/opencv2/cudaobjdetect.hpp         | 329 ++++++++++++++++++
 modules/cudaobjdetect/perf/perf_main.cpp      |  47 +++
 .../perf/perf_objdetect.cpp                   |   0
 modules/cudaobjdetect/perf/perf_precomp.hpp   |  64 ++++
 .../src/cascadeclassifier.cpp                 |   0
 .../{cuda => cudaobjdetect}/src/cuda/hog.cu   |   0
 .../{cuda => cudaobjdetect}/src/cuda/lbp.cu   |   0
 .../{cuda => cudaobjdetect}/src/cuda/lbp.hpp  |   0
 modules/{cuda => cudaobjdetect}/src/hog.cpp   |   0
 modules/cudaobjdetect/src/precomp.hpp         |  62 ++++
 modules/cudaobjdetect/test/test_main.cpp      |  45 +++
 .../test/test_objdetect.cpp                   |   0
 modules/cudaobjdetect/test/test_precomp.hpp   |  64 ++++
 samples/gpu/CMakeLists.txt                    |   2 +-
 samples/gpu/cascadeclassifier.cpp             |   2 +-
 samples/gpu/hog.cpp                           |   2 +-
 22 files changed, 624 insertions(+), 270 deletions(-)
 create mode 100644 modules/cudaobjdetect/CMakeLists.txt
 create mode 100644 modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
 create mode 100644 modules/cudaobjdetect/perf/perf_main.cpp
 rename modules/{cuda => cudaobjdetect}/perf/perf_objdetect.cpp (100%)
 create mode 100644 modules/cudaobjdetect/perf/perf_precomp.hpp
 rename modules/{cuda => cudaobjdetect}/src/cascadeclassifier.cpp (100%)
 rename modules/{cuda => cudaobjdetect}/src/cuda/hog.cu (100%)
 rename modules/{cuda => cudaobjdetect}/src/cuda/lbp.cu (100%)
 rename modules/{cuda => cudaobjdetect}/src/cuda/lbp.hpp (100%)
 rename modules/{cuda => cudaobjdetect}/src/hog.cpp (100%)
 create mode 100644 modules/cudaobjdetect/src/precomp.hpp
 create mode 100644 modules/cudaobjdetect/test/test_main.cpp
 rename modules/{cuda => cudaobjdetect}/test/test_objdetect.cpp (100%)
 create mode 100644 modules/cudaobjdetect/test/test_precomp.hpp

diff --git a/modules/cuda/CMakeLists.txt b/modules/cuda/CMakeLists.txt
index 389e90b47e..d668ea8b01 100644
--- a/modules/cuda/CMakeLists.txt
+++ b/modules/cuda/CMakeLists.txt
@@ -6,4 +6,4 @@ set(the_description "CUDA-accelerated Computer Vision")
 
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 /wd4515 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
 
-ocv_define_module(cuda opencv_calib3d opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
+ocv_define_module(cuda opencv_calib3d opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
diff --git a/modules/cuda/include/opencv2/cuda.hpp b/modules/cuda/include/opencv2/cuda.hpp
index 93bb511cd0..c6004296bd 100644
--- a/modules/cuda/include/opencv2/cuda.hpp
+++ b/modules/cuda/include/opencv2/cuda.hpp
@@ -53,274 +53,11 @@
     @addtogroup cuda
     @{
         @defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
-        @defgroup cuda_objdetect Object Detection
     @}
  */
 
 namespace cv { namespace cuda {
 
-//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
-
-//! @addtogroup cuda_objdetect
-//! @{
-
-struct CV_EXPORTS HOGConfidence
-{
-   double scale;
-   std::vector<Point> locations;
-   std::vector<double> confidences;
-   std::vector<double> part_scores[4];
-};
-
-/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
-
-Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
-as possible.
-
-@note
-   -   An example applying the HOG descriptor for people detection can be found at
-        opencv_source_code/samples/cpp/peopledetect.cpp
-    -   A CUDA example applying the HOG descriptor for people detection can be found at
-        opencv_source_code/samples/gpu/hog.cpp
-    -   (Python) An example applying the HOG descriptor for people detection can be found at
-        opencv_source_code/samples/python2/peopledetect.py
- */
-struct CV_EXPORTS HOGDescriptor
-{
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
-
-    /** @brief Creates the HOG descriptor and detector.
-
-    @param win_size Detection window size. Align to block size and block stride.
-    @param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
-    @param block_stride Block stride. It must be a multiple of cell size.
-    @param cell_size Cell size. Only (8, 8) is supported for now.
-    @param nbins Number of bins. Only 9 bins per cell are supported for now.
-    @param win_sigma Gaussian smoothing window parameter.
-    @param threshold_L2hys L2-Hys normalization method shrinkage.
-    @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
-    not.
-    @param nlevels Maximum number of detection window increases.
-     */
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
-
-    /** @brief Returns the number of coefficients required for the classification.
-     */
-    size_t getDescriptorSize() const;
-    /** @brief Returns the block histogram size.
-    */
-    size_t getBlockHistogramSize() const;
-
-    /** @brief Sets coefficients for the linear SVM classifier.
-    */
-    void setSVMDetector(const std::vector<float>& detector);
-
-    /** @brief Returns coefficients of the classifier trained for people detection (for default window size).
-    */
-    static std::vector<float> getDefaultPeopleDetector();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
-    */
-    static std::vector<float> getPeopleDetector48x96();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
-    */
-    static std::vector<float> getPeopleDetector64x128();
-
-    /** @brief Performs object detection without a multi-scale window.
-
-    @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
-    @param found_locations Left-top corner points of detected objects boundaries.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane.
-    Usually it is 0 and should be specfied in the detector coefficients (as the last free
-    coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
-    manually here.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-     */
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
-
-    /** @brief Performs object detection with a multi-scale window.
-
-    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
-    @param found_locations Detected objects boundaries.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
-    cuda::HOGDescriptor::detect for details.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-    @param scale0 Coefficient of the detection window increase.
-    @param group_threshold Coefficient to regulate the similarity threshold. When detected, some
-    objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
-     */
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
-
-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                                                    double hit_threshold, Size win_stride, Size padding,
-                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
-
-    /** @brief Returns block descriptors computed for the whole image.
-
-    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param descriptors 2D array of descriptors.
-    @param descr_format Descriptor storage format:
-    -   **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
-    -   **DESCR_FORMAT_COL_BY_COL** - Column-major order.
-
-    The function is mainly used to learn the classifier.
-     */
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-
-    // Gradients conputation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-
-    // returns subbuffer with required size, reallocates buffer if nessesary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-
-    std::vector<GpuMat> image_scales;
-};
-
-//////////////////////////// CascadeClassifier ////////////////////////////
-
-/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades. :
-
-@note
-   -   A cascade classifier example can be found at
-        opencv_source_code/samples/gpu/cascadeclassifier.cpp
-    -   A Nvidea API specific cascade classifier example can be found at
-        opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
- */
-class CV_EXPORTS CascadeClassifier_CUDA
-{
-public:
-    CascadeClassifier_CUDA();
-    /** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
-
-    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
-    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
-    type of OpenCV XML cascade supported for LBP.
-     */
-    CascadeClassifier_CUDA(const String& filename);
-    ~CascadeClassifier_CUDA();
-
-    /** @brief Checks whether the classifier is loaded or not.
-    */
-    bool empty() const;
-    /** @brief Loads the classifier from a file. The previous content is destroyed.
-
-    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
-    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
-    type of OpenCV XML cascade supported for LBP.
-     */
-    bool load(const String& filename);
-    /** @brief Destroys the loaded classifier.
-    */
-    void release();
-
-    /** @overload */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
-    /** @brief Detects objects of different sizes in the input image.
-
-    @param image Matrix of type CV_8U containing an image where objects should be detected.
-    @param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
-    with the default size. If not empty, the function searches not more than N objects, where
-    N = sizeof(objectsBufer's data)/sizeof(cv::Rect).
-    @param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
-    second signature and supported only for LBP cascades.
-    @param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
-    @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
-    to retain it.
-    @param minSize Minimum possible object size. Objects smaller than that are ignored.
-
-    The detected objects are returned as a list of rectangles.
-
-    The function returns the number of detected objects, so you can retrieve them as in the following
-    example:
-    @code
-        cuda::CascadeClassifier_CUDA cascade_gpu(...);
-
-        Mat image_cpu = imread(...)
-        GpuMat image_gpu(image_cpu);
-
-        GpuMat objbuf;
-        int detections_number = cascade_gpu.detectMultiScale( image_gpu,
-                  objbuf, 1.2, minNeighbors);
-
-        Mat obj_host;
-        // download only detected number of rectangles
-        objbuf.colRange(0, detections_number).download(obj_host);
-
-        Rect* faces = obj_host.ptr<Rect>();
-        for(int i = 0; i < detections_num; ++i)
-           cv::rectangle(image_cpu, faces[i], Scalar(255));
-
-        imshow("Faces", image_cpu);
-    @endcode
-    @sa CascadeClassifier::detectMultiScale
-     */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
-
-    bool findLargestObject;
-    bool visualizeInPlace;
-
-    Size getClassifierSize() const;
-
-private:
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-    struct HaarCascade;
-    struct LbpCascade;
-    friend class CascadeClassifier_CUDA_LBP;
-};
-
-//! @} cuda_objdetect
-
 //////////////////////////// Labeling ////////////////////////////
 
 //! @addtogroup cuda
diff --git a/modules/cuda/perf/perf_precomp.hpp b/modules/cuda/perf/perf_precomp.hpp
index fbf7d3a191..f810968cb7 100644
--- a/modules/cuda/perf/perf_precomp.hpp
+++ b/modules/cuda/perf/perf_precomp.hpp
@@ -56,7 +56,6 @@
 
 #include "opencv2/cuda.hpp"
 #include "opencv2/calib3d.hpp"
-#include "opencv2/objdetect.hpp"
 
 #ifdef GTEST_CREATE_SHARED_LIBRARY
 #error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
diff --git a/modules/cuda/src/precomp.hpp b/modules/cuda/src/precomp.hpp
index 60c71b52bd..7feeadddc1 100644
--- a/modules/cuda/src/precomp.hpp
+++ b/modules/cuda/src/precomp.hpp
@@ -47,7 +47,6 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudawarping.hpp"
 #include "opencv2/calib3d.hpp"
-#include "opencv2/objdetect.hpp"
 
 #include "opencv2/core/private.cuda.hpp"
 #include "opencv2/core/utility.hpp"
diff --git a/modules/cuda/test/test_precomp.hpp b/modules/cuda/test/test_precomp.hpp
index a0abfd2285..e3b33017a7 100644
--- a/modules/cuda/test/test_precomp.hpp
+++ b/modules/cuda/test/test_precomp.hpp
@@ -60,7 +60,6 @@
 #include "opencv2/core.hpp"
 #include "opencv2/core/opengl.hpp"
 #include "opencv2/calib3d.hpp"
-#include "opencv2/objdetect.hpp"
 
 #include "cvconfig.h"
 
diff --git a/modules/cudaobjdetect/CMakeLists.txt b/modules/cudaobjdetect/CMakeLists.txt
new file mode 100644
index 0000000000..351f6e87b4
--- /dev/null
+++ b/modules/cudaobjdetect/CMakeLists.txt
@@ -0,0 +1,9 @@
+if(IOS OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS))
+  ocv_module_disable(cudaobjdetect)
+endif()
+
+set(the_description "CUDA-accelerated Object Detection")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
+
+ocv_define_module(cudaobjdetect opencv_objdetect opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudalegacy)
diff --git a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
new file mode 100644
index 0000000000..32126515a7
--- /dev/null
+++ b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
@@ -0,0 +1,329 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CUDAOBJDETECT_HPP__
+#define __OPENCV_CUDAOBJDETECT_HPP__
+
+#ifndef __cplusplus
+#  error cudaobjdetect.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cuda.hpp"
+
+/**
+  @addtogroup cuda
+  @{
+      @defgroup cuda_objdetect Object Detection
+  @}
+ */
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cuda_objdetect
+//! @{
+
+//
+// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
+//
+
+struct CV_EXPORTS HOGConfidence
+{
+   double scale;
+   std::vector<Point> locations;
+   std::vector<double> confidences;
+   std::vector<double> part_scores[4];
+};
+
+/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
+
+Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
+as possible.
+
+@note
+   -   An example applying the HOG descriptor for people detection can be found at
+        opencv_source_code/samples/cpp/peopledetect.cpp
+    -   A CUDA example applying the HOG descriptor for people detection can be found at
+        opencv_source_code/samples/gpu/hog.cpp
+    -   (Python) An example applying the HOG descriptor for people detection can be found at
+        opencv_source_code/samples/python2/peopledetect.py
+ */
+struct CV_EXPORTS HOGDescriptor
+{
+    enum { DEFAULT_WIN_SIGMA = -1 };
+    enum { DEFAULT_NLEVELS = 64 };
+    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+    /** @brief Creates the HOG descriptor and detector.
+
+    @param win_size Detection window size. Align to block size and block stride.
+    @param block_size Block size in pixels. Align to cell size. Only (16,16) is supported for now.
+    @param block_stride Block stride. It must be a multiple of cell size.
+    @param cell_size Cell size. Only (8, 8) is supported for now.
+    @param nbins Number of bins. Only 9 bins per cell are supported for now.
+    @param win_sigma Gaussian smoothing window parameter.
+    @param threshold_L2hys L2-Hys normalization method shrinkage.
+    @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
+    not.
+    @param nlevels Maximum number of detection window increases.
+     */
+    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
+                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
+                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
+                  double threshold_L2hys=0.2, bool gamma_correction=true,
+                  int nlevels=DEFAULT_NLEVELS);
+
+    /** @brief Returns the number of coefficients required for the classification.
+     */
+    size_t getDescriptorSize() const;
+    /** @brief Returns the block histogram size.
+    */
+    size_t getBlockHistogramSize() const;
+
+    /** @brief Sets coefficients for the linear SVM classifier.
+    */
+    void setSVMDetector(const std::vector<float>& detector);
+
+    /** @brief Returns coefficients of the classifier trained for people detection (for default window size).
+    */
+    static std::vector<float> getDefaultPeopleDetector();
+    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
+    */
+    static std::vector<float> getPeopleDetector48x96();
+    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
+    */
+    static std::vector<float> getPeopleDetector64x128();
+
+    /** @brief Performs object detection without a multi-scale window.
+
+    @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
+    @param found_locations Left-top corner points of detected objects boundaries.
+    @param hit_threshold Threshold for the distance between features and SVM classifying plane.
+    Usually it is 0 and should be specified in the detector coefficients (as the last free
+    coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
+    manually here.
+    @param win_stride Window stride. It must be a multiple of block stride.
+    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
+     */
+    void detect(const GpuMat& img, std::vector<Point>& found_locations,
+                double hit_threshold=0, Size win_stride=Size(),
+                Size padding=Size());
+
+    /** @brief Performs object detection with a multi-scale window.
+
+    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
+    @param found_locations Detected objects boundaries.
+    @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
+    cuda::HOGDescriptor::detect for details.
+    @param win_stride Window stride. It must be a multiple of block stride.
+    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
+    @param scale0 Coefficient of the detection window increase.
+    @param group_threshold Coefficient to regulate the similarity threshold. When detected, some
+    objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
+     */
+    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                          double hit_threshold=0, Size win_stride=Size(),
+                          Size padding=Size(), double scale0=1.05,
+                          int group_threshold=2);
+
+    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
+                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
+
+    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+                                                                    double hit_threshold, Size win_stride, Size padding,
+                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
+
+    /** @brief Returns block descriptors computed for the whole image.
+
+    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
+    @param win_stride Window stride. It must be a multiple of block stride.
+    @param descriptors 2D array of descriptors.
+    @param descr_format Descriptor storage format:
+    -   **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
+    -   **DESCR_FORMAT_COL_BY_COL** - Column-major order.
+
+    The function is mainly used to learn the classifier.
+     */
+    void getDescriptors(const GpuMat& img, Size win_stride,
+                        GpuMat& descriptors,
+                        int descr_format=DESCR_FORMAT_COL_BY_COL);
+
+    Size win_size;
+    Size block_size;
+    Size block_stride;
+    Size cell_size;
+    int nbins;
+    double win_sigma;
+    double threshold_L2hys;
+    bool gamma_correction;
+    int nlevels;
+
+protected:
+    void computeBlockHistograms(const GpuMat& img);
+    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+    double getWinSigma() const;
+    bool checkDetectorSize() const;
+
+    static int numPartsWithin(int size, int part_size, int stride);
+    static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+    // Coefficients of the separating plane
+    float free_coef;
+    GpuMat detector;
+
+    // Results of the last classification step
+    GpuMat labels, labels_buf;
+    Mat labels_host;
+
+    // Results of the last histogram evaluation step
+    GpuMat block_hists, block_hists_buf;
+
+    // Gradients computation results
+    GpuMat grad, qangle, grad_buf, qangle_buf;
+
+    // returns subbuffer with required size, reallocates buffer if necessary.
+    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
+    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
+
+    std::vector<GpuMat> image_scales;
+};
+
+//
+// CascadeClassifier
+//
+
+/** @brief Cascade classifier class used for object detection. Supports HAAR and LBP cascades.
+
+@note
+    -   A cascade classifier example can be found at
+        opencv_source_code/samples/gpu/cascadeclassifier.cpp
+    -   An NVIDIA API specific cascade classifier example can be found at
+        opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
+ */
+class CV_EXPORTS CascadeClassifier_CUDA
+{
+public:
+    CascadeClassifier_CUDA();
+    /** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
+
+    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
+    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR, and only the
+    new type of OpenCV XML cascade is supported for LBP.
+     */
+    CascadeClassifier_CUDA(const String& filename);
+    ~CascadeClassifier_CUDA();
+
+    /** @brief Checks whether the classifier is loaded or not.
+    */
+    bool empty() const;
+    /** @brief Loads the classifier from a file. The previous content is destroyed.
+
+    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
+    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR, and only the
+    new type of OpenCV XML cascade is supported for LBP.
+     */
+    bool load(const String& filename);
+    /** @brief Destroys the loaded classifier.
+    */
+    void release();
+
+    /** @overload */
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+    /** @brief Detects objects of different sizes in the input image.
+
+    @param image Matrix of type CV_8U containing an image where objects should be detected.
+    @param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
+    with the default size. If not empty, the function searches not more than N objects, where
+    N = sizeof(objectsBuf's data)/sizeof(cv::Rect).
+    @param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
+    second signature and supported only for LBP cascades.
+    @param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
+    @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
+    to retain it.
+    @param minSize Minimum possible object size. Objects smaller than that are ignored.
+
+    The detected objects are returned as a list of rectangles.
+
+    The function returns the number of detected objects, so you can retrieve them as in the following
+    example:
+    @code
+        cuda::CascadeClassifier_CUDA cascade_gpu(...);
+
+        Mat image_cpu = imread(...);
+        GpuMat image_gpu(image_cpu);
+
+        GpuMat objbuf;
+        int detections_number = cascade_gpu.detectMultiScale( image_gpu,
+                  objbuf, 1.2, minNeighbors);
+
+        Mat obj_host;
+        // download only detected number of rectangles
+        objbuf.colRange(0, detections_number).download(obj_host);
+
+        Rect* faces = obj_host.ptr<Rect>();
+        for(int i = 0; i < detections_number; ++i)
+           cv::rectangle(image_cpu, faces[i], Scalar(255));
+
+        imshow("Faces", image_cpu);
+    @endcode
+    @sa CascadeClassifier::detectMultiScale
+     */
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+
+    bool findLargestObject;
+    bool visualizeInPlace;
+
+    Size getClassifierSize() const;
+
+private:
+    struct CascadeClassifierImpl;
+    CascadeClassifierImpl* impl;
+    struct HaarCascade;
+    struct LbpCascade;
+    friend class CascadeClassifier_CUDA_LBP;
+};
+
+//! @}
+
+}} // namespace cv { namespace cuda {
+
+#endif /* __OPENCV_CUDAOBJDETECT_HPP__ */
diff --git a/modules/cudaobjdetect/perf/perf_main.cpp b/modules/cudaobjdetect/perf/perf_main.cpp
new file mode 100644
index 0000000000..7a927be744
--- /dev/null
+++ b/modules/cudaobjdetect/perf/perf_main.cpp
@@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_CUDA_MAIN(cudaobjdetect)
diff --git a/modules/cuda/perf/perf_objdetect.cpp b/modules/cudaobjdetect/perf/perf_objdetect.cpp
similarity index 100%
rename from modules/cuda/perf/perf_objdetect.cpp
rename to modules/cudaobjdetect/perf/perf_objdetect.cpp
diff --git a/modules/cudaobjdetect/perf/perf_precomp.hpp b/modules/cudaobjdetect/perf/perf_precomp.hpp
new file mode 100644
index 0000000000..16ebf61f22
--- /dev/null
+++ b/modules/cudaobjdetect/perf/perf_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/cuda_perf.hpp"
+
+#include "opencv2/cudaobjdetect.hpp"
+#include "opencv2/objdetect.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/cuda/src/cascadeclassifier.cpp b/modules/cudaobjdetect/src/cascadeclassifier.cpp
similarity index 100%
rename from modules/cuda/src/cascadeclassifier.cpp
rename to modules/cudaobjdetect/src/cascadeclassifier.cpp
diff --git a/modules/cuda/src/cuda/hog.cu b/modules/cudaobjdetect/src/cuda/hog.cu
similarity index 100%
rename from modules/cuda/src/cuda/hog.cu
rename to modules/cudaobjdetect/src/cuda/hog.cu
diff --git a/modules/cuda/src/cuda/lbp.cu b/modules/cudaobjdetect/src/cuda/lbp.cu
similarity index 100%
rename from modules/cuda/src/cuda/lbp.cu
rename to modules/cudaobjdetect/src/cuda/lbp.cu
diff --git a/modules/cuda/src/cuda/lbp.hpp b/modules/cudaobjdetect/src/cuda/lbp.hpp
similarity index 100%
rename from modules/cuda/src/cuda/lbp.hpp
rename to modules/cudaobjdetect/src/cuda/lbp.hpp
diff --git a/modules/cuda/src/hog.cpp b/modules/cudaobjdetect/src/hog.cpp
similarity index 100%
rename from modules/cuda/src/hog.cpp
rename to modules/cudaobjdetect/src/hog.cpp
diff --git a/modules/cudaobjdetect/src/precomp.hpp b/modules/cudaobjdetect/src/precomp.hpp
new file mode 100644
index 0000000000..2e5ab7af3b
--- /dev/null
+++ b/modules/cudaobjdetect/src/precomp.hpp
@@ -0,0 +1,62 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/cudaobjdetect.hpp"
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudawarping.hpp"
+#include "opencv2/objdetect.hpp"
+
+#include "opencv2/core/private.cuda.hpp"
+#include "opencv2/core/utility.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_CUDALEGACY
+#  include "opencv2/cudalegacy/private.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/cudaobjdetect/test/test_main.cpp b/modules/cudaobjdetect/test/test_main.cpp
new file mode 100644
index 0000000000..04f4fcf6e6
--- /dev/null
+++ b/modules/cudaobjdetect/test/test_main.cpp
@@ -0,0 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+CV_CUDA_TEST_MAIN("gpu")
diff --git a/modules/cuda/test/test_objdetect.cpp b/modules/cudaobjdetect/test/test_objdetect.cpp
similarity index 100%
rename from modules/cuda/test/test_objdetect.cpp
rename to modules/cudaobjdetect/test/test_objdetect.cpp
diff --git a/modules/cudaobjdetect/test/test_precomp.hpp b/modules/cudaobjdetect/test/test_precomp.hpp
new file mode 100644
index 0000000000..a2d16c8105
--- /dev/null
+++ b/modules/cudaobjdetect/test/test_precomp.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include <fstream>
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/cuda_test.hpp"
+
+#include "opencv2/cudaobjdetect.hpp"
+#include "opencv2/objdetect.hpp"
+
+#include "cvconfig.h"
+
+#endif
diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt
index 65fe4ef8cf..10c91991c9 100644
--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@@ -3,7 +3,7 @@ SET(OPENCV_CUDA_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc op
                                      opencv_calib3d opencv_cuda opencv_superres
                                      opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc
                                      opencv_cudafeatures2d opencv_cudaoptflow opencv_cudabgsegm
-                                     opencv_cudastereo opencv_cudalegacy)
+                                     opencv_cudastereo opencv_cudalegacy opencv_cudaobjdetect)
 
 ocv_check_dependencies(${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS})
 
diff --git a/samples/gpu/cascadeclassifier.cpp b/samples/gpu/cascadeclassifier.cpp
index dbb2895e96..ff19c1d224 100644
--- a/samples/gpu/cascadeclassifier.cpp
+++ b/samples/gpu/cascadeclassifier.cpp
@@ -9,7 +9,7 @@
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
-#include "opencv2/cuda.hpp"
+#include "opencv2/cudaobjdetect.hpp"
 #include "opencv2/cudaimgproc.hpp"
 #include "opencv2/cudawarping.hpp"
 
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index 59ea44f31b..106de79320 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -5,7 +5,7 @@
 #include <iomanip>
 #include <stdexcept>
 #include <opencv2/core/utility.hpp>
-#include "opencv2/cuda.hpp"
+#include "opencv2/cudaobjdetect.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/imgproc.hpp"

From 0af7597d3623957ebc2eea248d6870e7a8f6c19c Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 14 Jan 2015 14:01:34 +0300
Subject: [PATCH 32/55] fix CUDA HOG for multi GPU systems:

move GPU constants initialization from constructor to compute method
---
 modules/cudaobjdetect/src/hog.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/cudaobjdetect/src/hog.cpp b/modules/cudaobjdetect/src/hog.cpp
index f71bf2b5ad..a10afa0ae0 100644
--- a/modules/cudaobjdetect/src/hog.cpp
+++ b/modules/cudaobjdetect/src/hog.cpp
@@ -127,9 +127,6 @@ cv::cuda::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size bl
 
     Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
     CV_Assert(cells_per_block == Size(2, 2));
-
-    cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
 }
 
 size_t cv::cuda::HOGDescriptor::getDescriptorSize() const
@@ -221,6 +218,9 @@ void cv::cuda::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad,
 
 void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 {
+    cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
+    hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
+
     computeGradient(img, grad, qangle);
 
     size_t block_hist_size = getBlockHistogramSize();

From 8257dc3c1ebbc7dfc207e48ed02ea5a71ca1fa94 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 14 Jan 2015 18:18:51 +0300
Subject: [PATCH 33/55] refactor CUDA HOG algorithm:

use abstract interface with hidden implementation
---
 .../include/opencv2/cudaobjdetect.hpp         |  189 +-
 modules/cudaobjdetect/perf/perf_objdetect.cpp |   10 +-
 modules/cudaobjdetect/src/hog.cpp             | 3048 +++++++++--------
 modules/cudaobjdetect/test/test_objdetect.cpp |  135 +-
 samples/gpu/hog.cpp                           |   35 +-
 5 files changed, 1697 insertions(+), 1720 deletions(-)

diff --git a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
index 32126515a7..2887bf472c 100644
--- a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
+++ b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
@@ -65,32 +65,24 @@ namespace cv { namespace cuda {
 // HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
 //
 
-struct CV_EXPORTS HOGConfidence
-{
-   double scale;
-   std::vector<Point> locations;
-   std::vector<double> confidences;
-   std::vector<double> part_scores[4];
-};
-
 /** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.
 
-Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
-as possible.
-
 @note
-   -   An example applying the HOG descriptor for people detection can be found at
+    -   An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/cpp/peopledetect.cpp
     -   A CUDA example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/gpu/hog.cpp
     -   (Python) An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/python2/peopledetect.py
  */
-struct CV_EXPORTS HOGDescriptor
+class CV_EXPORTS HOG : public cv::Algorithm
 {
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+public:
+    enum
+    {
+        DESCR_FORMAT_ROW_BY_ROW,
+        DESCR_FORMAT_COL_BY_COL
+    };
 
     /** @brief Creates the HOG descriptor and detector.
 
@@ -99,132 +91,105 @@ struct CV_EXPORTS HOGDescriptor
     @param block_stride Block stride. It must be a multiple of cell size.
     @param cell_size Cell size. Only (8, 8) is supported for now.
     @param nbins Number of bins. Only 9 bins per cell are supported for now.
-    @param win_sigma Gaussian smoothing window parameter.
-    @param threshold_L2hys L2-Hys normalization method shrinkage.
-    @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
-    not.
-    @param nlevels Maximum number of detection window increases.
      */
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
+    static Ptr<HOG> create(Size win_size = Size(64, 128),
+                           Size block_size = Size(16, 16),
+                           Size block_stride = Size(8, 8),
+                           Size cell_size = Size(8, 8),
+                           int nbins = 9);
+
+    //! Gaussian smoothing window parameter.
+    virtual void setWinSigma(double win_sigma) = 0;
+    virtual double getWinSigma() const = 0;
+
+    //! L2-Hys normalization method shrinkage.
+    virtual void setL2HysThreshold(double threshold_L2hys) = 0;
+    virtual double getL2HysThreshold() const = 0;
+
+    //! Flag to specify whether the gamma correction preprocessing is required or not.
+    virtual void setGammaCorrection(bool gamma_correction) = 0;
+    virtual bool getGammaCorrection() const = 0;
+
+    //! Maximum number of detection window increases.
+    virtual void setNumLevels(int nlevels) = 0;
+    virtual int getNumLevels() const = 0;
+
+    //! Threshold for the distance between features and SVM classifying plane.
+    //! Usually it is 0 and should be specified in the detector coefficients (as the last free
+    //! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
+    //! manually here.
+    virtual void setHitThreshold(double hit_threshold) = 0;
+    virtual double getHitThreshold() const = 0;
+
+    //! Window stride. It must be a multiple of block stride.
+    virtual void setWinStride(Size win_stride) = 0;
+    virtual Size getWinStride() const = 0;
+
+    //! Coefficient of the detection window increase.
+    virtual void setScaleFactor(double scale0) = 0;
+    virtual double getScaleFactor() const = 0;
+
+    //! Coefficient to regulate the similarity threshold. When detected, some
+    //! objects can be covered by many rectangles. 0 means not to perform grouping.
+    //! See groupRectangles.
+    virtual void setGroupThreshold(int group_threshold) = 0;
+    virtual int getGroupThreshold() const = 0;
+
+    //! Descriptor storage format:
+    //!   - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
+    //!   - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
+    virtual void setDescriptorFormat(int descr_format) = 0;
+    virtual int getDescriptorFormat() const = 0;
 
     /** @brief Returns the number of coefficients required for the classification.
      */
-    size_t getDescriptorSize() const;
+    virtual size_t getDescriptorSize() const = 0;
+
     /** @brief Returns the block histogram size.
-    */
-    size_t getBlockHistogramSize() const;
+     */
+    virtual size_t getBlockHistogramSize() const = 0;
 
     /** @brief Sets coefficients for the linear SVM classifier.
-    */
-    void setSVMDetector(const std::vector<float>& detector);
+     */
+    virtual void setSVMDetector(InputArray detector) = 0;
 
-    /** @brief Returns coefficients of the classifier trained for people detection (for default window size).
-    */
-    static std::vector<float> getDefaultPeopleDetector();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
-    */
-    static std::vector<float> getPeopleDetector48x96();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
-    */
-    static std::vector<float> getPeopleDetector64x128();
+    /** @brief Returns coefficients of the classifier trained for people detection.
+     */
+    virtual Mat getDefaultPeopleDetector() const = 0;
 
     /** @brief Performs object detection without a multi-scale window.
 
     @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
     @param found_locations Left-top corner points of detected objects boundaries.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane.
-    Usually it is 0 and should be specfied in the detector coefficients (as the last free
-    coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
-    manually here.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
+    @param confidences Optional output array for confidences.
      */
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
+    virtual void detect(InputArray img,
+                        std::vector<Point>& found_locations,
+                        std::vector<double>* confidences = NULL) = 0;
 
     /** @brief Performs object detection with a multi-scale window.
 
     @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
     @param found_locations Detected objects boundaries.
+    @param confidences Optional output array for confidences.
     @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
     cuda::HOGDescriptor::detect for details.
     @param win_stride Window stride. It must be a multiple of block stride.
     @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-    @param scale0 Coefficient of the detection window increase.
-    @param group_threshold Coefficient to regulate the similarity threshold. When detected, some
-    objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
      */
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
-
-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                                                Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                                                    double hit_threshold, Size win_stride, Size padding,
-                                                                    std::vector<HOGConfidence> &conf_out, int group_threshold);
+    virtual void detectMultiScale(InputArray img,
+                                  std::vector<Rect>& found_locations,
+                                  std::vector<double>* confidences = NULL) = 0;
 
     /** @brief Returns block descriptors computed for the whole image.
 
     @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
-    @param win_stride Window stride. It must be a multiple of block stride.
     @param descriptors 2D array of descriptors.
-    @param descr_format Descriptor storage format:
-    -   **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
-    -   **DESCR_FORMAT_COL_BY_COL** - Column-major order.
-
-    The function is mainly used to learn the classifier.
+    @param stream CUDA stream.
      */
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
-
-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-
-    // Gradients conputation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-
-    // returns subbuffer with required size, reallocates buffer if nessesary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-
-    std::vector<GpuMat> image_scales;
+    virtual void compute(InputArray img,
+                         OutputArray descriptors,
+                         Stream& stream = Stream::Null()) = 0;
 };
 
 //
diff --git a/modules/cudaobjdetect/perf/perf_objdetect.cpp b/modules/cudaobjdetect/perf/perf_objdetect.cpp
index c5d4649b84..8aeabc4ec0 100644
--- a/modules/cudaobjdetect/perf/perf_objdetect.cpp
+++ b/modules/cudaobjdetect/perf/perf_objdetect.cpp
@@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
         const cv::cuda::GpuMat d_img(img);
         std::vector<cv::Rect> gpu_found_locations;
 
-        cv::cuda::HOGDescriptor d_hog;
-        d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+        d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
 
-        TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
+        TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);
 
         SANITY_CHECK(gpu_found_locations);
     }
@@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
     {
         std::vector<cv::Rect> cpu_found_locations;
 
+        cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+
         cv::HOGDescriptor hog;
-        hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        hog.setSVMDetector(d_hog->getDefaultPeopleDetector());
 
         TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
 
diff --git a/modules/cudaobjdetect/src/hog.cpp b/modules/cudaobjdetect/src/hog.cpp
index a10afa0ae0..1d465ff25c 100644
--- a/modules/cudaobjdetect/src/hog.cpp
+++ b/modules/cudaobjdetect/src/hog.cpp
@@ -42,23 +42,12 @@
 
 #include "precomp.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
+
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_no_cuda(); }
-size_t cv::cuda::HOGDescriptor::getDescriptorSize() const { throw_no_cuda(); return 0; }
-size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const { throw_no_cuda(); return 0; }
-double cv::cuda::HOGDescriptor::getWinSigma() const { throw_no_cuda(); return 0; }
-bool cv::cuda::HOGDescriptor::checkDetectorSize() const { throw_no_cuda(); return false; }
-void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::detect(const GpuMat&, std::vector<Point>&, double, Size, Size) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, double, int) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_no_cuda(); }
-std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128() { throw_no_cuda(); return std::vector<float>(); }
-void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat&, std::vector<Point>&, double, Size, Size, std::vector<Point>&, std::vector<double>&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, std::vector<HOGConfidence>&, int) { throw_no_cuda(); }
+Ptr<cuda::HOG> cv::cuda::HOG::create(Size, Size, Size, Size, int) { throw_no_cuda(); return Ptr<cuda::HOG>(); }
 
 #else
 
@@ -102,1518 +91,1607 @@ namespace cv { namespace cuda { namespace device
     }
 }}}
 
-using namespace ::cv::cuda::device;
+using namespace cv::cuda::device;
 
-cv::cuda::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
-                                      int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
-        : win_size(win_size_),
-          block_size(block_size_),
-          block_stride(block_stride_),
-          cell_size(cell_size_),
-          nbins(nbins_),
-          win_sigma(win_sigma_),
-          threshold_L2hys(threshold_L2hys_),
-          gamma_correction(gamma_correction_),
-          nlevels(nlevels_)
+namespace
 {
-    CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 &&
-              (win_size.height - block_size.height) % block_stride.height == 0);
+    class HOG_Impl : public cv::cuda::HOG
+    {
+    public:
+        HOG_Impl(Size win_size,
+                 Size block_size,
+                 Size block_stride,
+                 Size cell_size,
+                 int nbins);
 
-    CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0);
+        virtual void setWinSigma(double win_sigma) { win_sigma_ = win_sigma; }
+        virtual double getWinSigma() const;
 
-    CV_Assert(block_stride == cell_size);
+        virtual void setL2HysThreshold(double threshold_L2hys) { threshold_L2hys_ = threshold_L2hys; }
+        virtual double getL2HysThreshold() const { return threshold_L2hys_; }
 
-    CV_Assert(cell_size == Size(8, 8));
+        virtual void setGammaCorrection(bool gamma_correction) { gamma_correction_ = gamma_correction; }
+        virtual bool getGammaCorrection() const { return gamma_correction_; }
 
-    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
-    CV_Assert(cells_per_block == Size(2, 2));
-}
+        virtual void setNumLevels(int nlevels) { nlevels_ = nlevels; }
+        virtual int getNumLevels() const { return nlevels_; }
 
-size_t cv::cuda::HOGDescriptor::getDescriptorSize() const
-{
-    return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize();
-}
+        virtual void setHitThreshold(double hit_threshold) { hit_threshold_ = hit_threshold; }
+        virtual double getHitThreshold() const { return hit_threshold_; }
 
-size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const
-{
-    Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
-    return (size_t)(nbins * cells_per_block.area());
-}
+        virtual void setWinStride(Size win_stride) { win_stride_ = win_stride; }
+        virtual Size getWinStride() const { return win_stride_; }
 
-double cv::cuda::HOGDescriptor::getWinSigma() const
-{
-    return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;
-}
+        virtual void setScaleFactor(double scale0) { scale0_ = scale0; }
+        virtual double getScaleFactor() const { return scale0_; }
 
-bool cv::cuda::HOGDescriptor::checkDetectorSize() const
-{
-    size_t detector_size = detector.rows * detector.cols;
-    size_t descriptor_size = getDescriptorSize();
-    return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
-}
+        virtual void setGroupThreshold(int group_threshold) { group_threshold_ = group_threshold; }
+        virtual int getGroupThreshold() const { return group_threshold_; }
 
-void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>& _detector)
-{
-    std::vector<float> detector_reordered(_detector.size());
+        virtual void setDescriptorFormat(int descr_format) { descr_format_ = descr_format; }
+        virtual int getDescriptorFormat() const { return descr_format_; }
 
-    size_t block_hist_size = getBlockHistogramSize();
-    cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride);
+        virtual size_t getDescriptorSize() const;
 
-    for (int i = 0; i < blocks_per_img.height; ++i)
-        for (int j = 0; j < blocks_per_img.width; ++j)
+        virtual size_t getBlockHistogramSize() const;
+
+        virtual void setSVMDetector(InputArray detector);
+
+        virtual Mat getDefaultPeopleDetector() const;
+
+        virtual void detect(InputArray img,
+                            std::vector<Point>& found_locations,
+                            std::vector<double>* confidences);
+
+        virtual void detectMultiScale(InputArray img,
+                                      std::vector<Rect>& found_locations,
+                                      std::vector<double>* confidences);
+
+        virtual void compute(InputArray img,
+                             OutputArray descriptors,
+                             Stream& stream);
+
+    private:
+        Size win_size_;
+        Size block_size_;
+        Size block_stride_;
+        Size cell_size_;
+        int nbins_;
+
+        double win_sigma_;
+        double threshold_L2hys_;
+        bool gamma_correction_;
+        int nlevels_;
+        double hit_threshold_;
+        Size win_stride_;
+        double scale0_;
+        int group_threshold_;
+        int descr_format_;
+
+    private:
+        int getTotalHistSize(Size img_size) const;
+        void computeBlockHistograms(const GpuMat& img, GpuMat& block_hists);
+        void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
+
+        // Coefficients of the separating plane
+        float free_coef_;
+        GpuMat detector_;
+    };
+
+    HOG_Impl::HOG_Impl(Size win_size,
+                       Size block_size,
+                       Size block_stride,
+                       Size cell_size,
+                       int nbins) :
+        win_size_(win_size),
+        block_size_(block_size),
+        block_stride_(block_stride),
+        cell_size_(cell_size),
+        nbins_(nbins),
+
+        win_sigma_(-1.0),
+        threshold_L2hys_(0.2),
+        gamma_correction_(true),
+        nlevels_(64),
+        hit_threshold_(0.0),
+        win_stride_(block_stride),
+        scale0_(1.05),
+        group_threshold_(2),
+        descr_format_(DESCR_FORMAT_COL_BY_COL)
+    {
+        CV_Assert((win_size.width  - block_size.width ) % block_stride.width  == 0 &&
+                  (win_size.height - block_size.height) % block_stride.height == 0);
+
+        CV_Assert(block_size.width % cell_size.width == 0 &&
+                  block_size.height % cell_size.height == 0);
+
+        CV_Assert(block_stride == cell_size);
+
+        CV_Assert(cell_size == Size(8, 8));
+
+        Size cells_per_block(block_size.width / cell_size.width, block_size.height / cell_size.height);
+        CV_Assert(cells_per_block == Size(2, 2));
+    }
+
+    static int numPartsWithin(int size, int part_size, int stride)
+    {
+        return (size - part_size + stride) / stride;
+    }
+
+    static Size numPartsWithin(Size size, Size part_size, Size stride)
+    {
+        return Size(numPartsWithin(size.width, part_size.width, stride.width),
+                    numPartsWithin(size.height, part_size.height, stride.height));
+    }
+
+    size_t HOG_Impl::getDescriptorSize() const
+    {
+        return numPartsWithin(win_size_, block_size_, block_stride_).area() * getBlockHistogramSize();
+    }
+
+    size_t HOG_Impl::getBlockHistogramSize() const
+    {
+        Size cells_per_block(block_size_.width / cell_size_.width, block_size_.height / cell_size_.height);
+        return nbins_ * cells_per_block.area();
+    }
+
+    double HOG_Impl::getWinSigma() const
+    {
+        return win_sigma_ >= 0 ? win_sigma_ : (block_size_.width + block_size_.height) / 8.0;
+    }
+
+    void HOG_Impl::setSVMDetector(InputArray _detector)
+    {
+        const int descriptor_size = static_cast<int>(getDescriptorSize());
+
+        const Mat detector = _detector.getMat();
+
+        CV_Assert( detector.type() == CV_32FC1 );
+        CV_Assert( detector.rows == 1 );
+        CV_Assert( detector.cols == descriptor_size || detector.cols == descriptor_size + 1 );
+
+        std::vector<float> detector_reordered(detector.ptr<float>(), detector.ptr<float>() + detector.cols);
+
+        size_t block_hist_size = getBlockHistogramSize();
+        Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
+
+        for (int i = 0; i < blocks_per_win.height; ++i)
         {
-            const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
-            float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
-            for (size_t k = 0; k < block_hist_size; ++k)
-                dst[k] = src[k];
-        }
-
-    this->detector.upload(Mat(detector_reordered).reshape(1, 1));
-
-    size_t descriptor_size = getDescriptorSize();
-    free_coef = _detector.size() > descriptor_size ? _detector[descriptor_size] : 0;
-
-    CV_Assert(checkDetectorSize());
-}
-
-cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)
-{
-    if (buf.empty() || buf.type() != type)
-        buf.create(sz, type);
-    else
-        if (buf.cols < sz.width || buf.rows < sz.height)
-            buf.create(std::max(buf.rows, sz.height), std::max(buf.cols, sz.width), type);
-
-    return buf(Rect(Point(0,0), sz));
-}
-
-cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)
-{
-    return getBuffer(Size(cols, rows), type, buf);
-}
-
-
-void cv::cuda::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, GpuMat& _qangle)
-{
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-
-    //   grad.create(img.size(), CV_32FC2);
-    _grad = getBuffer(img.size(), CV_32FC2, grad_buf);
-
-    //   qangle.create(img.size(), CV_8UC2);
-    _qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);
-
-    float angleScale = (float)(nbins / CV_PI);
-    switch (img.type())
-    {
-        case CV_8UC1:
-            hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
-            break;
-        case CV_8UC4:
-            hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
-            break;
-    }
-}
-
-
-void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
-{
-    cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
-
-    computeGradient(img, grad, qangle);
-
-    size_t block_hist_size = getBlockHistogramSize();
-    Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
-
-    //   block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F);
-    block_hists = getBuffer(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F, block_hists_buf);
-
-    hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols,
-                        grad, qangle, (float)getWinSigma(), block_hists.ptr<float>());
-
-    hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols,
-                         block_hists.ptr<float>(), (float)threshold_L2hys);
-}
-
-
-void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
-{
-    CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
-
-    computeBlockHistograms(img);
-
-    const size_t block_hist_size = getBlockHistogramSize();
-    Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
-    Size wins_per_img   = numPartsWithin(img.size(), win_size, win_stride);
-
-    descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F);
-
-    switch (descr_format)
-    {
-    case DESCR_FORMAT_ROW_BY_ROW:
-        hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
-        break;
-    case DESCR_FORMAT_COL_BY_COL:
-        hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                                    win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
-        break;
-    default:
-        CV_Error(cv::Error::StsBadArg, "Unknown descriptor format");
-    }
-}
-
-void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                          Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences)
-{
-  CV_Assert(padding == Size(0, 0));
-
-  hits.clear();
-  if (detector.empty())
-    return;
-
-  computeBlockHistograms(img);
-
-  if (win_stride == Size())
-    win_stride = block_stride;
-  else
-    CV_Assert(win_stride.width % block_stride.width == 0 &&
-         win_stride.height % block_stride.height == 0);
-
-  Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
-  labels.create(1, wins_per_img.area(), CV_32F);
-
-  hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
-               win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
-               detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());
-
-  labels.download(labels_host);
-  float* vec = labels_host.ptr<float>();
-
-  // does not support roi for now..
-  locations.clear();
-  confidences.clear();
-  for (int i = 0; i < wins_per_img.area(); i++)
-    {
-      int y = i / wins_per_img.width;
-      int x = i - wins_per_img.width * y;
-      if (vec[i] >= hit_threshold)
-   hits.push_back(Point(x * win_stride.width, y * win_stride.height));
-
-      Point pt(win_stride.width * x, win_stride.height * y);
-      locations.push_back(pt);
-      confidences.push_back((double)vec[i]);
-    }
-}
-
-void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                            double hit_threshold, Size win_stride, Size padding,
-                            std::vector<HOGConfidence> &conf_out, int group_threshold)
-{
-    std::vector<double> level_scale;
-    double scale = 1.;
-    int levels = 0;
-
-    for (levels = 0; levels < (int)conf_out.size(); levels++)
-    {
-        scale = conf_out[levels].scale;
-        level_scale.push_back(scale);
-        if (cvRound(img.cols/scale) < win_size.width || cvRound(img.rows/scale) < win_size.height)
-            break;
-    }
-
-    levels = std::max(levels, 1);
-    level_scale.resize(levels);
-
-    std::vector<Rect> all_candidates;
-    std::vector<Point> locations;
-
-    for (size_t i = 0; i < level_scale.size(); i++)
-    {
-        scale = level_scale[i];
-        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
-        GpuMat smaller_img;
-
-        if (sz == img.size())
-            smaller_img = img;
-        else
-        {
-            smaller_img.create(sz, img.type());
-            switch (img.type())
+            for (int j = 0; j < blocks_per_win.width; ++j)
             {
-            case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
-            case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
+                const float* src = detector.ptr<float>() + (j * blocks_per_win.height + i) * block_hist_size;
+                float* dst = &detector_reordered[0] + (i * blocks_per_win.width + j) * block_hist_size;
+                for (size_t k = 0; k < block_hist_size; ++k)
+                    dst[k] = src[k];
             }
         }
 
-        computeConfidence(smaller_img, locations, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences);
-
-        Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
-        for (size_t j = 0; j < locations.size(); j++)
-            all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
+        detector_.upload(Mat(detector_reordered).reshape(1, 1));
+        free_coef_ = detector.cols > descriptor_size ? detector.at<float>(0, descriptor_size) : 0;
     }
 
-    found_locations.assign(all_candidates.begin(), all_candidates.end());
-    groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
-}
+    static Mat getPeopleDetector64x128();
+    static Mat getPeopleDetector48x96();
 
-
-void cv::cuda::HOGDescriptor::detect(const GpuMat& img, std::vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
-{
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-    CV_Assert(padding == Size(0, 0));
-
-    hits.clear();
-    if (detector.empty())
-        return;
-
-    computeBlockHistograms(img);
-
-    if (win_stride == Size())
-        win_stride = block_stride;
-    else
-        CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
-
-    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
-    //   labels.create(1, wins_per_img.area(), CV_8U);
-    labels = getBuffer(1, wins_per_img.area(), CV_8U, labels_buf);
-
-    hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
-                        win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
-                        detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr());
-
-    labels.download(labels_host);
-    unsigned char* vec = labels_host.ptr();
-    for (int i = 0; i < wins_per_img.area(); i++)
+    Mat HOG_Impl::getDefaultPeopleDetector() const
     {
-        int y = i / wins_per_img.width;
-        int x = i - wins_per_img.width * y;
-        if (vec[i])
-            hits.push_back(Point(x * win_stride.width, y * win_stride.height));
+        CV_Assert( win_size_ == Size(64, 128) || win_size_ == Size(48, 96) );
+
+        if (win_size_ == Size(64, 128))
+            return getPeopleDetector64x128();
+        else
+            return getPeopleDetector48x96();
     }
-}
 
-
-
-void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations, double hit_threshold,
-                                              Size win_stride, Size padding, double scale0, int group_threshold)
-{
-
-    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
-
-    std::vector<double> level_scale;
-    double scale = 1.;
-    int levels = 0;
-
-    for (levels = 0; levels < nlevels; levels++)
+    void HOG_Impl::detect(InputArray _img, std::vector<Point>& hits, std::vector<double>* confidences)
     {
-        level_scale.push_back(scale);
-        if (cvRound(img.cols/scale) < win_size.width ||
-            cvRound(img.rows/scale) < win_size.height || scale0 <= 1)
-            break;
-        scale *= scale0;
-    }
-    levels = std::max(levels, 1);
-    level_scale.resize(levels);
-    image_scales.resize(levels);
+        const GpuMat img = _img.getGpuMat();
 
-    std::vector<Rect> all_candidates;
-    std::vector<Point> locations;
+        CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
+        CV_Assert( win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0 );
 
-    for (size_t i = 0; i < level_scale.size(); i++)
-    {
-        scale = level_scale[i];
-        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
-        GpuMat smaller_img;
+        hits.clear();
+        if (detector_.empty())
+            return;
 
-        if (sz == img.size())
-            smaller_img = img;
+        BufferPool pool(Stream::Null());
+
+        GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
+        computeBlockHistograms(img, block_hists);
+
+        Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
+
+        if (confidences == NULL)
+        {
+            GpuMat labels = pool.getBuffer(1, wins_per_img.area(), CV_8UC1);
+
+            hog::classify_hists(win_size_.height, win_size_.width,
+                                block_stride_.height, block_stride_.width,
+                                win_stride_.height, win_stride_.width,
+                                img.rows, img.cols,
+                                block_hists.ptr<float>(),
+                                detector_.ptr<float>(),
+                                (float)free_coef_,
+                                (float)hit_threshold_,
+                                labels.ptr());
+
+            Mat labels_host;
+            labels.download(labels_host);
+            unsigned char* vec = labels_host.ptr();
+
+            for (int i = 0; i < wins_per_img.area(); i++)
+            {
+                int y = i / wins_per_img.width;
+                int x = i - wins_per_img.width * y;
+                if (vec[i])
+                    hits.push_back(Point(x * win_stride_.width, y * win_stride_.height));
+            }
+        }
         else
         {
-            image_scales[i].create(sz, img.type());
-            switch (img.type())
-            {
-                case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break;
-                case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break;
-            }
-            smaller_img = image_scales[i];
-        }
+            GpuMat labels = pool.getBuffer(1, wins_per_img.area(), CV_32FC1);
 
-        detect(smaller_img, locations, hit_threshold, win_stride, padding);
-        Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
-        for (size_t j = 0; j < locations.size(); j++)
-            all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
+            hog::compute_confidence_hists(win_size_.height, win_size_.width,
+                                          block_stride_.height, block_stride_.width,
+                                          win_stride_.height, win_stride_.width,
+                                          img.rows, img.cols,
+                                          block_hists.ptr<float>(),
+                                          detector_.ptr<float>(),
+                                          (float)free_coef_,
+                                          (float)hit_threshold_,
+                                          labels.ptr<float>());
+
+            Mat labels_host;
+            labels.download(labels_host);
+            float* vec = labels_host.ptr<float>();
+
+            confidences->clear();
+            for (int i = 0; i < wins_per_img.area(); i++)
+            {
+                int y = i / wins_per_img.width;
+                int x = i - wins_per_img.width * y;
+
+                if (vec[i] >= hit_threshold_)
+                {
+                    hits.push_back(Point(x * win_stride_.width, y * win_stride_.height));
+                    confidences->push_back((double)vec[i]);
+                }
+            }
+        }
     }
 
-    found_locations.assign(all_candidates.begin(), all_candidates.end());
-    groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
+    void HOG_Impl::detectMultiScale(InputArray _img,
+                                    std::vector<Rect>& found_locations,
+                                    std::vector<double>* confidences)
+    {
+        const GpuMat img = _img.getGpuMat();
+
+        CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
+        CV_Assert( confidences == NULL || group_threshold_ == 0 );
+
+        std::vector<double> level_scale;
+        double scale = 1.0;
+        int levels = 0;
+        for (levels = 0; levels < nlevels_; levels++)
+        {
+            level_scale.push_back(scale);
+
+            if (cvRound(img.cols / scale) < win_size_.width ||
+                cvRound(img.rows / scale) < win_size_.height ||
+                scale0_ <= 1)
+            {
+                break;
+            }
+
+            scale *= scale0_;
+        }
+        levels = std::max(levels, 1);
+        level_scale.resize(levels);
+
+        std::vector<Point> level_hits;
+        std::vector<double> level_confidences;
+
+        BufferPool pool(Stream::Null());
+
+        found_locations.clear();
+        for (size_t i = 0; i < level_scale.size(); i++)
+        {
+            scale = level_scale[i];
+
+            Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+
+            GpuMat smaller_img;
+            if (sz == img.size())
+            {
+                smaller_img = img;
+            }
+            else
+            {
+                smaller_img = pool.getBuffer(sz, img.type());
+                switch (img.type())
+                {
+                    case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
+                    case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
+                }
+            }
+
+            detect(smaller_img, level_hits,
+                   confidences ? &level_confidences : NULL);
+
+            Size scaled_win_size(cvRound(win_size_.width * scale),
+                                 cvRound(win_size_.height * scale));
+
+            for (size_t j = 0; j < level_hits.size(); j++)
+            {
+                found_locations.push_back(Rect(Point2d(level_hits[j]) * scale, scaled_win_size));
+                if (confidences)
+                    confidences->push_back(level_confidences[j]);
+            }
+        }
+
+        if (group_threshold_ > 0)
+        {
+            groupRectangles(found_locations, group_threshold_, 0.2/*magic number copied from CPU version*/);
+        }
+    }
+
+    void HOG_Impl::compute(InputArray _img,
+                           OutputArray _descriptors,
+                           Stream& stream)
+    {
+        const GpuMat img = _img.getGpuMat();
+
+        CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
+        CV_Assert( win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0 );
+        CV_Assert( !stream );
+
+        BufferPool pool(stream);
+
+        GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
+        computeBlockHistograms(img, block_hists);
+
+        const size_t block_hist_size = getBlockHistogramSize();
+        Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
+        Size wins_per_img   = numPartsWithin(img.size(), win_size_, win_stride_);
+
+        _descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32FC1);
+        GpuMat descriptors = _descriptors.getGpuMat();
+
+        switch (descr_format_)
+        {
+        case DESCR_FORMAT_ROW_BY_ROW:
+            hog::extract_descrs_by_rows(win_size_.height, win_size_.width,
+                                        block_stride_.height, block_stride_.width,
+                                        win_stride_.height, win_stride_.width,
+                                        img.rows, img.cols,
+                                        block_hists.ptr<float>(),
+                                        descriptors);
+            break;
+        case DESCR_FORMAT_COL_BY_COL:
+            hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
+                                        block_stride_.height, block_stride_.width,
+                                        win_stride_.height, win_stride_.width,
+                                        img.rows, img.cols,
+                                        block_hists.ptr<float>(),
+                                        descriptors);
+            break;
+        default:
+            CV_Error(cv::Error::StsBadArg, "Unknown descriptor format");
+        }
+    }
+
+    int HOG_Impl::getTotalHistSize(Size img_size) const
+    {
+        size_t block_hist_size = getBlockHistogramSize();
+        Size blocks_per_img = numPartsWithin(img_size, block_size_, block_stride_);
+        return static_cast<int>(block_hist_size * blocks_per_img.area());
+    }
+
+    void HOG_Impl::computeBlockHistograms(const GpuMat& img, GpuMat& block_hists)
+    {
+        cv::Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
+        hog::set_up_constants(nbins_, block_stride_.width, block_stride_.height, blocks_per_win.width, blocks_per_win.height);
+
+        BufferPool pool(Stream::Null());
+
+        GpuMat grad = pool.getBuffer(img.size(), CV_32FC2);
+        GpuMat qangle = pool.getBuffer(img.size(), CV_8UC2);
+        computeGradient(img, grad, qangle);
+
+        block_hists.create(1, getTotalHistSize(img.size()), CV_32FC1);
+
+        hog::compute_hists(nbins_,
+                           block_stride_.width, block_stride_.height,
+                           img.rows, img.cols,
+                           grad, qangle,
+                           (float)getWinSigma(),
+                           block_hists.ptr<float>());
+
+        hog::normalize_hists(nbins_,
+                             block_stride_.width, block_stride_.height,
+                             img.rows, img.cols,
+                             block_hists.ptr<float>(),
+                             (float)threshold_L2hys_);
+    }
+
+    void HOG_Impl::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
+    {
+        grad.create(img.size(), CV_32FC2);
+        qangle.create(img.size(), CV_8UC2);
+
+        float angleScale = (float)(nbins_ / CV_PI);
+        switch (img.type())
+        {
+            case CV_8UC1:
+                hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
+                break;
+            case CV_8UC4:
+                hog::compute_gradients_8UC4(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
+                break;
+        }
+    }
 }
 
-int cv::cuda::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
+Ptr<cuda::HOG> cv::cuda::HOG::create(Size win_size,
+                                     Size block_size,
+                                     Size block_stride,
+                                     Size cell_size,
+                                     int nbins)
 {
-    return (size - part_size + stride) / stride;
+    return makePtr<HOG_Impl>(win_size, block_size, block_stride, cell_size, nbins);
 }
 
-cv::Size cv::cuda::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
+namespace
 {
-    return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
-}
+    static Mat getPeopleDetector48x96()
+    {
+        static float detector[] = {
+            0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
+            0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
+            0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
+            0.254676f, -0.069235f, 0.082566f, 0.147260f, 0.326969f, 0.148888f,
+            0.055270f, -0.087985f, 0.261720f, 0.143442f, 0.026812f, 0.238212f,
+            0.194020f, 0.056341f, -0.025854f, -0.034444f, -0.156631f, 0.205174f,
+            0.089008f, -0.139811f, -0.100147f, -0.037830f, -0.029230f, -0.055641f,
+            0.033248f, -0.016512f, 0.155244f, 0.247315f, -0.124694f, -0.048414f,
+            -0.062219f, 0.193683f, 0.004574f, 0.055089f, 0.093565f, 0.167712f,
+            0.167581f, 0.018895f, 0.215258f, 0.122609f, 0.090520f, -0.067219f,
+            -0.049029f, -0.099615f, 0.241804f, -0.094893f, -0.176248f, 0.001727f,
+            -0.134473f, 0.104442f, 0.050942f, 0.081165f, 0.072156f, 0.121646f,
+            0.002656f, -0.297974f, -0.133587f, -0.060121f, -0.092515f, -0.048974f,
+            -0.084754f, -0.180111f, -0.038590f, 0.086283f, -0.134636f, -0.107249f,
+            0.132890f, 0.141556f, 0.249425f, 0.130273f, -0.030031f, 0.073212f,
+            -0.008155f, 0.019931f, 0.071688f, 0.000300f, -0.019525f, -0.021725f,
+            -0.040993f, -0.086841f, 0.070124f, 0.240033f, 0.265350f, 0.043208f,
+            0.166754f, 0.091453f, 0.060916f, -0.036972f, -0.091043f, 0.079873f,
+            0.219781f, 0.158102f, -0.140618f, -0.043016f, 0.124802f, 0.093668f,
+            0.103208f, 0.094872f, 0.080541f, 0.137711f, 0.160566f, -0.169231f,
+            0.013983f, 0.309508f, -0.004217f, -0.057200f, -0.064489f, 0.014066f,
+            0.361009f, 0.251328f, -0.080983f, -0.044183f, 0.061436f, -0.037381f,
+            -0.078786f, 0.030993f, 0.066314f, 0.037683f, 0.152325f, -0.091683f,
+            0.070203f, 0.217856f, 0.036435f, -0.076462f, 0.006254f, -0.094431f,
+            0.154829f, -0.023038f, -0.196961f, -0.024594f, 0.178465f, -0.050139f,
+            -0.045932f, -0.000965f, 0.109112f, 0.046165f, -0.159373f, -0.008713f,
+            0.041307f, 0.097129f, -0.057211f, -0.064599f, 0.077165f, 0.176167f,
+            0.138322f, 0.065753f, -0.104950f, 0.017933f, 0.136255f, -0.011598f,
+            0.047007f, 0.080550f, 0.068619f, 0.084661f, -0.035493f, -0.091314f,
+            -0.041411f, 0.060971f, -0.101912f, -0.079870f, -0.085977f, -0.022686f,
+            0.079788f, -0.098064f, -0.054603f, 0.040383f, 0.300794f, 0.128603f,
+            0.094844f, 0.047407f, 0.101825f, 0.061832f, -0.162160f, -0.204553f,
+            -0.035165f, 0.101450f, -0.016641f, -0.027140f, -0.134392f, -0.008743f,
+            0.102331f, 0.114853f, 0.009644f, 0.062823f, 0.237339f, 0.167843f,
+            0.053066f, -0.012592f, 0.043158f, 0.002305f, 0.065001f, -0.038929f,
+            -0.020356f, 0.152343f, 0.043469f, -0.029967f, -0.042948f, 0.032481f,
+            0.068488f, -0.110840f, -0.111083f, 0.111980f, -0.002072f, -0.005562f,
+            0.082926f, 0.006635f, -0.108153f, 0.024242f, -0.086464f, -0.189884f,
+            -0.017492f, 0.191456f, -0.007683f, -0.128769f, -0.038017f, -0.132380f,
+            0.091926f, 0.079696f, -0.106728f, -0.007656f, 0.172744f, 0.011576f,
+            0.009883f, 0.083258f, -0.026516f, 0.145534f, 0.153924f, -0.130290f,
+            -0.108945f, 0.124490f, -0.003186f, -0.100485f, 0.015024f, -0.060512f,
+            0.026288f, -0.086713f, -0.169012f, 0.076517f, 0.215778f, 0.043701f,
+            -0.131642f, -0.012585f, -0.045181f, -0.118183f, -0.241544f, -0.167293f,
+            -0.020107f, -0.019917f, -0.101827f, -0.107096f, -0.010503f, 0.044938f,
+            0.189680f, 0.217119f, -0.046086f, 0.044508f, 0.199716f, -0.036004f,
+            -0.148927f, 0.013355f, -0.078279f, 0.030451f, 0.056301f, -0.024609f,
+            0.083224f, 0.099533f, -0.039432f, -0.138880f, 0.005482f, -0.024120f,
+            -0.140468f, -0.066381f, -0.017057f, 0.009260f, -0.058004f, -0.028486f,
+            -0.061610f, 0.007483f, -0.158309f, -0.150687f, -0.044595f, -0.105121f,
+            -0.045763f, -0.006618f, -0.024419f, -0.117713f, -0.119366f, -0.175941f,
+            -0.071542f, 0.119027f, 0.111362f, 0.043080f, 0.034889f, 0.093003f,
+            0.007842f, 0.057368f, -0.108834f, -0.079968f, 0.230959f, 0.020205f,
+            0.011470f, 0.098877f, 0.101310f, -0.030215f, -0.018018f, -0.059552f,
+            -0.106157f, 0.021866f, -0.036471f, 0.080051f, 0.041165f, -0.082101f,
+            0.117726f, 0.030961f, -0.054763f, -0.084102f, -0.185778f, -0.061305f,
+            -0.038089f, -0.110728f, -0.264010f, 0.076675f, -0.077111f, -0.137644f,
+            0.036232f, 0.277995f, 0.019116f, 0.107738f, 0.144003f, 0.080304f,
+            0.215036f, 0.228897f, 0.072713f, 0.077773f, 0.120168f, 0.075324f,
+            0.062730f, 0.122478f, -0.049008f, 0.164912f, 0.162450f, 0.041246f,
+            0.009891f, -0.097827f, -0.038700f, -0.023027f, -0.120020f, 0.203364f,
+            0.248474f, 0.149810f, -0.036276f, -0.082814f, -0.090343f, -0.027143f,
+            -0.075689f, -0.320310f, -0.000500f, -0.143334f, -0.065077f, -0.186936f,
+            0.129372f, 0.116431f, 0.181699f, 0.170436f, 0.418854f, 0.460045f,
+            0.333719f, 0.230515f, 0.047822f, -0.044954f, -0.068086f, 0.140179f,
+            -0.044821f, 0.085550f, 0.092483f, -0.107296f, -0.130670f, -0.206629f,
+            0.114601f, -0.317869f, -0.076663f, 0.038680f, 0.212753f, -0.016059f,
+            -0.126526f, -0.163602f, 0.210154f, 0.099887f, -0.126366f, 0.118453f,
+            0.019309f, -0.021611f, -0.096499f, -0.111809f, -0.200489f, 0.142854f,
+            0.228840f, -0.353346f, -0.179151f, 0.116834f, 0.252389f, -0.031728f,
+            -0.188135f, -0.158998f, 0.386523f, 0.122315f, 0.209944f, 0.394023f,
+            0.359030f, 0.260717f, 0.170335f, 0.013683f, -0.142596f, -0.026138f,
+            -0.011878f, -0.150519f, 0.047159f, -0.107062f, -0.147347f, -0.187689f,
+            -0.186027f, -0.208048f, 0.058468f, -0.073026f, -0.236556f, -0.079788f,
+            -0.146216f, -0.058563f, -0.101361f, -0.071294f, -0.071093f, 0.116919f,
+            0.234304f, 0.306781f, 0.321866f, 0.240000f, 0.073261f, -0.012173f,
+            0.026479f, 0.050173f, 0.166127f, 0.228955f, 0.061905f, 0.156460f,
+            0.205990f, 0.120672f, 0.037350f, 0.167884f, 0.290099f, 0.420900f,
+            -0.012601f, 0.189839f, 0.306378f, 0.118383f, -0.095598f, -0.072360f,
+            -0.132496f, -0.224259f, -0.126021f, 0.022714f, 0.284039f, 0.051369f,
+            -0.000927f, -0.058735f, -0.083354f, -0.141254f, -0.187578f, -0.202669f,
+            0.048902f, 0.246597f, 0.441863f, 0.342519f, 0.066979f, 0.215286f,
+            0.188191f, -0.072240f, -0.208142f, -0.030196f, 0.178141f, 0.136985f,
+            -0.043374f, -0.181098f, 0.091815f, 0.116177f, -0.126690f, -0.386625f,
+            0.368165f, 0.269149f, -0.088042f, -0.028823f, 0.092961f, 0.024099f,
+            0.046112f, 0.176756f, 0.135849f, 0.124955f, 0.195467f, -0.037218f,
+            0.167217f, 0.188938f, 0.053528f, -0.066561f, 0.133721f, -0.070565f,
+            0.115898f, 0.152435f, -0.116993f, -0.110592f, -0.179005f, 0.026668f,
+            0.080530f, 0.075084f, -0.070401f, 0.012497f, 0.021849f, -0.139764f,
+            -0.022020f, -0.096301f, -0.064954f, -0.127446f, -0.013806f, -0.108315f,
+            0.156285f, 0.149867f, -0.011382f, 0.064532f, 0.029168f, 0.027393f,
+            0.069716f, 0.153735f, 0.038459f, 0.230714f, 0.253840f, 0.059522f,
+            -0.045053f, 0.014083f, 0.071103f, 0.068747f, 0.095887f, 0.005832f,
+            0.144887f, 0.026357f, -0.067359f, -0.044151f, -0.123283f, -0.019911f,
+            0.005318f, 0.109208f, -0.003201f, -0.021734f, 0.142025f, -0.066907f,
+            -0.120070f, -0.188639f, 0.012472f, -0.048704f, -0.012366f, -0.184828f,
+            0.168591f, 0.267166f, 0.058208f, -0.044101f, 0.033500f, 0.178558f,
+            0.104550f, 0.122418f, 0.080177f, 0.173246f, 0.298537f, 0.064173f,
+            0.053397f, 0.174341f, 0.230984f, 0.117025f, 0.166242f, 0.227781f,
+            0.120623f, 0.176952f, -0.011393f, -0.086483f, -0.008270f, 0.051700f,
+            -0.153369f, -0.058837f, -0.057639f, -0.060115f, 0.026349f, -0.160745f,
+            -0.037894f, -0.048575f, 0.041052f, -0.022112f, 0.060365f, 0.051906f,
+            0.162657f, 0.138519f, -0.050185f, -0.005938f, 0.071301f, 0.127686f,
+            0.062342f, 0.144400f, 0.072600f, 0.198436f, 0.246219f, -0.078185f,
+            -0.036169f, 0.075934f, 0.047328f, -0.013601f, 0.087205f, 0.019900f,
+            0.022606f, -0.015365f, -0.092506f, 0.075275f, -0.116375f, 0.050500f,
+            0.045118f, 0.166567f, 0.072073f, 0.060371f, 0.131747f, -0.169863f,
+            -0.039352f, -0.047486f, -0.039797f, -0.204312f, 0.021710f, 0.129443f,
+            -0.021173f, 0.173416f, -0.070794f, -0.063986f, 0.069689f, -0.064099f,
+            -0.123201f, -0.017372f, -0.206870f, 0.065863f, 0.113226f, 0.024707f,
+            -0.071341f, -0.066964f, -0.098278f, -0.062927f, 0.075840f, 0.014716f,
+            0.019378f, 0.132699f, -0.074191f, -0.089557f, -0.078446f, -0.197488f,
+            -0.173665f, 0.052583f, 0.044361f, 0.113549f, 0.098492f, 0.077379f,
+            -0.011146f, -0.192593f, -0.164435f, 0.045568f, 0.205699f, 0.049187f,
+            -0.082281f, 0.134874f, 0.185499f, 0.034968f, -0.119561f, -0.112372f,
+            -0.115091f, -0.054042f, -0.183816f, -0.078100f, 0.190695f, 0.091617f,
+            0.004257f, -0.041135f, -0.061453f, -0.141592f, -0.194809f, -0.120638f,
+            0.020168f, 0.109672f, 0.067398f, -0.015238f, -0.239145f, -0.264671f,
+            -0.185176f, 0.050472f, 0.020793f, 0.035678f, 0.022839f, -0.052055f,
+            -0.127968f, -0.113049f, -0.228416f, -0.258281f, -0.053437f, 0.076424f,
+            0.061450f, 0.237478f, 0.003618f, -0.055865f, -0.108087f, -0.028937f,
+            0.045585f, 0.052829f, -0.001471f, 0.022826f, 0.059565f, -0.104430f,
+            -0.077266f, -0.211882f, -0.212078f, 0.028074f, 0.075846f, 0.016265f,
+            0.161879f, 0.134477f, 0.008935f, -0.048041f, 0.074692f, 0.004928f,
+            -0.025156f, 0.192874f, 0.074410f, 0.308732f, 0.267400f, 0.094208f,
+            -0.005251f, 0.042041f, -0.032148f, 0.015588f, 0.252869f, 0.175302f,
+            0.022892f, 0.081673f, 0.063208f, 0.162626f, 0.194426f, 0.233890f,
+            0.262292f, 0.186930f, 0.084079f, -0.286388f, -0.213034f, -0.048867f,
+            -0.207669f, -0.170050f, 0.011673f, -0.092958f, -0.192786f, -0.273536f,
+            0.230904f, 0.266732f, 0.320519f, 0.297155f, 0.548169f, 0.304922f,
+            0.132687f, 0.247333f, 0.212488f, -0.271472f, -0.142105f, -0.002627f,
+            -0.119215f, 0.128383f, 0.100079f, -0.057490f, -0.121902f, -0.228892f,
+            0.202292f, -0.399795f, -0.371326f, -0.095836f, -0.063626f, -0.161375f,
+            -0.311180f, -0.294797f, 0.242122f, 0.011788f, 0.095573f, 0.322523f,
+            0.511840f, 0.322880f, 0.313259f, 0.173331f, 0.002542f, -0.029802f,
+            0.324766f, -0.326170f, -0.340547f, -0.138288f, -0.002963f, -0.114060f,
+            -0.377312f, -0.442570f, 0.212446f, -0.007759f, -0.011576f, 0.169711f,
+            0.308689f, 0.317348f, 0.539390f, 0.332845f, 0.057331f, -0.068180f,
+            0.101994f, 0.266995f, 0.209570f, 0.355730f, 0.091635f, 0.170238f,
+            0.125215f, 0.274154f, 0.070223f, 0.025515f, 0.049946f, -0.000550f,
+            0.043715f, -0.141843f, 0.020844f, 0.129871f, 0.256588f, 0.105015f,
+            0.148339f, 0.170682f, 0.028792f, 0.074037f, 0.160042f, 0.405137f,
+            0.246187f, 0.352160f, 0.168951f, 0.222263f, 0.264439f, 0.065945f,
+            0.021963f, -0.075084f, 0.093105f, 0.027318f, 0.098864f, 0.057566f,
+            -0.080282f, 0.185032f, 0.314419f, 0.333727f, 0.125798f, 0.294919f,
+            0.386002f, 0.217619f, -0.183517f, -0.278622f, -0.002342f, -0.027821f,
+            -0.134266f, -0.331843f, -0.008296f, 0.124564f, 0.053712f, -0.369016f,
+            -0.095036f, 0.209381f, 0.423760f, 0.371760f, 0.106397f, 0.369408f,
+            0.485608f, 0.231201f, -0.138685f, -0.349208f, -0.070083f, 0.028991f,
+            -0.081630f, -0.395992f, -0.146791f, -0.027354f, 0.063396f, -0.272484f,
+            0.058299f, 0.338207f, 0.110767f, -0.052642f, -0.233848f, -0.027448f,
+            0.030328f, 0.155572f, -0.093826f, 0.019331f, 0.120638f, 0.006292f,
+            -0.106083f, -0.236290f, -0.140933f, -0.088067f, -0.025138f, -0.208395f,
+            -0.025502f, 0.144192f, -0.048353f, -0.106144f, -0.305121f, -0.114147f,
+            0.090963f, 0.327727f, 0.035606f, -0.093779f, 0.002651f, -0.171081f,
+            -0.188131f, -0.216571f, -0.209101f, -0.054402f, 0.157147f, -0.057127f,
+            0.066584f, 0.008988f, 0.041191f, 0.034456f, -0.078255f, 0.052099f,
+            -0.022239f, 0.066981f, -0.117520f, -0.072637f, 0.062512f, 0.037570f,
+            -0.057544f, -0.312359f, 0.034357f, -0.031549f, 0.002566f, -0.207375f,
+            -0.070654f, -0.018786f, -0.044815f, -0.012814f, -0.076320f, 0.078183f,
+            0.023877f, 0.117078f, 0.022292f, -0.205424f, -0.060430f, -0.017296f,
+            -0.004827f, -0.321036f, -0.092155f, 0.038837f, 0.073190f, -0.067513f,
+            0.026521f, 0.171945f, 0.087318f, 0.034495f, -0.034089f, 0.154410f,
+            -0.061431f, 0.007435f, -0.111094f, -0.095976f, 0.014741f, -0.132324f,
+            -0.029517f, -0.192160f, 0.098667f, 0.020762f, 0.177050f, -0.064510f,
+            -0.054437f, -0.058678f, -0.001858f, 0.167602f, 0.015735f, 0.054338f,
+            0.016477f, 0.186381f, -0.010667f, 0.054692f, 0.126742f, 0.013140f,
+            0.090353f, -0.133608f, -0.018017f, -0.152619f, 0.027600f, -0.138700f,
+            -0.050274f, 0.045141f, -0.118731f, 0.094797f, -0.167605f, 0.097461f,
+            -0.009131f, 0.199920f, -0.052976f, 0.158194f, 0.178568f, -0.107600f,
+            0.009671f, -0.084072f, -0.040258f, -0.205673f, 0.102891f, 0.223511f,
+            0.042699f, 0.118548f, -0.021274f, 0.110997f, -0.155121f, 0.027696f,
+            -0.149968f, 0.051552f, -0.129219f, 0.173524f, 0.073972f, -0.189045f,
+            -0.034523f, -0.106655f, -0.011843f, -0.197381f, 0.219413f, 0.183197f,
+            -0.054920f, 0.144955f, 0.036517f, -0.085412f, -0.229070f, -0.143710f,
+            -0.049486f, 0.156634f, -0.008673f, -0.064778f, 0.082344f, 0.145673f,
+            0.002912f, -0.210121f, -0.116564f, 0.078425f, 0.220908f, -0.067594f,
+            0.048610f, 0.084912f, -0.066202f, -0.112515f, -0.217767f, -0.082640f,
+            -0.017414f, 0.230265f, -0.070735f, 0.066073f, 0.215256f, 0.071157f,
+            -0.087220f, -0.202235f, -0.011918f, 0.099562f, 0.174716f, -0.063845f,
+            -0.121055f, 0.014367f, 0.132709f, -0.005060f, -0.244606f, -0.179693f,
+            -0.134690f, 0.023239f, -0.193116f, -0.076975f, -0.021164f, -0.001938f,
+            -0.163799f, -0.111437f, -0.210362f, -0.166376f, 0.034754f, 0.010036f,
+            -0.021917f, 0.068014f, -0.086893f, -0.251746f, -0.267171f, 0.037383f,
+            0.003966f, 0.033571f, -0.151506f, 0.025437f, -0.020626f, -0.308454f,
+            -0.343143f, -0.092263f, -0.026261f, -0.028345f, 0.036036f, 0.035169f,
+            0.129470f, 0.122205f, 0.015661f, -0.070612f, -0.094333f, -0.066055f,
+            -0.041083f, 0.159146f, 0.073184f, 0.110044f, 0.174471f, 0.078069f,
+            -0.014881f, 0.008116f, 0.013209f, 0.075857f, 0.195605f, 0.062714f,
+            0.067955f, 0.056544f, -0.153908f, -0.141749f, -0.072550f, 0.033523f,
+            -0.024665f, 0.134487f, 0.079076f, 0.133562f, 0.227130f, 0.018054f,
+            0.004928f, 0.169162f, 0.065152f, 0.072160f, 0.131631f, 0.096303f,
+            0.054288f, 0.106256f, 0.114632f, 0.119038f, 0.515200f, 0.247429f,
+            0.199134f, 0.211957f, 0.127558f, -0.294684f, -0.194890f, -0.049988f,
+            -0.112247f, -0.008122f, -0.006176f, 0.037035f, -0.110881f, -0.249989f,
+            0.152434f, 0.234621f, 0.153340f, 0.349283f, 0.683049f, 0.157174f,
+            0.124844f, 0.099136f, 0.064407f, -0.248400f, -0.155323f, -0.026498f,
+            -0.023450f, 0.049051f, -0.114187f, 0.007195f, -0.176825f, -0.376926f,
+            0.366159f, -0.179938f, -0.148508f, 0.006043f, 0.170048f, 0.097866f,
+            -0.102658f, -0.260430f, 0.248868f, 0.037019f, -0.118111f, 0.078176f,
+            0.194171f, 0.211328f, 0.368612f, 0.361213f, 0.130013f, 0.094650f,
+            0.227396f, -0.178058f, -0.114782f, -0.008093f, 0.231080f, -0.011843f,
+            -0.097917f, -0.325788f, 0.141879f, 0.119738f, -0.230427f, -0.117419f,
+            -0.114153f, 0.037903f, 0.116383f, 0.218773f, -0.101884f, 0.059466f,
+            0.119255f, 0.010874f, -0.031449f, 0.045996f, 0.119931f, 0.273760f,
+            0.311700f, 0.261794f, 0.194809f, 0.339829f, 0.239449f, 0.064140f,
+            0.077597f, 0.098996f, 0.143534f, 0.184602f, 0.037507f, 0.225494f,
+            0.096142f, -0.147370f, -0.207833f, -0.174742f, -0.086391f, -0.038942f,
+            0.159577f, -0.088492f, -0.000989f, 0.108154f, -0.025890f, -0.072713f,
+            0.025997f, -0.006803f, -0.086879f, -0.011290f, -0.269200f, -0.103450f,
+            -0.124910f, -0.116340f, 0.141459f, 0.208800f, 0.042268f, 0.265034f,
+            0.516474f, 0.217591f, -0.018843f, -0.313328f, -0.168363f, 0.047129f,
+            0.090480f, -0.109852f, -0.018761f, 0.210669f, 0.281269f, -0.043591f,
+            -0.034147f, -0.237772f, -0.134843f, -0.072481f, -0.103831f, 0.038355f,
+            0.308619f, 0.148023f, -0.045867f, -0.123950f, -0.210860f, -0.064973f,
+            -0.036308f, -0.046731f, -0.022099f, 0.095776f, 0.409423f, 0.060635f,
+            -0.065196f, 0.051828f, 0.027981f, -0.009609f, -0.137681f, -0.095011f,
+            -0.019045f, 0.177278f, 0.009759f, -0.092119f, -0.016958f, -0.133860f,
+            -0.118421f, -0.032039f, -0.006214f, -0.084541f, 0.063971f, -0.073642f,
+            0.165676f, 0.110443f, 0.044131f, 0.046568f, 0.053292f, -0.055466f,
+            0.015512f, 0.371947f, 0.232102f, -0.016923f, 0.103979f, -0.091758f,
+            0.005907f, 0.209100f, 0.157433f, 0.030518f, 0.250366f, 0.062322f,
+            0.036720f, 0.094676f, 0.017306f, -0.010328f, -0.079012f, 0.016781f,
+            -0.112435f, 0.061795f, 0.042543f, -0.126799f, -0.009975f, -0.056760f,
+            0.046424f, -0.194712f, -0.139399f, -0.037731f, 0.157989f, -0.016261f,
+            0.123345f, 0.230563f, 0.083300f, -0.016392f, 0.059567f, -0.016035f,
+            -0.064767f, 0.231945f, 0.156629f, 0.034602f, 0.145628f, 0.041315f,
+            0.034535f, 0.019967f, -0.089188f, -0.012091f, 0.307857f, 0.211405f,
+            -0.025091f, -0.148249f, -0.129384f, 0.063536f, -0.068603f, -0.067941f,
+            -0.035104f, 0.210832f, 0.063810f, 0.062764f, -0.089889f, -0.030554f,
+            0.014791f, -0.053362f, -0.037818f, -0.196640f, 0.008388f, -0.082654f,
+            0.143056f, 0.064221f, 0.069795f, 0.191040f, 0.097321f, -0.028679f,
+            0.075794f, 0.313154f, 0.086240f, 0.207643f, 0.017809f, 0.122867f,
+            0.224586f, 0.167403f, -0.023884f, 0.047434f, 0.344091f, 0.187745f,
+            0.136177f, 0.141738f, 0.063799f, 0.045233f, -0.077342f, -0.003525f,
+            -0.165041f, -0.025616f, -0.073745f, 0.164439f, 0.011200f, -0.145896f,
+            -0.027954f, -0.061987f, -0.039874f, -0.142775f, 0.151042f, -0.038238f,
+            0.053152f, 0.078615f, 0.086061f, 0.100593f, 0.128046f, -0.071006f,
+            -0.116558f, 0.208445f, 0.051086f, 0.076843f, 0.023191f, -0.084781f,
+            -0.011790f, 0.147807f, -0.048554f, -0.113932f, 0.283322f, 0.190934f,
+            0.092789f, 0.033018f, -0.142428f, -0.142480f, -0.099023f, -0.041020f,
+            -0.042760f, 0.203295f, -0.053475f, 0.042424f, 0.222839f, -0.019167f,
+            -0.133176f, -0.276216f, -0.031998f, 0.117290f, 0.177827f, -0.059973f,
+            -0.064744f, -0.117040f, -0.155482f, -0.099531f, 0.164121f, -0.026682f,
+            -0.093810f, 0.238993f, -0.006506f, 0.007830f, 0.065819f, -0.203643f,
+            -0.100925f, -0.053652f, -0.130770f, 0.026277f, 0.131796f, 0.032742f,
+            0.127186f, 0.116694f, -0.161122f, -0.279773f, -0.252515f, -0.002638f,
+            0.042812f, 0.096776f, -0.123280f, 0.064858f, -0.010455f, -0.219760f,
+            -0.239331f, -0.104363f, -0.058022f, -0.053584f, 0.025611f, 0.005129f,
+            -0.100418f, -0.045712f, -0.194418f, -0.126366f, -0.030530f, 0.051168f,
+            0.215959f, 0.172402f, -0.054700f, -0.185995f, -0.278360f, -0.193693f,
+            -0.040309f, 0.003735f, -0.007770f, 0.123556f, 0.190179f, -0.077315f,
+            0.117403f, 0.212942f, 0.012160f, 0.000113f, 0.027331f, 0.040202f,
+            0.033293f, 0.219438f, 0.184174f, 0.259349f, 0.311206f, 0.082547f,
+            -0.047875f, -0.078417f, 0.010746f, 0.082620f, 0.311931f, 0.307605f,
+            0.003863f, 0.021405f, -0.026388f, -0.019572f, 0.020582f, -0.059353f,
+            0.025199f, 0.261319f, 0.086316f, 0.143614f, 0.107780f, 0.003900f,
+            -0.188397f, -0.038563f, -0.106045f, -0.125154f, -0.010509f, 0.054021f,
+            0.242130f, 0.279152f, 0.215546f, 0.346995f, 0.440856f, 0.237452f,
+            0.234154f, 0.301646f, 0.168929f, -0.208358f, -0.126848f, 0.010260f,
+            0.121018f, -0.062975f, -0.052848f, 0.050341f, -0.061103f, -0.266482f,
+            0.107186f, 0.140221f, 0.280065f, 0.287889f, 0.373198f, 0.151596f,
+            0.013593f, 0.115616f, 0.014616f, -0.281710f, -0.237597f, -0.117305f,
+            -0.000034f, -0.136739f, -0.196275f, -0.095225f, -0.125310f, -0.250514f,
+            0.236804f, -0.071805f, -0.037421f, 0.048230f, 0.321596f, 0.063632f,
+            0.024039f, -0.029133f, 0.230983f, 0.160593f, -0.154355f, -0.013086f,
+            -0.079929f, 0.094692f, 0.160391f, 0.180239f, 0.053895f, 0.100759f,
+            0.288631f, 0.038191f, 0.181692f, 0.229682f, 0.440166f, 0.063401f,
+            0.006273f, 0.020865f, 0.338695f, 0.256244f, -0.043927f, 0.115617f,
+            0.003296f, 0.173965f, 0.021318f, -0.040936f, -0.118932f, 0.182380f,
+            0.235922f, -0.053233f, -0.015053f, -0.101057f, 0.095341f, 0.051111f,
+            0.161831f, 0.032614f, 0.159496f, 0.072375f, 0.025089f, 0.023748f,
+            0.029151f, 0.161284f, -0.117717f, -0.036191f, -0.176822f, -0.162006f,
+            0.226542f, -0.078329f, 0.043079f, -0.119172f, 0.054614f, -0.101365f,
+            -0.064541f, -0.115304f, 0.135170f, 0.298872f, 0.098060f, 0.089428f,
+            -0.007497f, 0.110391f, -0.028824f, 0.020835f, -0.036804f, 0.125411f,
+            0.192105f, -0.048931f, 0.003086f, -0.010681f, 0.074698f, -0.016263f,
+            0.096063f, 0.060267f, -0.007277f, 0.139139f, -0.080635f, 0.036628f,
+            0.086058f, 0.131979f, 0.085707f, 0.025301f, 0.226094f, 0.194759f,
+            0.042193f, -0.157846f, -0.068402f, -0.141450f, -0.112659f, -0.076305f,
+            -0.069085f, -0.114332f, -0.102005f, 0.132193f, -0.067042f, 0.106643f,
+            0.198964f, 0.171616f, 0.167237f, -0.033730f, -0.026755f, 0.083621f,
+            0.149459f, -0.002799f, -0.000318f, 0.011753f, 0.065889f, -0.089375f,
+            -0.049610f, 0.224579f, 0.216548f, -0.034908f, -0.017851f, -0.088144f,
+            0.007530f, 0.240268f, 0.073270f, 0.013263f, 0.175323f, 0.012082f,
+            0.093993f, 0.015282f, 0.105854f, 0.107990f, 0.077798f, -0.096166f,
+            -0.079607f, 0.177820f, 0.142392f, 0.033337f, -0.078100f, -0.081616f,
+            -0.046993f, 0.139459f, 0.020272f, -0.123161f, 0.175269f, 0.105217f,
+            0.057328f, 0.080909f, -0.012612f, -0.097081f, 0.082060f, -0.096716f,
+            -0.063921f, 0.201884f, 0.128166f, -0.035051f, -0.032227f, -0.068139f,
+            -0.115915f, 0.095080f, -0.086007f, -0.067543f, 0.030776f, 0.032712f,
+            0.088937f, 0.054336f, -0.039329f, -0.114022f, 0.171672f, -0.112321f,
+            -0.217646f, 0.065186f, 0.060223f, 0.192174f, 0.055580f, -0.131107f,
+            -0.144338f, 0.056730f, -0.034707f, -0.081616f, -0.135298f, -0.000614f,
+            0.087189f, 0.014614f, 0.067709f, 0.107689f, 0.225780f, 0.084361f,
+            -0.008544f, 0.051649f, -0.048369f, -0.037739f, -0.060710f, 0.002654f,
+            0.016935f, 0.085563f, -0.015961f, -0.019265f, 0.111788f, 0.062376f,
+            0.202019f, 0.047713f, 0.042261f, 0.069716f, 0.242913f, 0.021052f,
+            -0.072812f, -0.155920f, -0.026436f, 0.035621f, -0.079300f, -0.028787f,
+            -0.048329f, 0.084718f, -0.060565f, -0.083750f, -0.164075f, -0.040742f,
+            -0.086219f, 0.015271f, -0.005204f, -0.016038f, 0.045816f, -0.050433f,
+            -0.077652f, 0.117109f, 0.009611f, -0.009045f, -0.008634f, -0.055373f,
+            -0.085968f, 0.028527f, -0.054736f, -0.168089f, 0.175839f, 0.071205f,
+            -0.023603f, 0.037907f, -0.004561f, -0.022634f, 0.123831f, 0.094469f,
+            -0.072920f, -0.133642f, -0.014032f, -0.142754f, -0.026999f, -0.199409f,
+            0.013268f, 0.226989f, 0.048650f, -0.170988f, -0.050141f, 0.007880f,
+            0.061880f, 0.019078f, -0.043578f, -0.038139f, 0.134814f, 0.054097f,
+            -0.081670f, 0.176838f, 0.047920f, -0.038176f, 0.050406f, -0.107181f,
+            -0.036279f, 0.027060f, 0.081594f, -0.002820f, 0.090507f, -0.033338f,
+            -0.059571f, 0.013404f, -0.099860f, 0.073371f, 0.342805f, 0.098305f,
+            -0.150910f, -0.020822f, -0.056960f, 0.046262f, -0.043413f, -0.149405f,
+            -0.129105f, -0.010899f, -0.014229f, -0.179949f, -0.113044f, -0.049468f,
+            -0.065513f, 0.090269f, -0.011919f, 0.087846f, 0.095796f, 0.146127f,
+            0.101599f, 0.078066f, -0.084348f, -0.100002f, -0.020134f, -0.050169f,
+            0.062122f, 0.014640f, 0.019143f, 0.036543f, 0.180924f, -0.013976f,
+            -0.066768f, -0.001090f, -0.070419f, -0.004839f, -0.001504f, 0.034483f,
+            -0.044954f, -0.050336f, -0.088638f, -0.174782f, -0.116082f, -0.205507f,
+            0.015587f, -0.042839f, -0.096879f, -0.144097f, -0.050268f, -0.196796f,
+            0.109639f, 0.271411f, 0.173732f, 0.108070f, 0.156437f, 0.124255f,
+            0.097242f, 0.238693f, 0.083941f, 0.109105f, 0.223940f, 0.267188f,
+            0.027385f, 0.025819f, 0.125070f, 0.093738f, 0.040353f, 0.038645f,
+            -0.012730f, 0.144063f, 0.052931f, -0.009138f, 0.084193f, 0.160272f,
+            -0.041366f, 0.011951f, -0.121446f, -0.106713f, -0.047566f, 0.047984f,
+            -0.255224f, -0.076116f, 0.098685f, -0.150845f, -0.171513f, -0.156590f,
+            0.058331f, 0.187493f, 0.413018f, 0.554265f, 0.372242f, 0.237943f,
+            0.124571f, 0.110829f, 0.010322f, -0.174477f, -0.067627f, -0.001979f,
+            0.142913f, 0.040597f, 0.019907f, 0.025963f, -0.043585f, -0.120732f,
+            0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f,
+            -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
+            -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
+            -9.063785f };
 
-std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector()
-{
-    return getPeopleDetector64x128();
-}
+        return Mat(1, static_cast<int>(sizeof(detector)/sizeof(detector[0])), CV_32FC1, detector);
+    }
 
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96()
-{
-    static const float detector[] = {
-        0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
-        0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
-        0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
-        0.254676f, -0.069235f, 0.082566f, 0.147260f, 0.326969f, 0.148888f,
-        0.055270f, -0.087985f, 0.261720f, 0.143442f, 0.026812f, 0.238212f,
-        0.194020f, 0.056341f, -0.025854f, -0.034444f, -0.156631f, 0.205174f,
-        0.089008f, -0.139811f, -0.100147f, -0.037830f, -0.029230f, -0.055641f,
-        0.033248f, -0.016512f, 0.155244f, 0.247315f, -0.124694f, -0.048414f,
-        -0.062219f, 0.193683f, 0.004574f, 0.055089f, 0.093565f, 0.167712f,
-        0.167581f, 0.018895f, 0.215258f, 0.122609f, 0.090520f, -0.067219f,
-        -0.049029f, -0.099615f, 0.241804f, -0.094893f, -0.176248f, 0.001727f,
-        -0.134473f, 0.104442f, 0.050942f, 0.081165f, 0.072156f, 0.121646f,
-        0.002656f, -0.297974f, -0.133587f, -0.060121f, -0.092515f, -0.048974f,
-        -0.084754f, -0.180111f, -0.038590f, 0.086283f, -0.134636f, -0.107249f,
-        0.132890f, 0.141556f, 0.249425f, 0.130273f, -0.030031f, 0.073212f,
-        -0.008155f, 0.019931f, 0.071688f, 0.000300f, -0.019525f, -0.021725f,
-        -0.040993f, -0.086841f, 0.070124f, 0.240033f, 0.265350f, 0.043208f,
-        0.166754f, 0.091453f, 0.060916f, -0.036972f, -0.091043f, 0.079873f,
-        0.219781f, 0.158102f, -0.140618f, -0.043016f, 0.124802f, 0.093668f,
-        0.103208f, 0.094872f, 0.080541f, 0.137711f, 0.160566f, -0.169231f,
-        0.013983f, 0.309508f, -0.004217f, -0.057200f, -0.064489f, 0.014066f,
-        0.361009f, 0.251328f, -0.080983f, -0.044183f, 0.061436f, -0.037381f,
-        -0.078786f, 0.030993f, 0.066314f, 0.037683f, 0.152325f, -0.091683f,
-        0.070203f, 0.217856f, 0.036435f, -0.076462f, 0.006254f, -0.094431f,
-        0.154829f, -0.023038f, -0.196961f, -0.024594f, 0.178465f, -0.050139f,
-        -0.045932f, -0.000965f, 0.109112f, 0.046165f, -0.159373f, -0.008713f,
-        0.041307f, 0.097129f, -0.057211f, -0.064599f, 0.077165f, 0.176167f,
-        0.138322f, 0.065753f, -0.104950f, 0.017933f, 0.136255f, -0.011598f,
-        0.047007f, 0.080550f, 0.068619f, 0.084661f, -0.035493f, -0.091314f,
-        -0.041411f, 0.060971f, -0.101912f, -0.079870f, -0.085977f, -0.022686f,
-        0.079788f, -0.098064f, -0.054603f, 0.040383f, 0.300794f, 0.128603f,
-        0.094844f, 0.047407f, 0.101825f, 0.061832f, -0.162160f, -0.204553f,
-        -0.035165f, 0.101450f, -0.016641f, -0.027140f, -0.134392f, -0.008743f,
-        0.102331f, 0.114853f, 0.009644f, 0.062823f, 0.237339f, 0.167843f,
-        0.053066f, -0.012592f, 0.043158f, 0.002305f, 0.065001f, -0.038929f,
-        -0.020356f, 0.152343f, 0.043469f, -0.029967f, -0.042948f, 0.032481f,
-        0.068488f, -0.110840f, -0.111083f, 0.111980f, -0.002072f, -0.005562f,
-        0.082926f, 0.006635f, -0.108153f, 0.024242f, -0.086464f, -0.189884f,
-        -0.017492f, 0.191456f, -0.007683f, -0.128769f, -0.038017f, -0.132380f,
-        0.091926f, 0.079696f, -0.106728f, -0.007656f, 0.172744f, 0.011576f,
-        0.009883f, 0.083258f, -0.026516f, 0.145534f, 0.153924f, -0.130290f,
-        -0.108945f, 0.124490f, -0.003186f, -0.100485f, 0.015024f, -0.060512f,
-        0.026288f, -0.086713f, -0.169012f, 0.076517f, 0.215778f, 0.043701f,
-        -0.131642f, -0.012585f, -0.045181f, -0.118183f, -0.241544f, -0.167293f,
-        -0.020107f, -0.019917f, -0.101827f, -0.107096f, -0.010503f, 0.044938f,
-        0.189680f, 0.217119f, -0.046086f, 0.044508f, 0.199716f, -0.036004f,
-        -0.148927f, 0.013355f, -0.078279f, 0.030451f, 0.056301f, -0.024609f,
-        0.083224f, 0.099533f, -0.039432f, -0.138880f, 0.005482f, -0.024120f,
-        -0.140468f, -0.066381f, -0.017057f, 0.009260f, -0.058004f, -0.028486f,
-        -0.061610f, 0.007483f, -0.158309f, -0.150687f, -0.044595f, -0.105121f,
-        -0.045763f, -0.006618f, -0.024419f, -0.117713f, -0.119366f, -0.175941f,
-        -0.071542f, 0.119027f, 0.111362f, 0.043080f, 0.034889f, 0.093003f,
-        0.007842f, 0.057368f, -0.108834f, -0.079968f, 0.230959f, 0.020205f,
-        0.011470f, 0.098877f, 0.101310f, -0.030215f, -0.018018f, -0.059552f,
-        -0.106157f, 0.021866f, -0.036471f, 0.080051f, 0.041165f, -0.082101f,
-        0.117726f, 0.030961f, -0.054763f, -0.084102f, -0.185778f, -0.061305f,
-        -0.038089f, -0.110728f, -0.264010f, 0.076675f, -0.077111f, -0.137644f,
-        0.036232f, 0.277995f, 0.019116f, 0.107738f, 0.144003f, 0.080304f,
-        0.215036f, 0.228897f, 0.072713f, 0.077773f, 0.120168f, 0.075324f,
-        0.062730f, 0.122478f, -0.049008f, 0.164912f, 0.162450f, 0.041246f,
-        0.009891f, -0.097827f, -0.038700f, -0.023027f, -0.120020f, 0.203364f,
-        0.248474f, 0.149810f, -0.036276f, -0.082814f, -0.090343f, -0.027143f,
-        -0.075689f, -0.320310f, -0.000500f, -0.143334f, -0.065077f, -0.186936f,
-        0.129372f, 0.116431f, 0.181699f, 0.170436f, 0.418854f, 0.460045f,
-        0.333719f, 0.230515f, 0.047822f, -0.044954f, -0.068086f, 0.140179f,
-        -0.044821f, 0.085550f, 0.092483f, -0.107296f, -0.130670f, -0.206629f,
-        0.114601f, -0.317869f, -0.076663f, 0.038680f, 0.212753f, -0.016059f,
-        -0.126526f, -0.163602f, 0.210154f, 0.099887f, -0.126366f, 0.118453f,
-        0.019309f, -0.021611f, -0.096499f, -0.111809f, -0.200489f, 0.142854f,
-        0.228840f, -0.353346f, -0.179151f, 0.116834f, 0.252389f, -0.031728f,
-        -0.188135f, -0.158998f, 0.386523f, 0.122315f, 0.209944f, 0.394023f,
-        0.359030f, 0.260717f, 0.170335f, 0.013683f, -0.142596f, -0.026138f,
-        -0.011878f, -0.150519f, 0.047159f, -0.107062f, -0.147347f, -0.187689f,
-        -0.186027f, -0.208048f, 0.058468f, -0.073026f, -0.236556f, -0.079788f,
-        -0.146216f, -0.058563f, -0.101361f, -0.071294f, -0.071093f, 0.116919f,
-        0.234304f, 0.306781f, 0.321866f, 0.240000f, 0.073261f, -0.012173f,
-        0.026479f, 0.050173f, 0.166127f, 0.228955f, 0.061905f, 0.156460f,
-        0.205990f, 0.120672f, 0.037350f, 0.167884f, 0.290099f, 0.420900f,
-        -0.012601f, 0.189839f, 0.306378f, 0.118383f, -0.095598f, -0.072360f,
-        -0.132496f, -0.224259f, -0.126021f, 0.022714f, 0.284039f, 0.051369f,
-        -0.000927f, -0.058735f, -0.083354f, -0.141254f, -0.187578f, -0.202669f,
-        0.048902f, 0.246597f, 0.441863f, 0.342519f, 0.066979f, 0.215286f,
-        0.188191f, -0.072240f, -0.208142f, -0.030196f, 0.178141f, 0.136985f,
-        -0.043374f, -0.181098f, 0.091815f, 0.116177f, -0.126690f, -0.386625f,
-        0.368165f, 0.269149f, -0.088042f, -0.028823f, 0.092961f, 0.024099f,
-        0.046112f, 0.176756f, 0.135849f, 0.124955f, 0.195467f, -0.037218f,
-        0.167217f, 0.188938f, 0.053528f, -0.066561f, 0.133721f, -0.070565f,
-        0.115898f, 0.152435f, -0.116993f, -0.110592f, -0.179005f, 0.026668f,
-        0.080530f, 0.075084f, -0.070401f, 0.012497f, 0.021849f, -0.139764f,
-        -0.022020f, -0.096301f, -0.064954f, -0.127446f, -0.013806f, -0.108315f,
-        0.156285f, 0.149867f, -0.011382f, 0.064532f, 0.029168f, 0.027393f,
-        0.069716f, 0.153735f, 0.038459f, 0.230714f, 0.253840f, 0.059522f,
-        -0.045053f, 0.014083f, 0.071103f, 0.068747f, 0.095887f, 0.005832f,
-        0.144887f, 0.026357f, -0.067359f, -0.044151f, -0.123283f, -0.019911f,
-        0.005318f, 0.109208f, -0.003201f, -0.021734f, 0.142025f, -0.066907f,
-        -0.120070f, -0.188639f, 0.012472f, -0.048704f, -0.012366f, -0.184828f,
-        0.168591f, 0.267166f, 0.058208f, -0.044101f, 0.033500f, 0.178558f,
-        0.104550f, 0.122418f, 0.080177f, 0.173246f, 0.298537f, 0.064173f,
-        0.053397f, 0.174341f, 0.230984f, 0.117025f, 0.166242f, 0.227781f,
-        0.120623f, 0.176952f, -0.011393f, -0.086483f, -0.008270f, 0.051700f,
-        -0.153369f, -0.058837f, -0.057639f, -0.060115f, 0.026349f, -0.160745f,
-        -0.037894f, -0.048575f, 0.041052f, -0.022112f, 0.060365f, 0.051906f,
-        0.162657f, 0.138519f, -0.050185f, -0.005938f, 0.071301f, 0.127686f,
-        0.062342f, 0.144400f, 0.072600f, 0.198436f, 0.246219f, -0.078185f,
-        -0.036169f, 0.075934f, 0.047328f, -0.013601f, 0.087205f, 0.019900f,
-        0.022606f, -0.015365f, -0.092506f, 0.075275f, -0.116375f, 0.050500f,
-        0.045118f, 0.166567f, 0.072073f, 0.060371f, 0.131747f, -0.169863f,
-        -0.039352f, -0.047486f, -0.039797f, -0.204312f, 0.021710f, 0.129443f,
-        -0.021173f, 0.173416f, -0.070794f, -0.063986f, 0.069689f, -0.064099f,
-        -0.123201f, -0.017372f, -0.206870f, 0.065863f, 0.113226f, 0.024707f,
-        -0.071341f, -0.066964f, -0.098278f, -0.062927f, 0.075840f, 0.014716f,
-        0.019378f, 0.132699f, -0.074191f, -0.089557f, -0.078446f, -0.197488f,
-        -0.173665f, 0.052583f, 0.044361f, 0.113549f, 0.098492f, 0.077379f,
-        -0.011146f, -0.192593f, -0.164435f, 0.045568f, 0.205699f, 0.049187f,
-        -0.082281f, 0.134874f, 0.185499f, 0.034968f, -0.119561f, -0.112372f,
-        -0.115091f, -0.054042f, -0.183816f, -0.078100f, 0.190695f, 0.091617f,
-        0.004257f, -0.041135f, -0.061453f, -0.141592f, -0.194809f, -0.120638f,
-        0.020168f, 0.109672f, 0.067398f, -0.015238f, -0.239145f, -0.264671f,
-        -0.185176f, 0.050472f, 0.020793f, 0.035678f, 0.022839f, -0.052055f,
-        -0.127968f, -0.113049f, -0.228416f, -0.258281f, -0.053437f, 0.076424f,
-        0.061450f, 0.237478f, 0.003618f, -0.055865f, -0.108087f, -0.028937f,
-        0.045585f, 0.052829f, -0.001471f, 0.022826f, 0.059565f, -0.104430f,
-        -0.077266f, -0.211882f, -0.212078f, 0.028074f, 0.075846f, 0.016265f,
-        0.161879f, 0.134477f, 0.008935f, -0.048041f, 0.074692f, 0.004928f,
-        -0.025156f, 0.192874f, 0.074410f, 0.308732f, 0.267400f, 0.094208f,
-        -0.005251f, 0.042041f, -0.032148f, 0.015588f, 0.252869f, 0.175302f,
-        0.022892f, 0.081673f, 0.063208f, 0.162626f, 0.194426f, 0.233890f,
-        0.262292f, 0.186930f, 0.084079f, -0.286388f, -0.213034f, -0.048867f,
-        -0.207669f, -0.170050f, 0.011673f, -0.092958f, -0.192786f, -0.273536f,
-        0.230904f, 0.266732f, 0.320519f, 0.297155f, 0.548169f, 0.304922f,
-        0.132687f, 0.247333f, 0.212488f, -0.271472f, -0.142105f, -0.002627f,
-        -0.119215f, 0.128383f, 0.100079f, -0.057490f, -0.121902f, -0.228892f,
-        0.202292f, -0.399795f, -0.371326f, -0.095836f, -0.063626f, -0.161375f,
-        -0.311180f, -0.294797f, 0.242122f, 0.011788f, 0.095573f, 0.322523f,
-        0.511840f, 0.322880f, 0.313259f, 0.173331f, 0.002542f, -0.029802f,
-        0.324766f, -0.326170f, -0.340547f, -0.138288f, -0.002963f, -0.114060f,
-        -0.377312f, -0.442570f, 0.212446f, -0.007759f, -0.011576f, 0.169711f,
-        0.308689f, 0.317348f, 0.539390f, 0.332845f, 0.057331f, -0.068180f,
-        0.101994f, 0.266995f, 0.209570f, 0.355730f, 0.091635f, 0.170238f,
-        0.125215f, 0.274154f, 0.070223f, 0.025515f, 0.049946f, -0.000550f,
-        0.043715f, -0.141843f, 0.020844f, 0.129871f, 0.256588f, 0.105015f,
-        0.148339f, 0.170682f, 0.028792f, 0.074037f, 0.160042f, 0.405137f,
-        0.246187f, 0.352160f, 0.168951f, 0.222263f, 0.264439f, 0.065945f,
-        0.021963f, -0.075084f, 0.093105f, 0.027318f, 0.098864f, 0.057566f,
-        -0.080282f, 0.185032f, 0.314419f, 0.333727f, 0.125798f, 0.294919f,
-        0.386002f, 0.217619f, -0.183517f, -0.278622f, -0.002342f, -0.027821f,
-        -0.134266f, -0.331843f, -0.008296f, 0.124564f, 0.053712f, -0.369016f,
-        -0.095036f, 0.209381f, 0.423760f, 0.371760f, 0.106397f, 0.369408f,
-        0.485608f, 0.231201f, -0.138685f, -0.349208f, -0.070083f, 0.028991f,
-        -0.081630f, -0.395992f, -0.146791f, -0.027354f, 0.063396f, -0.272484f,
-        0.058299f, 0.338207f, 0.110767f, -0.052642f, -0.233848f, -0.027448f,
-        0.030328f, 0.155572f, -0.093826f, 0.019331f, 0.120638f, 0.006292f,
-        -0.106083f, -0.236290f, -0.140933f, -0.088067f, -0.025138f, -0.208395f,
-        -0.025502f, 0.144192f, -0.048353f, -0.106144f, -0.305121f, -0.114147f,
-        0.090963f, 0.327727f, 0.035606f, -0.093779f, 0.002651f, -0.171081f,
-        -0.188131f, -0.216571f, -0.209101f, -0.054402f, 0.157147f, -0.057127f,
-        0.066584f, 0.008988f, 0.041191f, 0.034456f, -0.078255f, 0.052099f,
-        -0.022239f, 0.066981f, -0.117520f, -0.072637f, 0.062512f, 0.037570f,
-        -0.057544f, -0.312359f, 0.034357f, -0.031549f, 0.002566f, -0.207375f,
-        -0.070654f, -0.018786f, -0.044815f, -0.012814f, -0.076320f, 0.078183f,
-        0.023877f, 0.117078f, 0.022292f, -0.205424f, -0.060430f, -0.017296f,
-        -0.004827f, -0.321036f, -0.092155f, 0.038837f, 0.073190f, -0.067513f,
-        0.026521f, 0.171945f, 0.087318f, 0.034495f, -0.034089f, 0.154410f,
-        -0.061431f, 0.007435f, -0.111094f, -0.095976f, 0.014741f, -0.132324f,
-        -0.029517f, -0.192160f, 0.098667f, 0.020762f, 0.177050f, -0.064510f,
-        -0.054437f, -0.058678f, -0.001858f, 0.167602f, 0.015735f, 0.054338f,
-        0.016477f, 0.186381f, -0.010667f, 0.054692f, 0.126742f, 0.013140f,
-        0.090353f, -0.133608f, -0.018017f, -0.152619f, 0.027600f, -0.138700f,
-        -0.050274f, 0.045141f, -0.118731f, 0.094797f, -0.167605f, 0.097461f,
-        -0.009131f, 0.199920f, -0.052976f, 0.158194f, 0.178568f, -0.107600f,
-        0.009671f, -0.084072f, -0.040258f, -0.205673f, 0.102891f, 0.223511f,
-        0.042699f, 0.118548f, -0.021274f, 0.110997f, -0.155121f, 0.027696f,
-        -0.149968f, 0.051552f, -0.129219f, 0.173524f, 0.073972f, -0.189045f,
-        -0.034523f, -0.106655f, -0.011843f, -0.197381f, 0.219413f, 0.183197f,
-        -0.054920f, 0.144955f, 0.036517f, -0.085412f, -0.229070f, -0.143710f,
-        -0.049486f, 0.156634f, -0.008673f, -0.064778f, 0.082344f, 0.145673f,
-        0.002912f, -0.210121f, -0.116564f, 0.078425f, 0.220908f, -0.067594f,
-        0.048610f, 0.084912f, -0.066202f, -0.112515f, -0.217767f, -0.082640f,
-        -0.017414f, 0.230265f, -0.070735f, 0.066073f, 0.215256f, 0.071157f,
-        -0.087220f, -0.202235f, -0.011918f, 0.099562f, 0.174716f, -0.063845f,
-        -0.121055f, 0.014367f, 0.132709f, -0.005060f, -0.244606f, -0.179693f,
-        -0.134690f, 0.023239f, -0.193116f, -0.076975f, -0.021164f, -0.001938f,
-        -0.163799f, -0.111437f, -0.210362f, -0.166376f, 0.034754f, 0.010036f,
-        -0.021917f, 0.068014f, -0.086893f, -0.251746f, -0.267171f, 0.037383f,
-        0.003966f, 0.033571f, -0.151506f, 0.025437f, -0.020626f, -0.308454f,
-        -0.343143f, -0.092263f, -0.026261f, -0.028345f, 0.036036f, 0.035169f,
-        0.129470f, 0.122205f, 0.015661f, -0.070612f, -0.094333f, -0.066055f,
-        -0.041083f, 0.159146f, 0.073184f, 0.110044f, 0.174471f, 0.078069f,
-        -0.014881f, 0.008116f, 0.013209f, 0.075857f, 0.195605f, 0.062714f,
-        0.067955f, 0.056544f, -0.153908f, -0.141749f, -0.072550f, 0.033523f,
-        -0.024665f, 0.134487f, 0.079076f, 0.133562f, 0.227130f, 0.018054f,
-        0.004928f, 0.169162f, 0.065152f, 0.072160f, 0.131631f, 0.096303f,
-        0.054288f, 0.106256f, 0.114632f, 0.119038f, 0.515200f, 0.247429f,
-        0.199134f, 0.211957f, 0.127558f, -0.294684f, -0.194890f, -0.049988f,
-        -0.112247f, -0.008122f, -0.006176f, 0.037035f, -0.110881f, -0.249989f,
-        0.152434f, 0.234621f, 0.153340f, 0.349283f, 0.683049f, 0.157174f,
-        0.124844f, 0.099136f, 0.064407f, -0.248400f, -0.155323f, -0.026498f,
-        -0.023450f, 0.049051f, -0.114187f, 0.007195f, -0.176825f, -0.376926f,
-        0.366159f, -0.179938f, -0.148508f, 0.006043f, 0.170048f, 0.097866f,
-        -0.102658f, -0.260430f, 0.248868f, 0.037019f, -0.118111f, 0.078176f,
-        0.194171f, 0.211328f, 0.368612f, 0.361213f, 0.130013f, 0.094650f,
-        0.227396f, -0.178058f, -0.114782f, -0.008093f, 0.231080f, -0.011843f,
-        -0.097917f, -0.325788f, 0.141879f, 0.119738f, -0.230427f, -0.117419f,
-        -0.114153f, 0.037903f, 0.116383f, 0.218773f, -0.101884f, 0.059466f,
-        0.119255f, 0.010874f, -0.031449f, 0.045996f, 0.119931f, 0.273760f,
-        0.311700f, 0.261794f, 0.194809f, 0.339829f, 0.239449f, 0.064140f,
-        0.077597f, 0.098996f, 0.143534f, 0.184602f, 0.037507f, 0.225494f,
-        0.096142f, -0.147370f, -0.207833f, -0.174742f, -0.086391f, -0.038942f,
-        0.159577f, -0.088492f, -0.000989f, 0.108154f, -0.025890f, -0.072713f,
-        0.025997f, -0.006803f, -0.086879f, -0.011290f, -0.269200f, -0.103450f,
-        -0.124910f, -0.116340f, 0.141459f, 0.208800f, 0.042268f, 0.265034f,
-        0.516474f, 0.217591f, -0.018843f, -0.313328f, -0.168363f, 0.047129f,
-        0.090480f, -0.109852f, -0.018761f, 0.210669f, 0.281269f, -0.043591f,
-        -0.034147f, -0.237772f, -0.134843f, -0.072481f, -0.103831f, 0.038355f,
-        0.308619f, 0.148023f, -0.045867f, -0.123950f, -0.210860f, -0.064973f,
-        -0.036308f, -0.046731f, -0.022099f, 0.095776f, 0.409423f, 0.060635f,
-        -0.065196f, 0.051828f, 0.027981f, -0.009609f, -0.137681f, -0.095011f,
-        -0.019045f, 0.177278f, 0.009759f, -0.092119f, -0.016958f, -0.133860f,
-        -0.118421f, -0.032039f, -0.006214f, -0.084541f, 0.063971f, -0.073642f,
-        0.165676f, 0.110443f, 0.044131f, 0.046568f, 0.053292f, -0.055466f,
-        0.015512f, 0.371947f, 0.232102f, -0.016923f, 0.103979f, -0.091758f,
-        0.005907f, 0.209100f, 0.157433f, 0.030518f, 0.250366f, 0.062322f,
-        0.036720f, 0.094676f, 0.017306f, -0.010328f, -0.079012f, 0.016781f,
-        -0.112435f, 0.061795f, 0.042543f, -0.126799f, -0.009975f, -0.056760f,
-        0.046424f, -0.194712f, -0.139399f, -0.037731f, 0.157989f, -0.016261f,
-        0.123345f, 0.230563f, 0.083300f, -0.016392f, 0.059567f, -0.016035f,
-        -0.064767f, 0.231945f, 0.156629f, 0.034602f, 0.145628f, 0.041315f,
-        0.034535f, 0.019967f, -0.089188f, -0.012091f, 0.307857f, 0.211405f,
-        -0.025091f, -0.148249f, -0.129384f, 0.063536f, -0.068603f, -0.067941f,
-        -0.035104f, 0.210832f, 0.063810f, 0.062764f, -0.089889f, -0.030554f,
-        0.014791f, -0.053362f, -0.037818f, -0.196640f, 0.008388f, -0.082654f,
-        0.143056f, 0.064221f, 0.069795f, 0.191040f, 0.097321f, -0.028679f,
-        0.075794f, 0.313154f, 0.086240f, 0.207643f, 0.017809f, 0.122867f,
-        0.224586f, 0.167403f, -0.023884f, 0.047434f, 0.344091f, 0.187745f,
-        0.136177f, 0.141738f, 0.063799f, 0.045233f, -0.077342f, -0.003525f,
-        -0.165041f, -0.025616f, -0.073745f, 0.164439f, 0.011200f, -0.145896f,
-        -0.027954f, -0.061987f, -0.039874f, -0.142775f, 0.151042f, -0.038238f,
-        0.053152f, 0.078615f, 0.086061f, 0.100593f, 0.128046f, -0.071006f,
-        -0.116558f, 0.208445f, 0.051086f, 0.076843f, 0.023191f, -0.084781f,
-        -0.011790f, 0.147807f, -0.048554f, -0.113932f, 0.283322f, 0.190934f,
-        0.092789f, 0.033018f, -0.142428f, -0.142480f, -0.099023f, -0.041020f,
-        -0.042760f, 0.203295f, -0.053475f, 0.042424f, 0.222839f, -0.019167f,
-        -0.133176f, -0.276216f, -0.031998f, 0.117290f, 0.177827f, -0.059973f,
-        -0.064744f, -0.117040f, -0.155482f, -0.099531f, 0.164121f, -0.026682f,
-        -0.093810f, 0.238993f, -0.006506f, 0.007830f, 0.065819f, -0.203643f,
-        -0.100925f, -0.053652f, -0.130770f, 0.026277f, 0.131796f, 0.032742f,
-        0.127186f, 0.116694f, -0.161122f, -0.279773f, -0.252515f, -0.002638f,
-        0.042812f, 0.096776f, -0.123280f, 0.064858f, -0.010455f, -0.219760f,
-        -0.239331f, -0.104363f, -0.058022f, -0.053584f, 0.025611f, 0.005129f,
-        -0.100418f, -0.045712f, -0.194418f, -0.126366f, -0.030530f, 0.051168f,
-        0.215959f, 0.172402f, -0.054700f, -0.185995f, -0.278360f, -0.193693f,
-        -0.040309f, 0.003735f, -0.007770f, 0.123556f, 0.190179f, -0.077315f,
-        0.117403f, 0.212942f, 0.012160f, 0.000113f, 0.027331f, 0.040202f,
-        0.033293f, 0.219438f, 0.184174f, 0.259349f, 0.311206f, 0.082547f,
-        -0.047875f, -0.078417f, 0.010746f, 0.082620f, 0.311931f, 0.307605f,
-        0.003863f, 0.021405f, -0.026388f, -0.019572f, 0.020582f, -0.059353f,
-        0.025199f, 0.261319f, 0.086316f, 0.143614f, 0.107780f, 0.003900f,
-        -0.188397f, -0.038563f, -0.106045f, -0.125154f, -0.010509f, 0.054021f,
-        0.242130f, 0.279152f, 0.215546f, 0.346995f, 0.440856f, 0.237452f,
-        0.234154f, 0.301646f, 0.168929f, -0.208358f, -0.126848f, 0.010260f,
-        0.121018f, -0.062975f, -0.052848f, 0.050341f, -0.061103f, -0.266482f,
-        0.107186f, 0.140221f, 0.280065f, 0.287889f, 0.373198f, 0.151596f,
-        0.013593f, 0.115616f, 0.014616f, -0.281710f, -0.237597f, -0.117305f,
-        -0.000034f, -0.136739f, -0.196275f, -0.095225f, -0.125310f, -0.250514f,
-        0.236804f, -0.071805f, -0.037421f, 0.048230f, 0.321596f, 0.063632f,
-        0.024039f, -0.029133f, 0.230983f, 0.160593f, -0.154355f, -0.013086f,
-        -0.079929f, 0.094692f, 0.160391f, 0.180239f, 0.053895f, 0.100759f,
-        0.288631f, 0.038191f, 0.181692f, 0.229682f, 0.440166f, 0.063401f,
-        0.006273f, 0.020865f, 0.338695f, 0.256244f, -0.043927f, 0.115617f,
-        0.003296f, 0.173965f, 0.021318f, -0.040936f, -0.118932f, 0.182380f,
-        0.235922f, -0.053233f, -0.015053f, -0.101057f, 0.095341f, 0.051111f,
-        0.161831f, 0.032614f, 0.159496f, 0.072375f, 0.025089f, 0.023748f,
-        0.029151f, 0.161284f, -0.117717f, -0.036191f, -0.176822f, -0.162006f,
-        0.226542f, -0.078329f, 0.043079f, -0.119172f, 0.054614f, -0.101365f,
-        -0.064541f, -0.115304f, 0.135170f, 0.298872f, 0.098060f, 0.089428f,
-        -0.007497f, 0.110391f, -0.028824f, 0.020835f, -0.036804f, 0.125411f,
-        0.192105f, -0.048931f, 0.003086f, -0.010681f, 0.074698f, -0.016263f,
-        0.096063f, 0.060267f, -0.007277f, 0.139139f, -0.080635f, 0.036628f,
-        0.086058f, 0.131979f, 0.085707f, 0.025301f, 0.226094f, 0.194759f,
-        0.042193f, -0.157846f, -0.068402f, -0.141450f, -0.112659f, -0.076305f,
-        -0.069085f, -0.114332f, -0.102005f, 0.132193f, -0.067042f, 0.106643f,
-        0.198964f, 0.171616f, 0.167237f, -0.033730f, -0.026755f, 0.083621f,
-        0.149459f, -0.002799f, -0.000318f, 0.011753f, 0.065889f, -0.089375f,
-        -0.049610f, 0.224579f, 0.216548f, -0.034908f, -0.017851f, -0.088144f,
-        0.007530f, 0.240268f, 0.073270f, 0.013263f, 0.175323f, 0.012082f,
-        0.093993f, 0.015282f, 0.105854f, 0.107990f, 0.077798f, -0.096166f,
-        -0.079607f, 0.177820f, 0.142392f, 0.033337f, -0.078100f, -0.081616f,
-        -0.046993f, 0.139459f, 0.020272f, -0.123161f, 0.175269f, 0.105217f,
-        0.057328f, 0.080909f, -0.012612f, -0.097081f, 0.082060f, -0.096716f,
-        -0.063921f, 0.201884f, 0.128166f, -0.035051f, -0.032227f, -0.068139f,
-        -0.115915f, 0.095080f, -0.086007f, -0.067543f, 0.030776f, 0.032712f,
-        0.088937f, 0.054336f, -0.039329f, -0.114022f, 0.171672f, -0.112321f,
-        -0.217646f, 0.065186f, 0.060223f, 0.192174f, 0.055580f, -0.131107f,
-        -0.144338f, 0.056730f, -0.034707f, -0.081616f, -0.135298f, -0.000614f,
-        0.087189f, 0.014614f, 0.067709f, 0.107689f, 0.225780f, 0.084361f,
-        -0.008544f, 0.051649f, -0.048369f, -0.037739f, -0.060710f, 0.002654f,
-        0.016935f, 0.085563f, -0.015961f, -0.019265f, 0.111788f, 0.062376f,
-        0.202019f, 0.047713f, 0.042261f, 0.069716f, 0.242913f, 0.021052f,
-        -0.072812f, -0.155920f, -0.026436f, 0.035621f, -0.079300f, -0.028787f,
-        -0.048329f, 0.084718f, -0.060565f, -0.083750f, -0.164075f, -0.040742f,
-        -0.086219f, 0.015271f, -0.005204f, -0.016038f, 0.045816f, -0.050433f,
-        -0.077652f, 0.117109f, 0.009611f, -0.009045f, -0.008634f, -0.055373f,
-        -0.085968f, 0.028527f, -0.054736f, -0.168089f, 0.175839f, 0.071205f,
-        -0.023603f, 0.037907f, -0.004561f, -0.022634f, 0.123831f, 0.094469f,
-        -0.072920f, -0.133642f, -0.014032f, -0.142754f, -0.026999f, -0.199409f,
-        0.013268f, 0.226989f, 0.048650f, -0.170988f, -0.050141f, 0.007880f,
-        0.061880f, 0.019078f, -0.043578f, -0.038139f, 0.134814f, 0.054097f,
-        -0.081670f, 0.176838f, 0.047920f, -0.038176f, 0.050406f, -0.107181f,
-        -0.036279f, 0.027060f, 0.081594f, -0.002820f, 0.090507f, -0.033338f,
-        -0.059571f, 0.013404f, -0.099860f, 0.073371f, 0.342805f, 0.098305f,
-        -0.150910f, -0.020822f, -0.056960f, 0.046262f, -0.043413f, -0.149405f,
-        -0.129105f, -0.010899f, -0.014229f, -0.179949f, -0.113044f, -0.049468f,
-        -0.065513f, 0.090269f, -0.011919f, 0.087846f, 0.095796f, 0.146127f,
-        0.101599f, 0.078066f, -0.084348f, -0.100002f, -0.020134f, -0.050169f,
-        0.062122f, 0.014640f, 0.019143f, 0.036543f, 0.180924f, -0.013976f,
-        -0.066768f, -0.001090f, -0.070419f, -0.004839f, -0.001504f, 0.034483f,
-        -0.044954f, -0.050336f, -0.088638f, -0.174782f, -0.116082f, -0.205507f,
-        0.015587f, -0.042839f, -0.096879f, -0.144097f, -0.050268f, -0.196796f,
-        0.109639f, 0.271411f, 0.173732f, 0.108070f, 0.156437f, 0.124255f,
-        0.097242f, 0.238693f, 0.083941f, 0.109105f, 0.223940f, 0.267188f,
-        0.027385f, 0.025819f, 0.125070f, 0.093738f, 0.040353f, 0.038645f,
-        -0.012730f, 0.144063f, 0.052931f, -0.009138f, 0.084193f, 0.160272f,
-        -0.041366f, 0.011951f, -0.121446f, -0.106713f, -0.047566f, 0.047984f,
-        -0.255224f, -0.076116f, 0.098685f, -0.150845f, -0.171513f, -0.156590f,
-        0.058331f, 0.187493f, 0.413018f, 0.554265f, 0.372242f, 0.237943f,
-        0.124571f, 0.110829f, 0.010322f, -0.174477f, -0.067627f, -0.001979f,
-        0.142913f, 0.040597f, 0.019907f, 0.025963f, -0.043585f, -0.120732f,
-        0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f,
-        -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
-        -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
-        -9.063785f };
-    return std::vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
-}
+    Mat getPeopleDetector64x128()
+    {
+        static float detector[] = {
+           0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
+           0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
+           0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
+           0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
+           -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
+           -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
+           -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
+           0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
+           0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
+           0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
+           0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
+           0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,
+           5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f,
+           0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f,
+           0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f,
+           0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f,
+           0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f,
+           0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f,
+           0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f,
+           -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f,
+           -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f,
+           -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f,
+           0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f,
+           0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f,
+           -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f,
+           0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f,
+           -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f,
+           0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f,
+           0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f,
+           -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f,
+           -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f,
+           -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f,
+           0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f,
+           -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f,
+           0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f,
+           0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f,
+           -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f,
+           0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f,
+           3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f,
+           -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f,
+           -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f,
+           -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f,
+           -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f,
+           -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f,
+           -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f,
+           -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f,
+           0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f,
+           0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f,
+           0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f,
+           0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f,
+           0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f,
+           8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f,
+           -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f,
+           -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f,
+           -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f,
+           0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f,
+           -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f,
+           -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f,
+           0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f,
+           -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f,
+           -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f,
+           0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f,
+           0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f,
+           2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f,
+           0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f,
+           -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f,
+           6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f,
+           0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f,
+           -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f,
+           -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f,
+           -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f,
+           1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f,
+           -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f,
+           -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f,
+           -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f,
+           0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f,
+           0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f,
+           0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f,
+           -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f,
+           -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f,
+           -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f,
+           0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f,
+           -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f,
+           8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f,
+           -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f,
+           -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f,
+           -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f,
+           9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f,
+           0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f,
+           -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f,
+           2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f,
+           2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f,
+           -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f,
+           0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f,
+           -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f,
+           8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f,
+           -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f,
+           -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f,
+           8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f,
+           0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f,
+           0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f,
+           0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f,
+           -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f,
+           0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f,
+           0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f,
+           0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f,
+           -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f,
+           -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f,
+           0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f,
+           0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f,
+           -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f,
+           1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f,
+           0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f,
+           -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f,
+           -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f,
+           -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f,
+           0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f,
+           -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f,
+           0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f,
+           -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f,
+           0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f,
+           0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f,
+           -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f,
+           -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f,
+           0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f,
+           9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f,
+           -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f,
+           -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f,
+           -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f,
+           -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f,
+           0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f,
+           -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f,
+           7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f,
+           -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f,
+           7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f,
+           0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f,
+           0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f,
+           0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f,
+           -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f,
+           -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f,
+           0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f,
+           -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f,
+           0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f,
+           0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f,
+           -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f,
+           0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f,
+           -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f,
+           0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f,
+           -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f,
+           -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f,
+           -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f,
+           -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f,
+           -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f,
+           0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f,
+           0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f,
+           -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f,
+           0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f,
+           0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f,
+           0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f,
+           -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f,
+           -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f,
+           0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f,
+           -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f,
+           8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f,
+           -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f,
+           -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f,
+           -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f,
+           -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f,
+           0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f,
+           0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f,
+           0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f,
+           0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f,
+           0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f,
+           0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f,
+           -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f,
+           0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f,
+           0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f,
+           0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f,
+           0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f,
+           -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f,
+           0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f,
+           4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f,
+           -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f,
+           -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f,
+           0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f,
+           0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f,
+           0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f,
+           0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f,
+           -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f,
+           0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f,
+           -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f,
+           -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f,
+           -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f,
+           0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f,
+           -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f,
+           -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f,
+           -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f,
+           -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f,
+           -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f,
+           8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f,
+           -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f,
+           -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f,
+           -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f,
+           3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f,
+           -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f,
+           -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f,
+           -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f,
+           0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f,
+           1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f,
+           0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f,
+           -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f,
+           -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f,
+           -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f,
+           -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f,
+           0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f,
+           -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f,
+           0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f,
+           -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f,
+           -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f,
+           -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f,
+           0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f,
+           -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f,
+           0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f,
+           0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f,
+           -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f,
+           -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f,
+           0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f,
+           0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f,
+           0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f,
+           -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f,
+           -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f,
+           0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f,
+           -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f,
+           2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f,
+           0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f,
+           -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f,
+           0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f,
+           0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f,
+           0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f,
+           0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f,
+           0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f,
+           -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f,
+           -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f,
+           0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f,
+           -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f,
+           0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f,
+           -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f,
+           0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f,
+           -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f,
+           -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f,
+           0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f,
+           5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f,
+           0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f,
+           -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f,
+           0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f,
+           0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f,
+           0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f,
+           0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f,
+           0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f,
+           -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f,
+           -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f,
+           -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f,
+           -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f,
+           0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f,
+           -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f,
+           0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f,
+           7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f,
+           8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f,
+           -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f,
+           -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f,
+           -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f,
+           -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f,
+           -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f,
+           0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f,
+           0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f,
+           0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f,
+           -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f,
+           5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f,
+           -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f,
+           -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f,
+           -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f,
+           0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f,
+           0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f,
+           0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f,
+           0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f,
+           -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f,
+           0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f,
+           0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f,
+           0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f,
+           0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f,
+           -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f,
+           -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f,
+           0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f,
+           -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f,
+           -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f,
+           -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f,
+           0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f,
+           -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f,
+           -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f,
+           -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f,
+           -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f,
+           0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f,
+           0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f,
+           -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f,
+           -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f,
+           -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f,
+           -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f,
+           0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f,
+           -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f,
+           -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f,
+           -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f,
+           0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f,
+           -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f,
+           0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f,
+           -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f,
+           0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f,
+           6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f,
+           -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f,
+           -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f,
+           0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f,
+           -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f,
+           -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f,
+           -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f,
+           0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f,
+           -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f,
+           8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f,
+           -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f,
+           -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f,
+           -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f,
+           0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f,
+           0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f,
+           0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f,
+           -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f,
+           -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f,
+           -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f,
+           -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f,
+           0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f,
+           0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f,
+           -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f,
+           -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f,
+           0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f,
+           0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f,
+           -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f,
+           -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f,
+           -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f,
+           7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f,
+           -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f,
+           -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f,
+           -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f,
+           -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f,
+           0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f,
+           0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f,
+           6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f,
+           0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f,
+           0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f,
+           -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f,
+           0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f,
+           0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f,
+           0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f,
+           0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f,
+           -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f,
+           0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f,
+           -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f,
+           2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f,
+           0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f,
+           0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f,
+           0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f,
+           0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f,
+           0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f,
+           0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f,
+           0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f,
+           -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f,
+           -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f,
+           -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f,
+           0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f,
+           -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f,
+           -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f,
+           -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f,
+           8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f,
+           -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f,
+           -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f,
+           0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f,
+           -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f,
+           -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f,
+           -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f,
+           -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f,
+           0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f,
+           0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f,
+           0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f,
+           -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f,
+           0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f,
+           6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f,
+           -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f,
+           0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f,
+           -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f,
+           0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f,
+           0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f,
+           -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f,
+           0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f,
+           -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f,
+           0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f,
+           -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f,
+           0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f,
+           3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f,
+           -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f,
+           0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f,
+           -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f,
+           6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f,
+           -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f,
+           1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f,
+           0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f,
+           -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f,
+           1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f,
+           0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f,
+           0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f,
+           -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f,
+           -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f,
+           0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f,
+           -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f,
+           0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f,
+           8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f,
+           0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f,
+           -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f,
+           0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f,
+           -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f,
+           0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f,
+           -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f,
+           -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f,
+           0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f,
+           -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f,
+           0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f,
+           -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f,
+           0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f,
+           -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f,
+           -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f,
+           -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f,
+           -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f,
+           -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f,
+           8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f,
+           0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f,
+           0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f,
+           0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f,
+           0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f,
+           0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f,
+           0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f,
+           0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f,
+           0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f,
+           -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f,
+           -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f,
+           0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f,
+           0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f,
+           0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f,
+           0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f,
+           0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f,
+           -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f,
+           0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f,
+           2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f,
+           0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f,
+           -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f,
+           -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f,
+           7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f,
+           -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f,
+           -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f,
+           -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f,
+           -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f,
+           -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f,
+           -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f,
+           0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f,
+           -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f,
+           0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f,
+           0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f,
+           0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f,
+           -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f,
+           -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f,
+           -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f,
+           0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f,
+           -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f,
+           0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f,
+           0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f,
+           0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f,
+           -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f,
+           8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f,
+           0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f,
+           0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f,
+           0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f,
+           -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f,
+           0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f,
+           -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f,
+           0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f,
+           -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f,
+           0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f,
+           -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f,
+           0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f,
+           -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f,
+           -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f,
+           -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f,
+           0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f,
+           -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f,
+           0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f,
+           -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f,
+           0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f,
+           -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f,
+           -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f,
+           -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f,
+           0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f,
+           0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f,
+           0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f,
+           -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f,
+           -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f,
+           0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f,
+           0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f,
+           0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f,
+           -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f,
+           0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f,
+           -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f,
+           4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f,
+           -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f,
+           9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f,
+           -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f,
+           0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f,
+           -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f,
+           -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f,
+           0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f,
+           0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f,
+           -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f,
+           -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f,
+           0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f,
+           5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f,
+           0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f,
+           -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f,
+           -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f,
+           -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f,
+           -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f,
+           0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f,
+           -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f,
+           0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f,
+           -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f,
+           0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f,
+           -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f,
+           0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f,
+           -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f,
+           -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f,
+           -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f,
+           0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f,
+           -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f,
+           -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f,
+           0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f,
+           -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f,
+           -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f,
+           -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f,
+           0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f,
+           0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f,
+           0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f,
+           0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f,
+           -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f,
+           3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f,
+           0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f,
+           -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f,
+           0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f,
+           0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f,
+           -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f,
+           -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f,
+           0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f,
+           0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f,
+           -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f,
+           -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f,
+           -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f,
+           -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f,
+           -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f,
+           -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f,
+           -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f,
+           0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f,
+           0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f,
+           -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f,
+           0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f,
+           -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f,
+           0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f,
+           -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f,
+           -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f,
+           -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f,
+           0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f,
+           0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f,
+           0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f,
+           -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f,
+           -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f,
+           0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f,
+           0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f,
+           0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f,
+           -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f,
+           0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f,
+           0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f,
+           0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f,
+           -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f,
+           0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f,
+           0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f,
+           0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f,
+           -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f,
+           -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f,
+           0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f,
+           -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f,
+           0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f,
+           -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f,
+           -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f,
+           -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f,
+           -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f,
+           0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f,
+           0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f,
+           -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f,
+           0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f,
+           7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f,
+           0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f,
+           -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f,
+           0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f,
+           0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f,
+           -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f,
+           0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f,
+           0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f,
+           -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f,
+           -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f,
+           0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f,
+           1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f,
+           0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f,
+           5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f,
+           4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f,
+           0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f,
+           0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f,
+           -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f,
+           -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f,
+           0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f,
+           0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f,
+           0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f,
+           0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f,
+           0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f,
+           0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f,
+           -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f,
+           0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f,
+           0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f,
+           0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f,
+           -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f,
+           4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f,
+           -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f,
+           -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f,
+           6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f,
+           -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f,
+           0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f,
+           0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f,
+           0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f,
+           -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f,
+           -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f,
+           -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f,
+           -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f,
+           0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f,
+           -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f,
+           0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f,
+           2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f,
+           -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f,
+           -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f,
+           -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f,
+           2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f,
+           0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f,
+           0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f,
+           0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f,
+           -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f,
+           -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f,
+           -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f,
+           -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f,
+           0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f,
+           0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f,
+           0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f,
+           -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f,
+           -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f,
+           -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f,
+           -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f,
+           -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f,
+           0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f,
+           -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f,
+           -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f,
+           0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f,
+           -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f,
+           0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f,
+           -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f,
+           0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f,
+           1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f,
+           -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f,
+           -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f,
+           -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f,
+           -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f,
+           0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f,
+           0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f,
+           0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f,
+           0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f,
+           0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f,
+           -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f,
+           -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f,
+           0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f,
+           -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f,
+           0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f,
+           -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f,
+           0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f,
+           0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f,
+           3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f,
+           0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f,
+           0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f,
+           -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f,
+           -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f,
+           0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f,
+           0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f,
+           0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f,
+           -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f,
+           0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f,
+           0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f,
+           6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f,
+           -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f,
+           0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f,
+           -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f,
+           0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f,
+           0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f,
+           9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f,
+           -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f,
+           0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f,
+           -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f,
+           0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f,
+           -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f,
+           5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f,
+           -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f,
+           -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f,
+           -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f,
+           0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f,
+           -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f,
+           0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f,
+           -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f,
+           0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f,
+           -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f,
+           -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f,
+           0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f,
+           0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f,
+           8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f,
+           -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f,
+           -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f,
+           0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f,
+           -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f,
+           7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f,
+           0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f,
+           0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f,
+           -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f,
+           0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f,
+           0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f,
+           -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f,
+           -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f,
+           0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f,
+           8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f,
+           -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f,
+           0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f,
+           -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f,
+           0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f,
+           -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f,
+           0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f,
+           0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f,
+           -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f,
+           0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f,
+           -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f,
+           0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f,
+           0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f,
+           0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f,
+           -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f,
+           -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f,
+           0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f,
+           -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f,
+           -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f,
+           -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f,
+           -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f,
+           -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f,
+           -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f,
+           -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f,
+           0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f,
+           -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f,
+           -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f,
+           -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f,
+           0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f,
+           -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f,
+           -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f,
+           -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f,
+           0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f,
+           0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f,
+           0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f,
+           -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f,
+           -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f,
+           -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f,
+           -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f,
+           -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f,
+           2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f,
+           -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f,
+           0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f,
+           0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f,
+           0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f,
+           -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f,
+           4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f,
+           0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f,
+           0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f,
+           -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f,
+           -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f,
+           -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
+           -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
+           -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
+           -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f };
 
-
-
-
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128()
-{
-    static const float detector[] = {
-       0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
-       0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
-       0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
-       0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
-       -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
-       -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
-       -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
-       0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
-       0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
-       0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
-       0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
-       0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,
-       5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f,
-       0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f,
-       0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f,
-       0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f,
-       0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f,
-       0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f,
-       0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f,
-       -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f,
-       -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f,
-       -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f,
-       0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f,
-       0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f,
-       -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f,
-       0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f,
-       -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f,
-       0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f,
-       0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f,
-       -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f,
-       -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f,
-       -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f,
-       0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f,
-       -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f,
-       0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f,
-       0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f,
-       -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f,
-       0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f,
-       3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f,
-       -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f,
-       -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f,
-       -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f,
-       -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f,
-       -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f,
-       -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f,
-       -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f,
-       0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f,
-       0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f,
-       0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f,
-       0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f,
-       0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f,
-       8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f,
-       -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f,
-       -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f,
-       -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f,
-       0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f,
-       -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f,
-       -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f,
-       0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f,
-       -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f,
-       -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f,
-       0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f,
-       0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f,
-       2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f,
-       0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f,
-       -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f,
-       6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f,
-       0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f,
-       -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f,
-       -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f,
-       -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f,
-       1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f,
-       -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f,
-       -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f,
-       -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f,
-       0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f,
-       0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f,
-       0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f,
-       -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f,
-       -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f,
-       -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f,
-       0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f,
-       -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f,
-       8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f,
-       -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f,
-       -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f,
-       -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f,
-       9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f,
-       0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f,
-       -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f,
-       2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f,
-       2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f,
-       -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f,
-       0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f,
-       -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f,
-       8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f,
-       -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f,
-       -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f,
-       8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f,
-       0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f,
-       0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f,
-       0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f,
-       -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f,
-       0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f,
-       0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f,
-       0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f,
-       -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f,
-       -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f,
-       0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f,
-       0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f,
-       -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f,
-       1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f,
-       0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f,
-       -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f,
-       -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f,
-       -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f,
-       0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f,
-       -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f,
-       0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f,
-       -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f,
-       0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f,
-       0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f,
-       -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f,
-       -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f,
-       0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f,
-       9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f,
-       -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f,
-       -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f,
-       -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f,
-       -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f,
-       0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f,
-       -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f,
-       7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f,
-       -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f,
-       7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f,
-       0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f,
-       0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f,
-       0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f,
-       -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f,
-       -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f,
-       0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f,
-       -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f,
-       0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f,
-       0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f,
-       -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f,
-       0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f,
-       -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f,
-       0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f,
-       -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f,
-       -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f,
-       -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f,
-       -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f,
-       -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f,
-       0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f,
-       0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f,
-       -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f,
-       0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f,
-       0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f,
-       0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f,
-       -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f,
-       -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f,
-       0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f,
-       -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f,
-       8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f,
-       -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f,
-       -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f,
-       -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f,
-       -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f,
-       0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f,
-       0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f,
-       0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f,
-       0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f,
-       0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f,
-       0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f,
-       -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f,
-       0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f,
-       0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f,
-       0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f,
-       0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f,
-       -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f,
-       0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f,
-       4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f,
-       -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f,
-       -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f,
-       0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f,
-       0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f,
-       0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f,
-       0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f,
-       -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f,
-       0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f,
-       -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f,
-       -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f,
-       -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f,
-       0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f,
-       -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f,
-       -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f,
-       -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f,
-       -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f,
-       -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f,
-       8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f,
-       -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f,
-       -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f,
-       -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f,
-       3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f,
-       -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f,
-       -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f,
-       -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f,
-       0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f,
-       1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f,
-       0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f,
-       -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f,
-       -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f,
-       -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f,
-       -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f,
-       0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f,
-       -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f,
-       0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f,
-       -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f,
-       -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f,
-       -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f,
-       0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f,
-       -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f,
-       0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f,
-       0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f,
-       -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f,
-       -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f,
-       0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f,
-       0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f,
-       0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f,
-       -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f,
-       -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f,
-       0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f,
-       -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f,
-       2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f,
-       0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f,
-       -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f,
-       0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f,
-       0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f,
-       0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f,
-       0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f,
-       0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f,
-       -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f,
-       -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f,
-       0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f,
-       -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f,
-       0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f,
-       -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f,
-       0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f,
-       -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f,
-       -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f,
-       0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f,
-       5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f,
-       0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f,
-       -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f,
-       0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f,
-       0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f,
-       0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f,
-       0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f,
-       0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f,
-       -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f,
-       -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f,
-       -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f,
-       -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f,
-       0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f,
-       -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f,
-       0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f,
-       7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f,
-       8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f,
-       -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f,
-       -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f,
-       -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f,
-       -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f,
-       -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f,
-       0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f,
-       0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f,
-       0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f,
-       -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f,
-       5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f,
-       -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f,
-       -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f,
-       -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f,
-       0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f,
-       0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f,
-       0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f,
-       0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f,
-       -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f,
-       0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f,
-       0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f,
-       0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f,
-       0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f,
-       -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f,
-       -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f,
-       0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f,
-       -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f,
-       -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f,
-       -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f,
-       0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f,
-       -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f,
-       -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f,
-       -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f,
-       -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f,
-       0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f,
-       0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f,
-       -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f,
-       -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f,
-       -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f,
-       -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f,
-       0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f,
-       -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f,
-       -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f,
-       -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f,
-       0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f,
-       -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f,
-       0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f,
-       -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f,
-       0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f,
-       6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f,
-       -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f,
-       -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f,
-       0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f,
-       -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f,
-       -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f,
-       -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f,
-       0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f,
-       -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f,
-       8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f,
-       -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f,
-       -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f,
-       -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f,
-       0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f,
-       0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f,
-       0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f,
-       -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f,
-       -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f,
-       -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f,
-       -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f,
-       0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f,
-       0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f,
-       -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f,
-       -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f,
-       0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f,
-       0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f,
-       -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f,
-       -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f,
-       -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f,
-       7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f,
-       -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f,
-       -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f,
-       -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f,
-       -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f,
-       0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f,
-       0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f,
-       6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f,
-       0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f,
-       0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f,
-       -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f,
-       0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f,
-       0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f,
-       0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f,
-       0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f,
-       -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f,
-       0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f,
-       -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f,
-       2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f,
-       0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f,
-       0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f,
-       0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f,
-       0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f,
-       0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f,
-       0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f,
-       0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f,
-       -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f,
-       -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f,
-       -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f,
-       0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f,
-       -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f,
-       -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f,
-       -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f,
-       8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f,
-       -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f,
-       -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f,
-       0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f,
-       -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f,
-       -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f,
-       -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f,
-       -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f,
-       0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f,
-       0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f,
-       0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f,
-       -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f,
-       0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f,
-       6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f,
-       -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f,
-       0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f,
-       -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f,
-       0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f,
-       0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f,
-       -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f,
-       0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f,
-       -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f,
-       0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f,
-       -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f,
-       0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f,
-       3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f,
-       -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f,
-       0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f,
-       -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f,
-       6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f,
-       -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f,
-       1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f,
-       0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f,
-       -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f,
-       1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f,
-       0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f,
-       0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f,
-       -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f,
-       -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f,
-       0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f,
-       -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f,
-       0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f,
-       8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f,
-       0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f,
-       -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f,
-       0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f,
-       -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f,
-       0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f,
-       -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f,
-       -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f,
-       0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f,
-       -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f,
-       0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f,
-       -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f,
-       0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f,
-       -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f,
-       -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f,
-       -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f,
-       -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f,
-       -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f,
-       8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f,
-       0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f,
-       0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f,
-       0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f,
-       0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f,
-       0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f,
-       0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f,
-       0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f,
-       0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f,
-       -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f,
-       -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f,
-       0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f,
-       0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f,
-       0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f,
-       0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f,
-       0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f,
-       -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f,
-       0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f,
-       2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f,
-       0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f,
-       -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f,
-       -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f,
-       7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f,
-       -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f,
-       -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f,
-       -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f,
-       -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f,
-       -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f,
-       -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f,
-       0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f,
-       -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f,
-       0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f,
-       0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f,
-       0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f,
-       -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f,
-       -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f,
-       -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f,
-       0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f,
-       -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f,
-       0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f,
-       0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f,
-       0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f,
-       -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f,
-       8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f,
-       0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f,
-       0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f,
-       0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f,
-       -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f,
-       0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f,
-       -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f,
-       0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f,
-       -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f,
-       0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f,
-       -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f,
-       0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f,
-       -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f,
-       -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f,
-       -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f,
-       0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f,
-       -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f,
-       0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f,
-       -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f,
-       0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f,
-       -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f,
-       -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f,
-       -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f,
-       0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f,
-       0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f,
-       0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f,
-       -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f,
-       -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f,
-       0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f,
-       0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f,
-       0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f,
-       -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f,
-       0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f,
-       -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f,
-       4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f,
-       -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f,
-       9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f,
-       -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f,
-       0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f,
-       -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f,
-       -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f,
-       0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f,
-       0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f,
-       -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f,
-       -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f,
-       0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f,
-       5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f,
-       0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f,
-       -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f,
-       -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f,
-       -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f,
-       -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f,
-       0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f,
-       -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f,
-       0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f,
-       -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f,
-       0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f,
-       -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f,
-       0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f,
-       -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f,
-       -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f,
-       -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f,
-       0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f,
-       -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f,
-       -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f,
-       0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f,
-       -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f,
-       -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f,
-       -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f,
-       0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f,
-       0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f,
-       0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f,
-       0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f,
-       -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f,
-       3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f,
-       0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f,
-       -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f,
-       0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f,
-       0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f,
-       -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f,
-       -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f,
-       0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f,
-       0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f,
-       -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f,
-       -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f,
-       -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f,
-       -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f,
-       -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f,
-       -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f,
-       -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f,
-       0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f,
-       0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f,
-       -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f,
-       0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f,
-       -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f,
-       0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f,
-       -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f,
-       -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f,
-       -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f,
-       0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f,
-       0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f,
-       0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f,
-       -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f,
-       -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f,
-       0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f,
-       0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f,
-       0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f,
-       -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f,
-       0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f,
-       0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f,
-       0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f,
-       -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f,
-       0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f,
-       0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f,
-       0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f,
-       -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f,
-       -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f,
-       0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f,
-       -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f,
-       0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f,
-       -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f,
-       -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f,
-       -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f,
-       -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f,
-       0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f,
-       0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f,
-       -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f,
-       0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f,
-       7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f,
-       0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f,
-       -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f,
-       0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f,
-       0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f,
-       -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f,
-       0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f,
-       0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f,
-       -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f,
-       -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f,
-       0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f,
-       1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f,
-       0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f,
-       5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f,
-       4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f,
-       0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f,
-       0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f,
-       -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f,
-       -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f,
-       0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f,
-       0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f,
-       0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f,
-       0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f,
-       0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f,
-       0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f,
-       -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f,
-       0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f,
-       0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f,
-       0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f,
-       -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f,
-       4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f,
-       -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f,
-       -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f,
-       6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f,
-       -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f,
-       0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f,
-       0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f,
-       0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f,
-       -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f,
-       -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f,
-       -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f,
-       -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f,
-       0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f,
-       -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f,
-       0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f,
-       2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f,
-       -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f,
-       -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f,
-       -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f,
-       2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f,
-       0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f,
-       0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f,
-       0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f,
-       -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f,
-       -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f,
-       -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f,
-       -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f,
-       0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f,
-       0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f,
-       0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f,
-       -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f,
-       -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f,
-       -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f,
-       -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f,
-       -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f,
-       0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f,
-       -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f,
-       -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f,
-       0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f,
-       -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f,
-       0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f,
-       -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f,
-       0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f,
-       1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f,
-       -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f,
-       -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f,
-       -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f,
-       -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f,
-       0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f,
-       0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f,
-       0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f,
-       0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f,
-       0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f,
-       -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f,
-       -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f,
-       0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f,
-       -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f,
-       0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f,
-       -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f,
-       0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f,
-       0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f,
-       3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f,
-       0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f,
-       0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f,
-       -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f,
-       -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f,
-       0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f,
-       0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f,
-       0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f,
-       -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f,
-       0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f,
-       0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f,
-       6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f,
-       -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f,
-       0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f,
-       -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f,
-       0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f,
-       0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f,
-       9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f,
-       -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f,
-       0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f,
-       -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f,
-       0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f,
-       -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f,
-       5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f,
-       -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f,
-       -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f,
-       -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f,
-       0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f,
-       -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f,
-       0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f,
-       -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f,
-       0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f,
-       -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f,
-       -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f,
-       0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f,
-       0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f,
-       8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f,
-       -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f,
-       -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f,
-       0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f,
-       -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f,
-       7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f,
-       0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f,
-       0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f,
-       -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f,
-       0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f,
-       0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f,
-       -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f,
-       -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f,
-       0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f,
-       8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f,
-       -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f,
-       0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f,
-       -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f,
-       0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f,
-       -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f,
-       0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f,
-       0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f,
-       -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f,
-       0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f,
-       -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f,
-       0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f,
-       0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f,
-       0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f,
-       -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f,
-       -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f,
-       0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f,
-       -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f,
-       -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f,
-       -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f,
-       -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f,
-       -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f,
-       -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f,
-       -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f,
-       0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f,
-       -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f,
-       -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f,
-       -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f,
-       0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f,
-       -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f,
-       -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f,
-       -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f,
-       0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f,
-       0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f,
-       0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f,
-       -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f,
-       -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f,
-       -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f,
-       -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f,
-       -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f,
-       2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f,
-       -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f,
-       0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f,
-       0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f,
-       0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f,
-       -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f,
-       4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f,
-       0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f,
-       0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f,
-       -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f,
-       -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f,
-       -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
-       -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
-       -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
-       -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f };
-    return std::vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
+        return Mat(1, static_cast<int>(sizeof(detector)/sizeof(detector[0])), CV_32FC1, detector);
+    }
 }
 
 #endif
diff --git a/modules/cudaobjdetect/test/test_objdetect.cpp b/modules/cudaobjdetect/test/test_objdetect.cpp
index 8c7b5ec918..79b1f0734f 100644
--- a/modules/cudaobjdetect/test/test_objdetect.cpp
+++ b/modules/cudaobjdetect/test/test_objdetect.cpp
@@ -48,9 +48,10 @@ using namespace cvtest;
 
 //#define DUMP
 
-struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
     cv::cuda::DeviceInfo devInfo;
+    cv::Ptr<cv::cuda::HOG> hog;
 
 #ifdef DUMP
     std::ofstream f;
@@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
         devInfo = GetParam();
 
         cv::cuda::setDevice(devInfo.deviceID());
+
+        hog = cv::cuda::HOG::create();
     }
 
 #ifdef DUMP
-    void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void dump(const std::vector<cv::Point>& locations)
     {
-        f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
-        f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
-
-        for (int i = 0; i < blockHists.rows; ++i)
-        {
-            for (int j = 0; j < blockHists.cols; ++j)
-            {
-                float val = blockHists.at<float>(i, j);
-                f.write((char*)&val, sizeof(val));
-            }
-        }
-
         int nlocations = locations.size();
         f.write((char*)&nlocations, sizeof(nlocations));
 
@@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
             f.write((char*)&locations[i], sizeof(locations[i]));
     }
 #else
-    void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations)
+    void compare(const std::vector<cv::Point>& locations)
     {
+        // skip block_hists check: read and discard the stored values to advance the stream
         int rows, cols;
         f.read((char*)&rows, sizeof(rows));
         f.read((char*)&cols, sizeof(cols));
-        ASSERT_EQ(rows, blockHists.rows);
-        ASSERT_EQ(cols, blockHists.cols);
-
-        for (int i = 0; i < blockHists.rows; ++i)
+        for (int i = 0; i < rows; ++i)
         {
-            for (int j = 0; j < blockHists.cols; ++j)
+            for (int j = 0; j < cols; ++j)
             {
                 float val;
                 f.read((char*)&val, sizeof(val));
-                ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
             }
         }
 
@@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
 
     void testDetect(const cv::Mat& img)
     {
-        gamma_correction = false;
-        setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        hog->setGammaCorrection(false);
+        hog->setSVMDetector(hog->getDefaultPeopleDetector());
 
         std::vector<cv::Point> locations;
 
         // Test detect
-        detect(loadMat(img), locations, 0);
+        hog->detect(loadMat(img), locations);
 
 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif
 
         // Test detect on smaller image
         cv::Mat img2;
         cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);
 
 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif
 
         // Test detect on greater image
         cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
-        detect(loadMat(img2), locations, 0);
+        hog->detect(loadMat(img2), locations);
 
 #ifdef DUMP
-        dump(cv::Mat(block_hists), locations);
+        dump(locations);
 #else
-        compare(cv::Mat(block_hists), locations);
+        compare(locations);
 #endif
     }
-
-    // Does not compare border value, as interpolation leads to delta
-    void compare_inner_parts(cv::Mat d1, cv::Mat d2)
-    {
-        for (int i = 1; i < blocks_per_win_y - 1; ++i)
-            for (int j = 1; j < blocks_per_win_x - 1; ++j)
-                for (int k = 0; k < block_hist_size; ++k)
-                {
-                    float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    ASSERT_FLOAT_EQ(a, b);
-                }
-    }
 };
 
 // desabled while resize does not fixed
@@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     cv::Mat img_rgb = readImage("hog/road.png");
     ASSERT_FALSE(img_rgb.empty());
 
-#ifdef DUMP
     f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
     ASSERT_TRUE(f.is_open());
-#else
-    f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-    ASSERT_TRUE(f.is_open());
-#endif
 
     // Test on color image
     cv::Mat img;
@@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
     // Test on gray image
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
     testDetect(img);
-
-    f.close();
 }
 
 CUDA_TEST_P(HOG, GetDescriptors)
@@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)
 
     // Convert train images into feature vectors (train table)
     cv::cuda::GpuMat descriptors, descriptors_by_cols;
-    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
-    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
+
+    hog->setWinStride(Size(64, 128));
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
+    hog->compute(d_img, descriptors);
+
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
+    hog->compute(d_img, descriptors_by_cols);
 
     // Check size of the result train table
     wins_per_img_x = 3;
@@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
                     ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
                               r[(x * blocks_per_win_y + y) * block_hist_size + k]);
     }
-
-    /* Now we want to extract the same feature vectors, but from single images. NOTE: results will
-    be defferent, due to border values interpolation. Using of many small images is slower, however we
-    wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
-    works good, it can be checked in the gpu_hog sample */
-
-    img_rgb = readImage("hog/positive1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    // Everything is fine with interpolation for left top subimage
-    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
-
-    img_rgb = readImage("hog/positive2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
-
-    img_rgb = readImage("hog/negative1.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
-
-    img_rgb = readImage("hog/negative2.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
-
-    img_rgb = readImage("hog/positive3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
-
-    img_rgb = readImage("hog/negative3.png");
-    ASSERT_TRUE(!img_rgb.empty());
-    cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::cuda::GpuMat(img));
-    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
 }
 
 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
@@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
     cv::cuda::GpuMat d_img(img);
     cv::Mat markedImage(img.clone());
 
-    cv::cuda::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
-    d_hog.nlevels = d_hog.nlevels + 32;
+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
+    d_hog->setNumLevels(d_hog->getNumLevels() + 32);
 
     std::vector<cv::Rect> found_locations;
-    d_hog.detectMultiScale(d_img, found_locations);
+    d_hog->detectMultiScale(d_img, found_locations);
 
 #if defined (LOG_CASCADE_STATISTIC)
     for (int i = 0; i < (int)found_locations.size(); i++)
@@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }
 
-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
 }
 
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index 106de79320..01978a21c9 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -244,19 +244,13 @@ void App::run()
     Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
     Size win_stride(args.win_stride_width, args.win_stride_height);
 
-    // Create HOG descriptors and detectors here
-    vector<float> detector;
-    if (win_size == Size(64, 128))
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
-    else
-        detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
+    cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
+    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);
 
-    cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                   cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                   cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
-    cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                              HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
-    gpu_hog.setSVMDetector(detector);
+    // Create HOG descriptors and detectors here
+    Mat detector = gpu_hog->getDefaultPeopleDetector();
+
+    gpu_hog->setSVMDetector(detector);
     cpu_hog.setSVMDetector(detector);
 
     while (running)
@@ -307,9 +301,6 @@ void App::run()
             else img = img_aux;
             img_to_show = img;
 
-            gpu_hog.nlevels = nlevels;
-            cpu_hog.nlevels = nlevels;
-
             vector<Rect> found;
 
             // Perform HOG classification
@@ -317,11 +308,19 @@ void App::run()
             if (use_gpu)
             {
                 gpu_img.upload(img);
-                gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
-                                         Size(0, 0), scale, gr_threshold);
+                gpu_hog->setNumLevels(nlevels);
+                gpu_hog->setHitThreshold(hit_threshold);
+                gpu_hog->setWinStride(win_stride);
+                gpu_hog->setScaleFactor(scale);
+                gpu_hog->setGroupThreshold(gr_threshold);
+                gpu_hog->detectMultiScale(gpu_img, found);
             }
-            else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
+            else
+            {
+                cpu_hog.nlevels = nlevels;
+                cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
                                           Size(0, 0), scale, gr_threshold);
+            }
             hogWorkEnd();
 
             // Draw positive classified windows

From 734212a4023ca82a7b0ae6f7fabca53e08de177f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 14 Jan 2015 19:48:58 +0300
Subject: [PATCH 34/55] refactor CUDA CascadeClassifier

---
 .../include/opencv2/cudaobjdetect.hpp         |  97 ++-
 modules/cudaobjdetect/perf/perf_objdetect.cpp |  26 +-
 .../cudaobjdetect/src/cascadeclassifier.cpp   | 738 ++++++++++--------
 modules/cudaobjdetect/test/test_objdetect.cpp |  33 +-
 samples/gpu/cascadeclassifier.cpp             |  60 +-
 5 files changed, 519 insertions(+), 435 deletions(-)

diff --git a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
index 2887bf472c..31a6a94c99 100644
--- a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
+++ b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
@@ -75,7 +75,7 @@ namespace cv { namespace cuda {
     -   (Python) An example applying the HOG descriptor for people detection can be found at
         opencv_source_code/samples/python2/peopledetect.py
  */
-class CV_EXPORTS HOG : public cv::Algorithm
+class CV_EXPORTS HOG : public Algorithm
 {
 public:
     enum
@@ -204,87 +204,84 @@ public:
     -   A Nvidea API specific cascade classifier example can be found at
         opencv_source_code/samples/gpu/cascadeclassifier_nvidia_api.cpp
  */
-class CV_EXPORTS CascadeClassifier_CUDA
+class CV_EXPORTS CascadeClassifier : public Algorithm
 {
 public:
-    CascadeClassifier_CUDA();
     /** @brief Loads the classifier from a file. Cascade type is detected automatically by constructor parameter.
 
     @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
     (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
     type of OpenCV XML cascade supported for LBP.
      */
-    CascadeClassifier_CUDA(const String& filename);
-    ~CascadeClassifier_CUDA();
-
-    /** @brief Checks whether the classifier is loaded or not.
-    */
-    bool empty() const;
-    /** @brief Loads the classifier from a file. The previous content is destroyed.
-
-    @param filename Name of the file from which the classifier is loaded. Only the old haar classifier
-    (trained by the haar training application) and NVIDIA's nvbin are supported for HAAR and only new
-    type of OpenCV XML cascade supported for LBP.
+    static Ptr<CascadeClassifier> create(const String& filename);
+    /** @overload
      */
-    bool load(const String& filename);
-    /** @brief Destroys the loaded classifier.
-    */
-    void release();
+    static Ptr<CascadeClassifier> create(const FileStorage& file);
+
+    //! Maximum possible object size. Objects larger than that are ignored.
+    //! Supported only for LBP cascades.
+    virtual void setMaxObjectSize(Size maxObjectSize) = 0;
+    virtual Size getMaxObjectSize() const = 0;
+
+    //! Minimum possible object size. Objects smaller than that are ignored.
+    virtual void setMinObjectSize(Size minSize) = 0;
+    virtual Size getMinObjectSize() const = 0;
+
+    //! Parameter specifying how much the image size is reduced at each image scale.
+    virtual void setScaleFactor(double scaleFactor) = 0;
+    virtual double getScaleFactor() const = 0;
+
+    //! Parameter specifying how many neighbors each candidate rectangle should have
+    //! to retain it.
+    virtual void setMinNeighbors(int minNeighbors) = 0;
+    virtual int getMinNeighbors() const = 0;
+
+    virtual void setFindLargestObject(bool findLargestObject) = 0;
+    virtual bool getFindLargestObject() = 0;
+
+    virtual void setMaxNumObjects(int maxNumObjects) = 0;
+    virtual int getMaxNumObjects() const = 0;
+
+    virtual Size getClassifierSize() const = 0;
 
-    /** @overload */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
     /** @brief Detects objects of different sizes in the input image.
 
     @param image Matrix of type CV_8U containing an image where objects should be detected.
-    @param objectsBuf Buffer to store detected objects (rectangles). If it is empty, it is allocated
-    with the default size. If not empty, the function searches not more than N objects, where
-    N = sizeof(objectsBufer's data)/sizeof(cv::Rect).
-    @param maxObjectSize Maximum possible object size. Objects larger than that are ignored. Used for
-    second signature and supported only for LBP cascades.
-    @param scaleFactor Parameter specifying how much the image size is reduced at each image scale.
-    @param minNeighbors Parameter specifying how many neighbors each candidate rectangle should have
-    to retain it.
-    @param minSize Minimum possible object size. Objects smaller than that are ignored.
+    @param objects Buffer to store detected objects (rectangles).
 
-    The detected objects are returned as a list of rectangles.
+    To get the final array of detected objects, use the CascadeClassifier::convert method.
 
-    The function returns the number of detected objects, so you can retrieve them as in the following
-    example:
     @code
-        cuda::CascadeClassifier_CUDA cascade_gpu(...);
+        Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(...);
 
         Mat image_cpu = imread(...)
         GpuMat image_gpu(image_cpu);
 
         GpuMat objbuf;
-        int detections_number = cascade_gpu.detectMultiScale( image_gpu,
-                  objbuf, 1.2, minNeighbors);
+        cascade_gpu->detectMultiScale(image_gpu, objbuf);
 
-        Mat obj_host;
-        // download only detected number of rectangles
-        objbuf.colRange(0, detections_number).download(obj_host);
+        std::vector<Rect> faces;
+        cascade_gpu->convert(objbuf, faces);
 
-        Rect* faces = obj_host.ptr<Rect>();
         for(int i = 0; i < detections_num; ++i)
            cv::rectangle(image_cpu, faces[i], Scalar(255));
 
         imshow("Faces", image_cpu);
     @endcode
+
     @sa CascadeClassifier::detectMultiScale
      */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+    virtual void detectMultiScale(InputArray image,
+                                  OutputArray objects,
+                                  Stream& stream = Stream::Null()) = 0;
 
-    bool findLargestObject;
-    bool visualizeInPlace;
+    /** @brief Converts objects array from internal representation to standard vector.
 
-    Size getClassifierSize() const;
-
-private:
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-    struct HaarCascade;
-    struct LbpCascade;
-    friend class CascadeClassifier_CUDA_LBP;
+    @param gpu_objects Objects array in internal representation.
+    @param objects Resulting array.
+     */
+    virtual void convert(OutputArray gpu_objects,
+                         std::vector<Rect>& objects) = 0;
 };
 
 //! @}
diff --git a/modules/cudaobjdetect/perf/perf_objdetect.cpp b/modules/cudaobjdetect/perf/perf_objdetect.cpp
index 8aeabc4ec0..8b3112498d 100644
--- a/modules/cudaobjdetect/perf/perf_objdetect.cpp
+++ b/modules/cudaobjdetect/perf/perf_objdetect.cpp
@@ -107,18 +107,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::CascadeClassifier_CUDA d_cascade;
-        ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
+        cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
+                cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
 
         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat objects_buffer;
-        int detections_num = 0;
 
-        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+        TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
+
+        std::vector<cv::Rect> gpu_rects;
+        d_cascade->convert(objects_buffer, gpu_rects);
 
-        std::vector<cv::Rect> gpu_rects(detections_num);
-        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
-        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
         cv::groupRectangles(gpu_rects, 3, 0.2);
         SANITY_CHECK(gpu_rects);
     }
@@ -146,18 +145,17 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::CascadeClassifier_CUDA d_cascade;
-        ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
+        cv::Ptr<cv::cuda::CascadeClassifier> d_cascade =
+                cv::cuda::CascadeClassifier::create(perf::TestBase::getDataPath(GetParam().second));
 
         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat objects_buffer;
-        int detections_num = 0;
 
-        TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
+        TEST_CYCLE() d_cascade->detectMultiScale(d_img, objects_buffer);
+
+        std::vector<cv::Rect> gpu_rects;
+        d_cascade->convert(objects_buffer, gpu_rects);
 
-        std::vector<cv::Rect> gpu_rects(detections_num);
-        cv::Mat gpu_rects_mat(1, detections_num, cv::DataType<cv::Rect>::type, &gpu_rects[0]);
-        objects_buffer.colRange(0, detections_num).download(gpu_rects_mat);
         cv::groupRectangles(gpu_rects, 3, 0.2);
         SANITY_CHECK(gpu_rects);
     }
diff --git a/modules/cudaobjdetect/src/cascadeclassifier.cpp b/modules/cudaobjdetect/src/cascadeclassifier.cpp
index c4e9870151..10443aa66c 100644
--- a/modules/cudaobjdetect/src/cascadeclassifier.cpp
+++ b/modules/cudaobjdetect/src/cascadeclassifier.cpp
@@ -48,160 +48,185 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA()               { throw_no_cuda(); }
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String&)  { throw_no_cuda(); }
-cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA()              { throw_no_cuda(); }
-bool cv::cuda::CascadeClassifier_CUDA::empty() const                    { throw_no_cuda(); return true; }
-bool cv::cuda::CascadeClassifier_CUDA::load(const String&)              { throw_no_cuda(); return true; }
-Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const        { throw_no_cuda(); return Size();}
-void cv::cuda::CascadeClassifier_CUDA::release()                        { throw_no_cuda(); }
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size)       {throw_no_cuda(); return -1;}
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
 
 #else
 
-struct cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
+//
+// CascadeClassifierBase
+//
+
+namespace
 {
-public:
-    CascadeClassifierImpl(){}
-    virtual ~CascadeClassifierImpl(){}
+    class CascadeClassifierBase : public cuda::CascadeClassifier
+    {
+    public:
+        CascadeClassifierBase();
 
-    virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
-                      bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0;
+        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
+        virtual Size getMaxObjectSize() const { return maxObjectSize_; }
 
-    virtual cv::Size getClassifierCvSize() const = 0;
-    virtual bool read(const String& classifierAsXml) = 0;
-};
+        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
+        virtual Size getMinObjectSize() const { return minObjectSize_; }
 
-#ifndef HAVE_OPENCV_CUDALEGACY
+        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
+        virtual double getScaleFactor() const { return scaleFactor_; }
 
-struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
+        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
+        virtual int getMinNeighbors() const { return minNeighbors_; }
+
+        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
+        virtual bool getFindLargestObject() { return findLargestObject_; }
+
+        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
+        virtual int getMaxNumObjects() const { return maxNumObjects_; }
+
+    protected:
+        Size maxObjectSize_;
+        Size minObjectSize_;
+        double scaleFactor_;
+        int minNeighbors_;
+        bool findLargestObject_;
+        int maxNumObjects_;
+    };
+
+    CascadeClassifierBase::CascadeClassifierBase() :
+        maxObjectSize_(),
+        minObjectSize_(),
+        scaleFactor_(1.2),
+        minNeighbors_(4),
+        findLargestObject_(false),
+        maxNumObjects_(100)
+    {
+    }
+}
+
+//
+// HaarCascade
+//
+
+#ifdef HAVE_OPENCV_CUDALEGACY
+
+namespace
 {
-public:
-    HaarCascade()
+    class HaarCascade_Impl : public CascadeClassifierBase
     {
-        throw_no_cuda();
+    public:
+        explicit HaarCascade_Impl(const String& filename);
+
+        virtual Size getClassifierSize() const;
+
+        virtual void detectMultiScale(InputArray image,
+                                      OutputArray objects,
+                                      Stream& stream);
+
+        virtual void convert(OutputArray gpu_objects,
+                             std::vector<Rect>& objects);
+
+    private:
+        NCVStatus load(const String& classifierFile);
+        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
+        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
+
+        Size lastAllocatedFrameSize;
+
+        Ptr<NCVMemStackAllocator> gpuAllocator;
+        Ptr<NCVMemStackAllocator> cpuAllocator;
+
+        cudaDeviceProp devProp;
+        NCVStatus ncvStat;
+
+        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
+        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
+
+        Ptr<NCVVectorAlloc<HaarStage64> >           h_haarStages;
+        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
+        Ptr<NCVVectorAlloc<HaarFeature64> >         h_haarFeatures;
+
+        HaarClassifierCascadeDescriptor haar;
+
+        Ptr<NCVVectorAlloc<HaarStage64> >           d_haarStages;
+        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
+        Ptr<NCVVectorAlloc<HaarFeature64> >         d_haarFeatures;
+    };
+
+    static void NCVDebugOutputHandler(const String &msg)
+    {
+        CV_Error(Error::GpuApiCallError, msg.c_str());
     }
 
-    unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size)
-    {
-        throw_no_cuda();
-        return 0;
-    }
-
-    cv::Size getClassifierCvSize() const
-    {
-        throw_no_cuda();
-        return cv::Size();
-    }
-
-    bool read(const String&)
-    {
-        throw_no_cuda();
-        return false;
-    }
-};
-
-#else
-
-struct cv::cuda::CascadeClassifier_CUDA::HaarCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
-{
-public:
-    HaarCascade() : lastAllocatedFrameSize(-1, -1)
+    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
+        lastAllocatedFrameSize(-1, -1)
     {
         ncvSetDebugOutputHandler(NCVDebugOutputHandler);
-    }
-
-    bool read(const String& filename)
-    {
         ncvSafeCall( load(filename) );
-        return true;
     }
 
-    NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
-                      bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
-                      /*out*/unsigned int& numDetections)
+    Size HaarCascade_Impl::getClassifierSize() const
     {
-        calculateMemReqsAndAllocate(src.size());
-
-        NCVMemPtr src_beg;
-        src_beg.ptr = (void*)src.ptr<Ncv8u>();
-        src_beg.memtype = NCVMemoryTypeDevice;
-
-        NCVMemSegment src_seg;
-        src_seg.begin = src_beg;
-        src_seg.size  = src.step * src.rows;
-
-        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
-        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
-
-        CV_Assert(objects.rows == 1);
-
-        NCVMemPtr objects_beg;
-        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
-        objects_beg.memtype = NCVMemoryTypeDevice;
-
-        NCVMemSegment objects_seg;
-        objects_seg.begin = objects_beg;
-        objects_seg.size = objects.step * objects.rows;
-        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
-        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
-
-        NcvSize32u roi;
-        roi.width = d_src.width();
-        roi.height = d_src.height();
-
-        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
-
-        Ncv32u flags = 0;
-        flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
-        flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace  : 0;
-
-        ncvStat = ncvDetectObjectsMultiScale_device(
-            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
-            *d_haarStages, *d_haarNodes, *d_haarFeatures,
-            winMinSize,
-            minNeighbors,
-            scaleStep, 1,
-            flags,
-            *gpuAllocator, *cpuAllocator, devProp, 0);
-        ncvAssertReturnNcvStat(ncvStat);
-        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
-
-        return NCV_SUCCESS;
+        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
     }
 
-    unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
-                      bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size /*maxObjectSize*/)
+    void HaarCascade_Impl::detectMultiScale(InputArray _image,
+                                            OutputArray _objects,
+                                            Stream& stream)
     {
-        CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
+        const GpuMat image = _image.getGpuMat();
 
-        const int defaultObjSearchNum = 100;
-        if (objectsBuf.empty())
+        CV_Assert( image.depth() == CV_8U);
+        CV_Assert( scaleFactor_ > 1 );
+        CV_Assert( !stream );
+
+        Size ncvMinSize = getClassifierSize();
+        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
         {
-            objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
+            ncvMinSize.width = minObjectSize_.width;
+            ncvMinSize.height = minObjectSize_.height;
         }
 
-        cv::Size ncvMinSize = this->getClassifierCvSize();
-
-        if (ncvMinSize.width < minSize.width && ncvMinSize.height < minSize.height)
-        {
-            ncvMinSize.width = minSize.width;
-            ncvMinSize.height = minSize.height;
-        }
+        BufferPool pool(stream);
+        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
 
         unsigned int numDetections;
-        ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections));
+        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );
 
-        return numDetections;
+        if (numDetections > 0)
+        {
+            objectsBuf.colRange(0, numDetections).copyTo(_objects);
+        }
+        else
+        {
+            _objects.release();
+        }
     }
 
-    cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
+    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
+    {
+        if (_gpu_objects.empty())
+        {
+            objects.clear();
+            return;
+        }
 
-private:
-    static void NCVDebugOutputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
+        Mat gpu_objects;
+        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_objects.getGpuMat().download(gpu_objects);
+        }
+        else
+        {
+            gpu_objects = _gpu_objects.getMat();
+        }
 
-    NCVStatus load(const String& classifierFile)
+        CV_Assert( gpu_objects.rows == 1 );
+        CV_Assert( gpu_objects.type() == DataType<Rect>::type );
+
+        Rect* ptr = gpu_objects.ptr<Rect>();
+        objects.assign(ptr, ptr + gpu_objects.cols);
+    }
+
+    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
     {
         int devId = cv::cuda::getDevice();
         ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
@@ -246,7 +271,7 @@ private:
         return NCV_SUCCESS;
     }
 
-    NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
+    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
     {
         if (lastAllocatedFrameSize == frameSize)
         {
@@ -289,88 +314,62 @@ private:
         return NCV_SUCCESS;
     }
 
-    cudaDeviceProp devProp;
-    NCVStatus ncvStat;
+    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
+    { // Runs the NCV Haar detector on src; rectangles go into the single-row objects buffer, their count into numDetections.
+        ncvAssertReturnNcvStat(calculateMemReqsAndAllocate(src.size())); // bail out if buffer setup failed: *gpuAllocator / *cpuAllocator are used below
 
-    Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
-    Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
+        NCVMemPtr src_beg; // wrap the caller's image memory for NCV without copying
+        src_beg.ptr = (void*)src.ptr<Ncv8u>();
+        src_beg.memtype = NCVMemoryTypeDevice;
 
-    Ptr<NCVVectorAlloc<HaarStage64> >           h_haarStages;
-    Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
-    Ptr<NCVVectorAlloc<HaarFeature64> >         h_haarFeatures;
+        NCVMemSegment src_seg;
+        src_seg.begin = src_beg;
+        src_seg.size  = src.step * src.rows;
 
-    HaarClassifierCascadeDescriptor haar;
+        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
+        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
 
-    Ptr<NCVVectorAlloc<HaarStage64> >           d_haarStages;
-    Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
-    Ptr<NCVVectorAlloc<HaarFeature64> >         d_haarFeatures;
+        CV_Assert(objects.rows == 1);
 
-    Size lastAllocatedFrameSize;
+        NCVMemPtr objects_beg; // same wrapping for the output rectangle buffer
+        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
+        objects_beg.memtype = NCVMemoryTypeDevice;
 
-    Ptr<NCVMemStackAllocator> gpuAllocator;
-    Ptr<NCVMemStackAllocator> cpuAllocator;
+        NCVMemSegment objects_seg;
+        objects_seg.begin = objects_beg;
+        objects_seg.size = objects.step * objects.rows;
+        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
+        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);
 
-    virtual ~HaarCascade(){}
-};
+        NcvSize32u roi; // search the whole wrapped image
+        roi.width = d_src.width();
+        roi.height = d_src.height();
+
+        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
+
+        Ncv32u flags = 0;
+        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;
+
+        ncvStat = ncvDetectObjectsMultiScale_device(
+            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
+            *d_haarStages, *d_haarNodes, *d_haarFeatures,
+            winMinSize,
+            minNeighbors_,
+            scaleFactor_, 1,
+            flags,
+            *gpuAllocator, *cpuAllocator, devProp, 0);
+        ncvAssertReturnNcvStat(ncvStat);
+        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
+
+        return NCV_SUCCESS;
+    }
+}
 
 #endif
 
-cv::Size operator -(const cv::Size& a, const cv::Size& b)
-{
-    return cv::Size(a.width - b.width, a.height - b.height);
-}
-
-cv::Size operator +(const cv::Size& a, const int& i)
-{
-    return cv::Size(a.width + i, a.height + i);
-}
-
-cv::Size operator *(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
-}
-
-cv::Size operator /(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
-}
-
-bool operator <=(const cv::Size& a, const cv::Size& b)
-{
-    return a.width <= b.width && a.height <= b.width;
-}
-
-struct PyrLavel
-{
-    PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
-    {
-        do
-        {
-            order = _order;
-            scale = pow(_scale, order);
-            sFrame = frame / scale;
-            workArea = sFrame - window + 1;
-            sWindow = window * scale;
-            _order++;
-        } while (sWindow <= minObjectSize);
-    }
-
-    bool isFeasible(cv::Size maxObj)
-    {
-        return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
-    }
-
-    PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
-    {
-        return PyrLavel(order + 1, factor, frame, window, minObjectSize);
-    }
-
-    int order;
-    float scale;
-    cv::Size sFrame;
-    cv::Size workArea;
-    cv::Size sWindow;
-};
+//
+// LbpCascade
+//
 
 namespace cv { namespace cuda { namespace device
 {
@@ -394,42 +393,154 @@ namespace cv { namespace cuda { namespace device
                              unsigned int* classified,
                              PtrStepSzi integral);
 
-        void connectedConmonents(PtrStepSz<int4>  candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
+        void connectedConmonents(PtrStepSz<int4> candidates,
+                                 int ncandidates,
+                                 PtrStepSz<int4> objects,
+                                 int groupThreshold,
+                                 float grouping_eps,
+                                 unsigned int* nclasses);
     }
 }}}
 
-struct cv::cuda::CascadeClassifier_CUDA::LbpCascade : cv::cuda::CascadeClassifier_CUDA::CascadeClassifierImpl
+namespace
 {
-public:
-    struct Stage
+    cv::Size operator -(const cv::Size& a, const cv::Size& b)
     {
-        int    first;
-        int    ntrees;
-        float  threshold;
+        return cv::Size(a.width - b.width, a.height - b.height);
+    }
+
+    cv::Size operator +(const cv::Size& a, const int& i)
+    {
+        return cv::Size(a.width + i, a.height + i);
+    }
+
+    cv::Size operator *(const cv::Size& a, const float& f)
+    {
+        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
+    }
+
+    cv::Size operator /(const cv::Size& a, const float& f)
+    {
+        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
+    }
+
+    bool operator <=(const cv::Size& a, const cv::Size& b) // true when a is no larger than b in both width and height
+    {
+        return a.width <= b.width && a.height <= b.height; // heights are compared with heights, not with b.width
+    }
+
+    struct PyrLavel
+    {
+        PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+        {
+            do
+            {
+                order = _order;
+                scale = pow(_scale, order);
+                sFrame = frame / scale;
+                workArea = sFrame - window + 1;
+                sWindow = window * scale;
+                _order++;
+            } while (sWindow <= minObjectSize);
+        }
+
+        bool isFeasible(cv::Size maxObj)
+        {
+            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
+        }
+
+        PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+        {
+            return PyrLavel(order + 1, factor, frame, window, minObjectSize);
+        }
+
+        int order;
+        float scale;
+        cv::Size sFrame;
+        cv::Size workArea;
+        cv::Size sWindow;
     };
 
-    LbpCascade(){}
-    virtual ~LbpCascade(){}
-
-    virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool /*findLargestObject*/,
-        bool /*visualizeInPlace*/, cv::Size minObjectSize, cv::Size maxObjectSize)
+    class LbpCascade_Impl : public CascadeClassifierBase
     {
-        CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
+    public:
+        explicit LbpCascade_Impl(const FileStorage& file);
+
+        virtual Size getClassifierSize() const { return NxM; }
+
+        virtual void detectMultiScale(InputArray image,
+                                      OutputArray objects,
+                                      Stream& stream);
+
+        virtual void convert(OutputArray gpu_objects,
+                             std::vector<Rect>& objects);
+
+    private:
+        bool load(const FileNode &root);
+        void allocateBuffers(cv::Size frame);
+
+    private:
+        struct Stage
+        {
+            int    first;
+            int    ntrees;
+            float  threshold;
+        };
+
+        enum stage { BOOST = 0 };
+        enum feature { LBP = 1, HAAR = 2 };
+
+        static const stage stageType = BOOST;
+        static const feature featureType = LBP;
+
+        cv::Size NxM;
+        bool isStumps;
+        int ncategories;
+        int subsetSize;
+        int nodeStep;
+
+        // gpu representation of classifier
+        GpuMat stage_mat;
+        GpuMat trees_mat;
+        GpuMat nodes_mat;
+        GpuMat leaves_mat;
+        GpuMat subsets_mat;
+        GpuMat features_mat;
+
+        GpuMat integral;
+        GpuMat integralBuffer;
+        GpuMat resuzeBuffer;
+
+        GpuMat candidates;
+        static const int integralFactor = 4;
+    };
+
+    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file) // builds the classifier from the first top-level node of file
+    {
+        if (!load(file.getFirstTopLevelNode())) CV_Error(cv::Error::StsParseError, "Failed to load LBP cascade from FileStorage"); // never hand out a half-initialized classifier
+    }
+
+    void LbpCascade_Impl::detectMultiScale(InputArray _image,
+                                           OutputArray _objects,
+                                           Stream& stream)
+    { // Detects objects in an 8-bit GpuMat image; _objects receives a 1xN row of rectangles or is released when none are found.
+        const GpuMat image = _image.getGpuMat();
+
+        CV_Assert( image.depth() == CV_8U);
+        CV_Assert( scaleFactor_ > 1 );
+        CV_Assert( !stream );
 
-        // const int defaultObjSearchNum = 100;
         const float grouping_eps = 0.2f;
 
-        if( !objects.empty() && objects.depth() == CV_32S)
-            objects.reshape(4, 1);
-        else
-            objects.create(1 , image.cols >> 4, CV_32SC4);
+        BufferPool pool(stream);
+        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);
 
         // used for debug
         // candidates.setTo(cv::Scalar::all(0));
         // objects.setTo(cv::Scalar::all(0));
 
-        if (maxObjectSize == cv::Size())
-            maxObjectSize = image.size();
+        // Resolve the "no limit" default per call; storing it back into maxObjectSize_ would pin later calls to this frame's size.
+        const cv::Size maxObjectSize = (maxObjectSize_ == cv::Size()) ? image.size() : maxObjectSize_;
 
         allocateBuffers(image.size());
 
@@ -437,9 +548,9 @@ public:
         GpuMat dclassified(1, 1, CV_32S);
         cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
 
-        PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
+        PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);
 
-        while (level.isFeasible(maxObjectSize))
+        while (level.isFeasible(maxObjectSize))
         {
             int acc = level.sFrame.width + 1;
             float iniScale = level.scale;
@@ -449,7 +560,7 @@ public:
 
             int total = 0, prev  = 0;
 
-            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
+            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
             {
                 // create sutable matrix headers
                 GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
@@ -465,7 +576,7 @@ public:
                 total += totalWidth * (level.workArea.height / step);
 
                 // go to next pyramide level
-                level = level.next(scaleFactor, image.size(), NxM, minObjectSize);
+                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                 area = level.workArea;
 
                 step = (1 + (level.scale <= 2.f));
@@ -473,60 +584,55 @@ public:
                 acc += level.sFrame.width + 1;
             }
 
-            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
+            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                 leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
         }
 
-        if (groupThreshold <= 0  || objects.empty())
-            return 0;
+        if (minNeighbors_ <= 0  || objects.empty())
+            { _objects.release(); return; } // nothing to group: report zero detections instead of leaving stale output
 
         cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-        device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
+        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());
 
         cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
         cudaSafeCall( cudaDeviceSynchronize() );
-        return classified;
-    }
 
-    virtual cv::Size getClassifierCvSize() const { return NxM; }
-
-    bool read(const String& classifierAsXml)
-    {
-        FileStorage fs(classifierAsXml, FileStorage::READ);
-        return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false;
-    }
-
-private:
-
-    void allocateBuffers(cv::Size frame)
-    {
-        if (frame == cv::Size())
-            return;
-
-        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
+        if (classified > 0)
         {
-            resuzeBuffer.create(frame, CV_8UC1);
-
-            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
-
-#ifdef HAVE_OPENCV_CUDALEGACY
-            NcvSize32u roiSize;
-            roiSize.width = frame.width;
-            roiSize.height = frame.height;
-
-            cudaDeviceProp prop;
-            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
-
-            Ncv32u bufSize;
-            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
-            integralBuffer.create(1, bufSize, CV_8UC1);
-#endif
-
-            candidates.create(1 , frame.width >> 1, CV_32SC4);
+            objects.colRange(0, classified).copyTo(_objects);
+        }
+        else
+        {
+            _objects.release();
         }
     }
 
-    bool read(const FileNode &root)
+    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
+    {
+        if (_gpu_objects.empty())
+        {
+            objects.clear();
+            return;
+        }
+
+        Mat gpu_objects;
+        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_objects.getGpuMat().download(gpu_objects);
+        }
+        else
+        {
+            gpu_objects = _gpu_objects.getMat();
+        }
+
+        CV_Assert( gpu_objects.rows == 1 );
+        CV_Assert( gpu_objects.type() == DataType<Rect>::type );
+
+        Rect* ptr = gpu_objects.ptr<Rect>();
+        objects.assign(ptr, ptr + gpu_objects.cols);
+    }
+
+    bool LbpCascade_Impl::load(const FileNode &root)
     {
         const char *CUDA_CC_STAGE_TYPE       = "stageType";
         const char *CUDA_CC_FEATURE_TYPE     = "featureType";
@@ -667,92 +773,90 @@ private:
         return true;
     }
 
-    enum stage { BOOST = 0 };
-    enum feature { LBP = 1, HAAR = 2 };
-    static const stage stageType = BOOST;
-    static const feature featureType = LBP;
+    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
+    {
+        if (frame == cv::Size())
+            return;
 
-    cv::Size NxM;
-    bool isStumps;
-    int ncategories;
-    int subsetSize;
-    int nodeStep;
+        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
+        {
+            resuzeBuffer.create(frame, CV_8UC1);
 
-    // gpu representation of classifier
-    GpuMat stage_mat;
-    GpuMat trees_mat;
-    GpuMat nodes_mat;
-    GpuMat leaves_mat;
-    GpuMat subsets_mat;
-    GpuMat features_mat;
+            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
 
-    GpuMat integral;
-    GpuMat integralBuffer;
-    GpuMat resuzeBuffer;
+        #ifdef HAVE_OPENCV_CUDALEGACY
+            NcvSize32u roiSize;
+            roiSize.width = frame.width;
+            roiSize.height = frame.height;
 
-    GpuMat candidates;
-    static const int integralFactor = 4;
-};
+            cudaDeviceProp prop;
+            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
 
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA()
-: findLargestObject(false), visualizeInPlace(false), impl(0) {}
+            Ncv32u bufSize;
+            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
+            integralBuffer.create(1, bufSize, CV_8UC1);
+        #endif
 
-cv::cuda::CascadeClassifier_CUDA::CascadeClassifier_CUDA(const String& filename)
-: findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
+            candidates.create(1 , frame.width >> 1, CV_32SC4);
+        }
+    }
 
-cv::cuda::CascadeClassifier_CUDA::~CascadeClassifier_CUDA() { release(); }
-
-void cv::cuda::CascadeClassifier_CUDA::release() { if (impl) { delete impl; impl = 0; } }
-
-bool cv::cuda::CascadeClassifier_CUDA::empty() const { return impl == 0; }
-
-Size cv::cuda::CascadeClassifier_CUDA::getClassifierSize() const
-{
-    return this->empty() ? Size() : impl->getClassifierCvSize();
 }
 
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
-{
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
-}
+//
+// create
+//
 
-int cv::cuda::CascadeClassifier_CUDA::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
 {
-    CV_Assert( !this->empty());
-    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
-}
-
-bool cv::cuda::CascadeClassifier_CUDA::load(const String& filename)
-{
-    release();
-
     String fext = filename.substr(filename.find_last_of(".") + 1);
     fext = fext.toLowerCase();
 
     if (fext == "nvbin")
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }
 
     FileStorage fs(filename, FileStorage::READ);
 
     if (!fs.isOpened())
     {
-        impl = new HaarCascade();
-        return impl->read(filename);
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
     }
 
     const char *CUDA_CC_LBP = "LBP";
     String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
     if (featureTypeStr == CUDA_CC_LBP)
-        impl = new LbpCascade();
+    {
+        return makePtr<LbpCascade_Impl>(fs);
+    }
     else
-        impl = new HaarCascade();
+    {
+    #ifndef HAVE_OPENCV_CUDALEGACY
+        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
+        return Ptr<cuda::CascadeClassifier>();
+    #else
+        return makePtr<HaarCascade_Impl>(filename);
+    #endif
+    }
 
-    impl->read(filename);
-    return !this->empty();
+    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
+    return Ptr<cuda::CascadeClassifier>();
+}
+
+Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
+{
+    return makePtr<LbpCascade_Impl>(file);
 }
 
 #endif
diff --git a/modules/cudaobjdetect/test/test_objdetect.cpp b/modules/cudaobjdetect/test/test_objdetect.cpp
index 79b1f0734f..336d6e0718 100644
--- a/modules/cudaobjdetect/test/test_objdetect.cpp
+++ b/modules/cudaobjdetect/test/test_objdetect.cpp
@@ -287,9 +287,15 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
 
 CUDA_TEST_P(LBP_Read_classifier, Accuracy)
 {
-    cv::cuda::CascadeClassifier_CUDA classifier;
     std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
-    ASSERT_TRUE(classifier.load(classifierXmlPath));
+
+    cv::Ptr<cv::cuda::CascadeClassifier> d_cascade;
+
+    ASSERT_NO_THROW(
+        d_cascade = cv::cuda::CascadeClassifier::create(classifierXmlPath);
+    );
+
+    ASSERT_FALSE(d_cascade.empty());
 }
 
 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_Read_classifier,
@@ -329,29 +335,28 @@ CUDA_TEST_P(LBP_classify, Accuracy)
     for (; it != rects.end(); ++it)
         cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));
 
-    cv::cuda::CascadeClassifier_CUDA gpuClassifier;
-    ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
+    cv::Ptr<cv::cuda::CascadeClassifier> gpuClassifier =
+            cv::cuda::CascadeClassifier::create(classifierXmlPath);
 
-    cv::cuda::GpuMat gpu_rects;
     cv::cuda::GpuMat tested(grey);
-    int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
+    cv::cuda::GpuMat gpu_rects_buf;
+    gpuClassifier->detectMultiScale(tested, gpu_rects_buf);
+
+    std::vector<cv::Rect> gpu_rects;
+    gpuClassifier->convert(gpu_rects_buf, gpu_rects);
 
 #if defined (LOG_CASCADE_STATISTIC)
-    cv::Mat downloaded(gpu_rects);
-    const cv::Rect* faces = downloaded.ptr<cv::Rect>();
-    for (int i = 0; i < count; i++)
+    for (size_t i = 0; i < gpu_rects.size(); i++)
     {
-        cv::Rect r = faces[i];
+        cv::Rect r = gpu_rects[i];
 
         std::cout << r.x << " " << r.y  << " " << r.width << " " << r.height << std::endl;
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
     }
-#endif
 
-#if defined (LOG_CASCADE_STATISTIC)
-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
 #endif
-    (void)count;
 }
 
 INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, LBP_classify,
diff --git a/samples/gpu/cascadeclassifier.cpp b/samples/gpu/cascadeclassifier.cpp
index ff19c1d224..f6209f9fa3 100644
--- a/samples/gpu/cascadeclassifier.cpp
+++ b/samples/gpu/cascadeclassifier.cpp
@@ -173,13 +173,9 @@ int main(int argc, const char *argv[])
         }
     }
 
-    CascadeClassifier_CUDA cascade_gpu;
-    if (!cascade_gpu.load(cascadeName))
-    {
-        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
-    }
+    Ptr<cuda::CascadeClassifier> cascade_gpu = cuda::CascadeClassifier::create(cascadeName);
 
-    CascadeClassifier cascade_cpu;
+    cv::CascadeClassifier cascade_cpu;
     if (!cascade_cpu.load(cascadeName))
     {
         return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
@@ -206,8 +202,8 @@ int main(int argc, const char *argv[])
 
     namedWindow("result", 1);
 
-    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
-    vector<Rect> facesBuf_cpu;
+    Mat frame, frame_cpu, gray_cpu, resized_cpu, frameDisp;
+    vector<Rect> faces;
 
     GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
 
@@ -218,7 +214,6 @@ int main(int argc, const char *argv[])
     bool filterRects = true;
     bool helpScreen = false;
 
-    int detections_num;
     for (;;)
     {
         if (isInputCamera || isInputVideo)
@@ -241,40 +236,26 @@ int main(int argc, const char *argv[])
 
         if (useGPU)
         {
-            //cascade_gpu.visualizeInPlace = true;
-            cascade_gpu.findLargestObject = findLargestObject;
+            cascade_gpu->setFindLargestObject(findLargestObject);
+            cascade_gpu->setScaleFactor(1.2);
+            cascade_gpu->setMinNeighbors((filterRects || findLargestObject) ? 4 : 0);
 
-            detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu, 1.2,
-                                                          (filterRects || findLargestObject) ? 4 : 0);
-            facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
+            cascade_gpu->detectMultiScale(resized_gpu, facesBuf_gpu);
+            cascade_gpu->convert(facesBuf_gpu, faces);
         }
         else
         {
-            Size minSize = cascade_gpu.getClassifierSize();
-            cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
+            Size minSize = cascade_gpu->getClassifierSize();
+            cascade_cpu.detectMultiScale(resized_cpu, faces, 1.2,
                                          (filterRects || findLargestObject) ? 4 : 0,
                                          (findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
                                             | CASCADE_SCALE_IMAGE,
                                          minSize);
-            detections_num = (int)facesBuf_cpu.size();
         }
 
-        if (!useGPU && detections_num)
+        for (size_t i = 0; i < faces.size(); ++i)
         {
-            for (int i = 0; i < detections_num; ++i)
-            {
-                rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
-            }
-        }
-
-        if (useGPU)
-        {
-            resized_gpu.download(resized_cpu);
-
-             for (int i = 0; i < detections_num; ++i)
-             {
-                rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i], Scalar(255));
-             }
+            rectangle(resized_cpu, faces[i], Scalar(255));
         }
 
         tm.stop();
@@ -283,16 +264,15 @@ int main(int argc, const char *argv[])
 
         //print detections to console
         cout << setfill(' ') << setprecision(2);
-        cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
-        if ((filterRects || findLargestObject) && detections_num > 0)
+        cout << setw(6) << fixed << fps << " FPS, " << faces.size() << " det";
+        if ((filterRects || findLargestObject) && !faces.empty())
         {
-            Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
-            for (int i = 0; i < min(detections_num, 2); ++i)
+            for (size_t i = 0; i < faces.size(); ++i)
             {
-                cout << ", [" << setw(4) << faceRects[i].x
-                     << ", " << setw(4) << faceRects[i].y
-                     << ", " << setw(4) << faceRects[i].width
-                     << ", " << setw(4) << faceRects[i].height << "]";
+                cout << ", [" << setw(4) << faces[i].x
+                     << ", " << setw(4) << faces[i].y
+                     << ", " << setw(4) << faces[i].width
+                     << ", " << setw(4) << faces[i].height << "]";
             }
         }
         cout << endl;

From 54f52c3f45e998d50dcf77e13945ed1dcf5d2408 Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Tue, 13 Jan 2015 23:33:01 +0300
Subject: [PATCH 35/55] Build fix for non-TEGRA case.

---
 modules/stitching/src/blenders.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index 3fddff5ffe..caae058681 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -476,9 +476,9 @@ static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _ma
 
 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
 {
-#ifdef HAVE_TEGRA_OPTIMIZATION
-    Mat weight = _weight.getMat();
     Mat src = _src.getMat();
+    Mat weight = _weight.getMat();
+#ifdef HAVE_TEGRA_OPTIMIZATION
     if(tegra::normalizeUsingWeightMap(weight, src))
         return;
 #endif

From d5092a6f7c1f1da22aad2aecf4fa5f4bab770b9d Mon Sep 17 00:00:00 2001
From: VBystricky <vladimir.bystricky@stdutility.com>
Date: Thu, 15 Jan 2015 01:55:17 +0400
Subject: [PATCH 36/55] Add CV_CAP_PROP_FRAME_COUNT property to cap_images.cpp

---
 modules/videoio/src/cap_images.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/videoio/src/cap_images.cpp b/modules/videoio/src/cap_images.cpp
index a92211ca36..253261adc1 100644
--- a/modules/videoio/src/cap_images.cpp
+++ b/modules/videoio/src/cap_images.cpp
@@ -135,6 +135,8 @@ double CvCapture_Images::getProperty(int id) const
         return 0;
     case CV_CAP_PROP_POS_FRAMES:
         return currentframe;
+    case CV_CAP_PROP_FRAME_COUNT:
+        return length;
     case CV_CAP_PROP_POS_AVI_RATIO:
         return (double)currentframe / (double)(length - 1);
     case CV_CAP_PROP_FRAME_WIDTH:

From dccdadc38a0268720eef4bb8bdc9bce836d90af6 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 10:34:43 +0300
Subject: [PATCH 37/55] fix documentation warnings

---
 modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
index 31a6a94c99..ce916b25a2 100644
--- a/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
+++ b/modules/cudaobjdetect/include/opencv2/cudaobjdetect.hpp
@@ -52,13 +52,13 @@
 /**
   @addtogroup cuda
   @{
-      @defgroup cuda_objdetect Object Detection
+      @defgroup cudaobjdetect Object Detection
   @}
  */
 
 namespace cv { namespace cuda {
 
-//! @addtogroup cuda_objdetect
+//! @addtogroup cudaobjdetect
 //! @{
 
 //
@@ -172,10 +172,6 @@ public:
     @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
     @param found_locations Detected objects boundaries.
     @param confidences Optional output array for confidences.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
-    cuda::HOGDescriptor::detect for details.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
      */
     virtual void detectMultiScale(InputArray img,
                                   std::vector<Rect>& found_locations,
@@ -248,6 +244,7 @@ public:
 
     @param image Matrix of type CV_8U containing an image where objects should be detected.
     @param objects Buffer to store detected objects (rectangles).
+    @param stream CUDA stream.
 
     To get final array of detected objects use CascadeClassifier::convert method.
 

From fd60e98c5b69fead0ed7b6657374cf5ec52abfcb Mon Sep 17 00:00:00 2001
From: berak <px1704@web.de>
Date: Thu, 1 Jan 2015 10:30:44 +0100
Subject: [PATCH 38/55] fixes for latest changes in opencv3.0 api

Update the samples and bindings for the latest OpenCV 3.0 API changes:

waitKey() normalization

fixed mser bindings
---
 modules/features2d/include/opencv2/features2d.hpp |  2 +-
 samples/python2/calibrate.py                      |  2 +-
 samples/python2/common.py                         |  4 ++--
 samples/python2/deconvolution.py                  |  2 +-
 samples/python2/digits_video.py                   |  2 +-
 samples/python2/edge.py                           |  2 +-
 samples/python2/find_obj.py                       | 12 +++++++-----
 samples/python2/fitline.py                        |  2 +-
 samples/python2/lappyr.py                         |  2 +-
 samples/python2/mosse.py                          |  2 +-
 samples/python2/mser.py                           |  4 ++--
 samples/python2/plane_ar.py                       |  2 +-
 samples/python2/plane_tracker.py                  |  4 ++--
 samples/python2/squares.py                        |  2 +-
 samples/python2/stereo_match.py                   | 13 ++++++-------
 15 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp
index c6223fb6bb..3d70172284 100644
--- a/modules/features2d/include/opencv2/features2d.hpp
+++ b/modules/features2d/include/opencv2/features2d.hpp
@@ -337,7 +337,7 @@ public:
           double _min_margin=0.003, int _edge_blur_size=5 );
 
     CV_WRAP virtual void detectRegions( InputArray image,
-                                        std::vector<std::vector<Point> >& msers,
+                                        CV_OUT std::vector<std::vector<Point> >& msers,
                                         std::vector<Rect>& bboxes ) = 0;
 
     CV_WRAP virtual void setDelta(int delta) = 0;
diff --git a/samples/python2/calibrate.py b/samples/python2/calibrate.py
index 2c759ff972..9f6f60cb73 100755
--- a/samples/python2/calibrate.py
+++ b/samples/python2/calibrate.py
@@ -26,7 +26,7 @@ if __name__ == '__main__':
     try:
         img_mask = img_mask[0]
     except:
-        img_mask = '../cpp/left*.jpg'
+        img_mask = '../data/left*.jpg'
 
     img_names = glob(img_mask)
     debug_dir = args.get('--debug')
diff --git a/samples/python2/common.py b/samples/python2/common.py
index 5ba1a71f4d..0ad811ae20 100755
--- a/samples/python2/common.py
+++ b/samples/python2/common.py
@@ -71,8 +71,8 @@ def mtx2rvec(R):
     return axis * np.arctan2(s, c)
 
 def draw_str(dst, (x, y), s):
-    cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA)
-    cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA)
+    cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.LINE_AA)
+    cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.LINE_AA)
 
 class Sketcher:
     def __init__(self, windowname, dests, colors_func):
diff --git a/samples/python2/deconvolution.py b/samples/python2/deconvolution.py
index 218efe883b..bbb1567bd4 100755
--- a/samples/python2/deconvolution.py
+++ b/samples/python2/deconvolution.py
@@ -119,7 +119,7 @@ if __name__ == '__main__':
     update(None)
 
     while True:
-        ch = cv2.waitKey()
+        ch = cv2.waitKey() & 0xFF
         if ch == 27:
             break
         if ch == ord(' '):
diff --git a/samples/python2/digits_video.py b/samples/python2/digits_video.py
index ca72a93501..bb142eb687 100755
--- a/samples/python2/digits_video.py
+++ b/samples/python2/digits_video.py
@@ -86,7 +86,7 @@ def main():
 
         cv2.imshow('frame', frame)
         cv2.imshow('bin', bin)
-        ch = cv2.waitKey(1)
+        ch = cv2.waitKey(1) & 0xFF
         if ch == 27:
             break
 
diff --git a/samples/python2/edge.py b/samples/python2/edge.py
index bd0c8bde79..413bf8859b 100755
--- a/samples/python2/edge.py
+++ b/samples/python2/edge.py
@@ -45,7 +45,7 @@ if __name__ == '__main__':
         vis /= 2
         vis[edge != 0] = (0, 255, 0)
         cv2.imshow('edge', vis)
-        ch = cv2.waitKey(5)
+        ch = cv2.waitKey(5) & 0xFF
         if ch == 27:
             break
     cv2.destroyAllWindows()
diff --git a/samples/python2/find_obj.py b/samples/python2/find_obj.py
index fb1e0730d9..35bce86fde 100755
--- a/samples/python2/find_obj.py
+++ b/samples/python2/find_obj.py
@@ -3,6 +3,8 @@
 '''
 Feature-based image matching sample.
 
+Note, that you will need the https://github.com/Itseez/opencv_contrib repo for SIFT and SURF
+
 USAGE
   find_obj.py [--feature=<sift|surf|orb|akaze|brisk>[-flann]] [ <image1> <image2> ]
 
@@ -23,19 +25,19 @@ FLANN_INDEX_LSH    = 6
 def init_feature(name):
     chunks = name.split('-')
     if chunks[0] == 'sift':
-        detector = cv2.xfeatures2d.SIFT()
+        detector = cv2.xfeatures2d.SIFT_create()
         norm = cv2.NORM_L2
     elif chunks[0] == 'surf':
-        detector = cv2.xfeatures2d.SURF(800)
+        detector = cv2.xfeatures2d.SURF_create(800)
         norm = cv2.NORM_L2
     elif chunks[0] == 'orb':
-        detector = cv2.ORB(400)
+        detector = cv2.ORB_create(400)
         norm = cv2.NORM_HAMMING
     elif chunks[0] == 'akaze':
-        detector = cv2.AKAZE()
+        detector = cv2.AKAZE_create()
         norm = cv2.NORM_HAMMING
     elif chunks[0] == 'brisk':
-        detector = cv2.BRISK()
+        detector = cv2.BRISK_create()
         norm = cv2.NORM_HAMMING
     else:
         return None, None
diff --git a/samples/python2/fitline.py b/samples/python2/fitline.py
index 82b45b0232..c91a7e4535 100755
--- a/samples/python2/fitline.py
+++ b/samples/python2/fitline.py
@@ -79,7 +79,7 @@ if __name__ == '__main__':
     cv2.createTrackbar('outlier %', 'fit line', 30, 100, update)
     while True:
         update()
-        ch = cv2.waitKey(0)
+        ch = cv2.waitKey(0) & 0xFF
         if ch == ord('f'):
             cur_func_name = dist_func_names.next()
         if ch == 27:
diff --git a/samples/python2/lappyr.py b/samples/python2/lappyr.py
index 0c08484de9..3cf2679b08 100755
--- a/samples/python2/lappyr.py
+++ b/samples/python2/lappyr.py
@@ -62,5 +62,5 @@ if __name__ == '__main__':
 
         cv2.imshow('laplacian pyramid filter', res)
 
-        if cv2.waitKey(1) == 27:
+        if cv2.waitKey(1) & 0xFF == 27:
             break
diff --git a/samples/python2/mosse.py b/samples/python2/mosse.py
index 0e2e7eed98..81196dcc36 100755
--- a/samples/python2/mosse.py
+++ b/samples/python2/mosse.py
@@ -168,7 +168,7 @@ class App:
             self.rect_sel.draw(vis)
 
             cv2.imshow('frame', vis)
-            ch = cv2.waitKey(10)
+            ch = cv2.waitKey(10) & 0xFF
             if ch == 27:
                 break
             if ch == ord(' '):
diff --git a/samples/python2/mser.py b/samples/python2/mser.py
index beaa6e7dcb..9d7a65c10f 100755
--- a/samples/python2/mser.py
+++ b/samples/python2/mser.py
@@ -26,13 +26,13 @@ if __name__ == '__main__':
         video_src = 0
 
     cam = video.create_capture(video_src)
-    mser = cv2.MSER()
+    mser = cv2.MSER_create()
     while True:
         ret, img = cam.read()
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         vis = img.copy()
 
-        regions = mser.detect(gray, None)
+        regions = mser.detectRegions(gray, None)
         hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
         cv2.polylines(vis, hulls, 1, (0, 255, 0))
 
diff --git a/samples/python2/plane_ar.py b/samples/python2/plane_ar.py
index dcb5559cdf..6580be7d05 100755
--- a/samples/python2/plane_ar.py
+++ b/samples/python2/plane_ar.py
@@ -71,7 +71,7 @@ class App:
 
             self.rect_sel.draw(vis)
             cv2.imshow('plane', vis)
-            ch = cv2.waitKey(1)
+            ch = cv2.waitKey(1) & 0xFF
             if ch == ord(' '):
                 self.paused = not self.paused
             if ch == ord('c'):
diff --git a/samples/python2/plane_tracker.py b/samples/python2/plane_tracker.py
index de5d7a0ec9..5ae0933bc0 100755
--- a/samples/python2/plane_tracker.py
+++ b/samples/python2/plane_tracker.py
@@ -61,7 +61,7 @@ TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
 
 class PlaneTracker:
     def __init__(self):
-        self.detector = cv2.ORB( nfeatures = 1000 )
+        self.detector = cv2.ORB_create( nfeatures = 1000 )
         self.matcher = cv2.FlannBasedMatcher(flann_params, {})  # bug : need to pass empty dict (#1329)
         self.targets = []
 
@@ -160,7 +160,7 @@ class App:
 
             self.rect_sel.draw(vis)
             cv2.imshow('plane', vis)
-            ch = cv2.waitKey(1)
+            ch = cv2.waitKey(1) & 0xFF
             if ch == ord(' '):
                 self.paused = not self.paused
             if ch == ord('c'):
diff --git a/samples/python2/squares.py b/samples/python2/squares.py
index c12b884011..84160a2919 100755
--- a/samples/python2/squares.py
+++ b/samples/python2/squares.py
@@ -37,7 +37,7 @@ def find_squares(img):
 
 if __name__ == '__main__':
     from glob import glob
-    for fn in glob('../cpp/pic*.png'):
+    for fn in glob('../data/pic*.png'):
         img = cv2.imread(fn)
         squares = find_squares(img)
         cv2.drawContours( img, squares, -1, (0, 255, 0), 3 )
diff --git a/samples/python2/stereo_match.py b/samples/python2/stereo_match.py
index 5b21617cca..e53ae77025 100755
--- a/samples/python2/stereo_match.py
+++ b/samples/python2/stereo_match.py
@@ -39,16 +39,15 @@ if __name__ == '__main__':
     window_size = 3
     min_disp = 16
     num_disp = 112-min_disp
-    stereo = cv2.StereoSGBM(minDisparity = min_disp,
+    stereo = cv2.StereoSGBM_create(minDisparity = min_disp,
         numDisparities = num_disp,
-        SADWindowSize = window_size,
-        uniquenessRatio = 10,
-        speckleWindowSize = 100,
-        speckleRange = 32,
-        disp12MaxDiff = 1,
+        blockSize = 16,
         P1 = 8*3*window_size**2,
         P2 = 32*3*window_size**2,
-        fullDP = False
+        disp12MaxDiff = 1,
+        uniquenessRatio = 10,
+        speckleWindowSize = 100,
+        speckleRange = 32
     )
 
     print 'computing disparity...'

From 997dded1ac5c145e982e24da36bb10ec9a04a294 Mon Sep 17 00:00:00 2001
From: Tzafrir <tzafrir@ceemple.com>
Date: Tue, 6 Jan 2015 16:13:46 +0200
Subject: [PATCH 39/55] add default file to load in case sample was run without
 command line arguments

---
 samples/gpu/farneback_optical_flow.cpp |  4 ++--
 samples/gpu/hog.cpp                    | 14 +++++++++++---
 samples/gpu/opengl.cpp                 | 12 +++++++++---
 samples/gpu/optical_flow.cpp           | 19 +++++++++++++------
 samples/gpu/pyrlk_optical_flow.cpp     |  4 ++--
 5 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/samples/gpu/farneback_optical_flow.cpp b/samples/gpu/farneback_optical_flow.cpp
index 6fc3f931fe..b8ed55ea6c 100644
--- a/samples/gpu/farneback_optical_flow.cpp
+++ b/samples/gpu/farneback_optical_flow.cpp
@@ -44,8 +44,8 @@ static void colorizeFlow(const Mat &u, const Mat &v, Mat &dst)
 int main(int argc, char **argv)
 {
     CommandLineParser cmd(argc, argv,
-            "{ l left  | | specify left image }"
-            "{ r right | | specify right image }"
+            "{ l left  | ../data/basketball1.png | specify left image }"
+            "{ r right | ../data/basketball2.png | specify right image }"
             "{ h help  | | print help message }");
 
     cmd.about("Farneback's optical flow sample.");
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index 59ea44f31b..4b963d3b0f 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -115,11 +115,19 @@ int main(int argc, char** argv)
 {
     try
     {
+        Args args;
         if (argc < 2)
+        {
             printHelp();
-        Args args = Args::read(argc, argv);
-        if (help_showed)
-            return -1;
+            args.camera_id = 0;
+            args.src_is_camera = true;
+        }
+        else
+        {
+            args = Args::read(argc, argv);
+            if (help_showed)
+                return -1;
+        }
         App app(args);
         app.run();
     }
diff --git a/samples/gpu/opengl.cpp b/samples/gpu/opengl.cpp
index eef8e1a94c..e3e3ddc687 100644
--- a/samples/gpu/opengl.cpp
+++ b/samples/gpu/opengl.cpp
@@ -14,6 +14,9 @@ int main()
     #define NOMINMAX 1
     #include <windows.h>
 #endif
+#if defined(_WIN64)
+    #include <windows.h>
+#endif
 
 #if defined(__APPLE__)
     #include <OpenGL/gl.h>
@@ -55,16 +58,19 @@ void draw(void* userdata)
 
 int main(int argc, char* argv[])
 {
+    string filename;
     if (argc < 2)
     {
         cout << "Usage: " << argv[0] << " image" << endl;
-        return -1;
+        filename = "../data/lena.jpg";
     }
+    else
+        filename = argv[1];
 
-    Mat img = imread(argv[1]);
+    Mat img = imread(filename);
     if (img.empty())
     {
-        cerr << "Can't open image " << argv[1] << endl;
+        cerr << "Can't open image " << filename << endl;
         return -1;
     }
 
diff --git a/samples/gpu/optical_flow.cpp b/samples/gpu/optical_flow.cpp
index 8c6bc74a40..7d625de85b 100644
--- a/samples/gpu/optical_flow.cpp
+++ b/samples/gpu/optical_flow.cpp
@@ -135,23 +135,30 @@ static void showFlow(const char* name, const GpuMat& d_flowx, const GpuMat& d_fl
 
 int main(int argc, const char* argv[])
 {
+    string filename1, filename2;
     if (argc < 3)
     {
-        cerr << "Usage : " << argv[0] << "<frame0> <frame1>" << endl;
-        return -1;
+        cerr << "Usage : " << argv[0] << " <frame0> <frame1>" << endl;
+        filename1 = "../data/basketball1.png";
+        filename2 = "../data/basketball2.png";
+    }
+    else
+    {
+        filename1 = argv[1];
+        filename2 = argv[2];
     }
 
-    Mat frame0 = imread(argv[1], IMREAD_GRAYSCALE);
-    Mat frame1 = imread(argv[2], IMREAD_GRAYSCALE);
+    Mat frame0 = imread(filename1, IMREAD_GRAYSCALE);
+    Mat frame1 = imread(filename2, IMREAD_GRAYSCALE);
 
     if (frame0.empty())
     {
-        cerr << "Can't open image ["  << argv[1] << "]" << endl;
+        cerr << "Can't open image ["  << filename1 << "]" << endl;
         return -1;
     }
     if (frame1.empty())
     {
-        cerr << "Can't open image ["  << argv[2] << "]" << endl;
+        cerr << "Can't open image ["  << filename2 << "]" << endl;
         return -1;
     }
 
diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp
index dc5de6c0aa..febc28f28d 100644
--- a/samples/gpu/pyrlk_optical_flow.cpp
+++ b/samples/gpu/pyrlk_optical_flow.cpp
@@ -119,8 +119,8 @@ int main(int argc, const char* argv[])
 {
     const char* keys =
         "{ h             help   |       | print help message }"
-        "{ l             left   |       | specify left image }"
-        "{ r             right  |       | specify right image }"
+        "{ l             left   | ../data/pic1.png       | specify left image }"
+        "{ r             right  | ../data/pic2.png       | specify right image }"
         "{ gray                 |       | use grayscale sources [PyrLK Sparse] }"
         "{ win_size             | 21    | specify windows size [PyrLK] }"
         "{ max_level            | 3     | specify max level [PyrLK] }"

From f48befc6f0ffa75506239765b8459dd33e71c43c Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 16:45:35 +0300
Subject: [PATCH 40/55] refactor CUDA part of photo module

---
 modules/photo/include/opencv2/photo/cuda.hpp | 106 ++++++++++---------
 modules/photo/perf/perf_cuda.cpp             |   8 +-
 modules/photo/src/denoising.cuda.cpp         |  58 ++++++----
 modules/photo/test/test_denoising.cuda.cpp   |   5 +-
 4 files changed, 93 insertions(+), 84 deletions(-)

diff --git a/modules/photo/include/opencv2/photo/cuda.hpp b/modules/photo/include/opencv2/photo/cuda.hpp
index 4b69afa7be..474e6f8317 100644
--- a/modules/photo/include/opencv2/photo/cuda.hpp
+++ b/modules/photo/include/opencv2/photo/cuda.hpp
@@ -64,64 +64,66 @@ BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supporte
 @sa
    fastNlMeansDenoising
  */
-CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
+CV_EXPORTS void nonLocalMeans(InputArray src, OutputArray dst,
+                              float h,
+                              int search_window = 21,
+                              int block_size = 7,
+                              int borderMode = BORDER_DEFAULT,
+                              Stream& stream = Stream::Null());
 
-/** @brief The class implements fast approximate Non Local Means Denoising algorithm.
+/** @brief Perform image denoising using Non-local Means Denoising algorithm
+<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
+optimizations. Noise expected to be a gaussian white noise
+
+@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
+@param dst Output image with the same size and type as src .
+@param h Parameter regulating filter strength. Big h value perfectly removes noise but also
+removes image details, smaller h value preserves details but also preserves some noise
+@param search_window Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affect performance linearly: greater search_window - greater
+denoising time. Recommended value 21 pixels
+@param block_size Size in pixels of the template patch that is used to compute weights. Should be
+odd. Recommended value 7 pixels
+@param s Stream for the asynchronous invocations.
+
+This function expected to be applied to grayscale images. For colored images look at
+FastNonLocalMeansDenoising::labMethod.
+
+@sa
+   fastNlMeansDenoising
  */
-class CV_EXPORTS FastNonLocalMeansDenoising
-{
-public:
-    /** @brief Perform image denoising using Non-local Means Denoising algorithm
-    <http://www.ipol.im/pub/algo/bcm_non_local_means_denoising> with several computational
-    optimizations. Noise expected to be a gaussian white noise
+CV_EXPORTS void fastNlMeansDenoising(InputArray src, OutputArray dst,
+                                     float h,
+                                     int search_window = 21,
+                                     int block_size = 7,
+                                     Stream& stream = Stream::Null());
 
-    @param src Input 8-bit 1-channel, 2-channel or 3-channel image.
-    @param dst Output image with the same size and type as src .
-    @param h Parameter regulating filter strength. Big h value perfectly removes noise but also
-    removes image details, smaller h value preserves details but also preserves some noise
-    @param search_window Size in pixels of the window that is used to compute weighted average for
-    given pixel. Should be odd. Affect performance linearly: greater search_window - greater
-    denoising time. Recommended value 21 pixels
-    @param block_size Size in pixels of the template patch that is used to compute weights. Should be
-    odd. Recommended value 7 pixels
-    @param s Stream for the asynchronous invocations.
+/** @brief Modification of fastNlMeansDenoising function for colored images
 
-    This function expected to be applied to grayscale images. For colored images look at
-    FastNonLocalMeansDenoising::labMethod.
+@param src Input 8-bit 3-channel image.
+@param dst Output image with the same size and type as src .
+@param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
+also removes image details, smaller h value preserves details but also preserves some noise
+@param photo_render float The same as h but for color components. For most images value equals 10 will be
+enough to remove colored noise and do not distort colors
+@param search_window Size in pixels of the window that is used to compute weighted average for
+given pixel. Should be odd. Affect performance linearly: greater search_window - greater
+denoising time. Recommended value 21 pixels
+@param block_size Size in pixels of the template patch that is used to compute weights. Should be
+odd. Recommended value 7 pixels
+@param s Stream for the asynchronous invocations.
 
-    @sa
-       fastNlMeansDenoising
-     */
-    void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
+The function converts image to CIELAB colorspace and then separately denoise L and AB components
+with given h parameters using FastNonLocalMeansDenoising::simpleMethod function.
 
-    /** @brief Modification of FastNonLocalMeansDenoising::simpleMethod for color images
-
-    @param src Input 8-bit 3-channel image.
-    @param dst Output image with the same size and type as src .
-    @param h_luminance Parameter regulating filter strength. Big h value perfectly removes noise but
-    also removes image details, smaller h value preserves details but also preserves some noise
-    @param photo_render float The same as h but for color components. For most images value equals 10 will be
-    enought to remove colored noise and do not distort colors
-    @param search_window Size in pixels of the window that is used to compute weighted average for
-    given pixel. Should be odd. Affect performance linearly: greater search_window - greater
-    denoising time. Recommended value 21 pixels
-    @param block_size Size in pixels of the template patch that is used to compute weights. Should be
-    odd. Recommended value 7 pixels
-    @param s Stream for the asynchronous invocations.
-
-    The function converts image to CIELAB colorspace and then separately denoise L and AB components
-    with given h parameters using FastNonLocalMeansDenoising::simpleMethod function.
-
-    @sa
-       fastNlMeansDenoisingColored
-     */
-    void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float photo_render, int search_window = 21, int block_size = 7, Stream& s = Stream::Null());
-
-private:
-
-    GpuMat buffer, extended_src_buffer;
-    GpuMat lab, l, ab;
-};
+@sa
+   fastNlMeansDenoisingColored
+ */
+CV_EXPORTS void fastNlMeansDenoisingColored(InputArray src, OutputArray dst,
+                                            float h_luminance, float photo_render,
+                                            int search_window = 21,
+                                            int block_size = 7,
+                                            Stream& stream = Stream::Null());
 
 //! @} photo
 
diff --git a/modules/photo/perf/perf_cuda.cpp b/modules/photo/perf/perf_cuda.cpp
index 318ec17dfd..4496599d1b 100644
--- a/modules/photo/perf/perf_cuda.cpp
+++ b/modules/photo/perf/perf_cuda.cpp
@@ -126,12 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, CUDA_FastNonLocalMeans,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::FastNonLocalMeansDenoising fnlmd;
-
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
+        TEST_CYCLE() cv::cuda::fastNlMeansDenoising(d_src, dst, h, search_widow_size, block_size);
 
         CUDA_SANITY_CHECK(dst);
     }
@@ -171,12 +169,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, CUDA_FastNonLocalMeansColored,
 
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::FastNonLocalMeansDenoising fnlmd;
-
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
+        TEST_CYCLE() cv::cuda::fastNlMeansDenoisingColored(d_src, dst, h, h, search_widow_size, block_size);
 
         CUDA_SANITY_CHECK(dst);
     }
diff --git a/modules/photo/src/denoising.cuda.cpp b/modules/photo/src/denoising.cuda.cpp
index 76b870fe58..7ea37f6951 100644
--- a/modules/photo/src/denoising.cuda.cpp
+++ b/modules/photo/src/denoising.cuda.cpp
@@ -60,9 +60,9 @@ using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAIMGPROC)
 
-void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
-void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::nonLocalMeans(InputArray, OutputArray, float, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::fastNlMeansDenoising(InputArray, OutputArray, float, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::fastNlMeansDenoisingColored(InputArray, OutputArray, float, float, int, int, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -78,13 +78,15 @@ namespace cv { namespace cuda { namespace device
     }
 }}}
 
-void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
+void cv::cuda::nonLocalMeans(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, int borderMode, Stream& stream)
 {
     using cv::cuda::device::imgproc::nlm_bruteforce_gpu;
     typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
 
     static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
 
+    const GpuMat src = _src.getGpuMat();
+
     CV_Assert(src.type() == CV_8U || src.type() == CV_8UC2 || src.type() == CV_8UC3);
 
     const func_t func = funcs[src.channels() - 1];
@@ -93,8 +95,10 @@ void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search
     int b = borderMode;
     CV_Assert(b == BORDER_REFLECT101 || b == BORDER_REPLICATE || b == BORDER_CONSTANT || b == BORDER_REFLECT || b == BORDER_WRAP);
 
-    dst.create(src.size(), src.type());
-    func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s));
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(stream));
 }
 
 namespace cv { namespace cuda { namespace device
@@ -112,47 +116,55 @@ namespace cv { namespace cuda { namespace device
      }
 }}}
 
-void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
+void cv::cuda::fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, int search_window, int block_window, Stream& stream)
 {
+    const GpuMat src = _src.getGpuMat();
+
     CV_Assert(src.depth() == CV_8U && src.channels() < 4);
 
     int border_size = search_window/2 + block_window/2;
     Size esize = src.size() + Size(border_size, border_size) * 2;
 
-    cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
-    GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
+    BufferPool pool(stream);
 
-    cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
+    GpuMat extended_src = pool.getBuffer(esize, src.type());
+    cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
     GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
 
     int bcols, brows;
     device::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
-    buffer.create(brows, bcols, CV_32S);
+    GpuMat buffer = pool.getBuffer(brows, bcols, CV_32S);
 
     using namespace cv::cuda::device::imgproc;
     typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
     static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
 
-    dst.create(src.size(), src.type());
-    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
+    _dst.create(src.size(), src.type());
+    GpuMat dst = _dst.getGpuMat();
+
+    funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(stream));
 }
 
-void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
+void cv::cuda::fastNlMeansDenoisingColored(InputArray _src, OutputArray _dst, float h_luminance, float h_color, int search_window, int block_window, Stream& stream)
 {
+    const GpuMat src = _src.getGpuMat();
+
     CV_Assert(src.type() == CV_8UC3);
 
-    lab.create(src.size(), src.type());
-    cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
+    BufferPool pool(stream);
 
-    l.create(src.size(), CV_8U);
-    ab.create(src.size(), CV_8UC2);
-    device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(s));
+    GpuMat lab = pool.getBuffer(src.size(), src.type());
+    cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, stream);
 
-    simpleMethod(l, l, h_luminance, search_window, block_window, s);
-    simpleMethod(ab, ab, h_color, search_window, block_window, s);
+    GpuMat l = pool.getBuffer(src.size(), CV_8U);
+    GpuMat ab = pool.getBuffer(src.size(), CV_8UC2);
+    device::imgproc::fnlm_split_channels(lab, l, ab, StreamAccessor::getStream(stream));
 
-    device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s));
-    cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
+    fastNlMeansDenoising(l, l, h_luminance, search_window, block_window, stream);
+    fastNlMeansDenoising(ab, ab, h_color, search_window, block_window, stream);
+
+    device::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(stream));
+    cv::cuda::cvtColor(lab, _dst, cv::COLOR_Lab2BGR, 0, stream);
 }
 
 #endif
diff --git a/modules/photo/test/test_denoising.cuda.cpp b/modules/photo/test/test_denoising.cuda.cpp
index dce20b9f51..209bac3328 100644
--- a/modules/photo/test/test_denoising.cuda.cpp
+++ b/modules/photo/test/test_denoising.cuda.cpp
@@ -99,10 +99,9 @@ TEST(CUDA_FastNonLocalMeans, Regression)
     cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
 
     GpuMat dbgr, dgray;
-    cv::cuda::FastNonLocalMeansDenoising fnlmd;
 
-    fnlmd.simpleMethod(GpuMat(gray),  dgray, 20);
-    fnlmd.labMethod(GpuMat(bgr),  dbgr, 20, 10);
+    cv::cuda::fastNlMeansDenoising(GpuMat(gray),  dgray, 20);
+    cv::cuda::fastNlMeansDenoisingColored(GpuMat(bgr),  dbgr, 20, 10);
 
 #if 0
     dumpImage("../gpu/denoising/fnlm_denoised_lena_bgr.png", cv::Mat(dbgr));

From 14e15fb4f781b327a13bf2b22345ae4f0d63dd67 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 17:15:59 +0300
Subject: [PATCH 41/55] fix documentation warnings

---
 modules/photo/include/opencv2/photo/cuda.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/photo/include/opencv2/photo/cuda.hpp b/modules/photo/include/opencv2/photo/cuda.hpp
index 474e6f8317..a5c83f7717 100644
--- a/modules/photo/include/opencv2/photo/cuda.hpp
+++ b/modules/photo/include/opencv2/photo/cuda.hpp
@@ -59,7 +59,7 @@ namespace cv { namespace cuda {
 @param block_size Size of block used for computing weights.
 @param borderMode Border type. See borderInterpolate for details. BORDER_REFLECT101 ,
 BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
-@param s Stream for the asynchronous version.
+@param stream Stream for the asynchronous version.
 
 @sa
    fastNlMeansDenoising
@@ -84,7 +84,7 @@ given pixel. Should be odd. Affect performance linearly: greater search_window -
 denoising time. Recommended value 21 pixels
 @param block_size Size in pixels of the template patch that is used to compute weights. Should be
 odd. Recommended value 7 pixels
-@param s Stream for the asynchronous invocations.
+@param stream Stream for the asynchronous invocations.
 
 This function expected to be applied to grayscale images. For colored images look at
 FastNonLocalMeansDenoising::labMethod.
@@ -111,7 +111,7 @@ given pixel. Should be odd. Affect performance linearly: greater search_window -
 denoising time. Recommended value 21 pixels
 @param block_size Size in pixels of the template patch that is used to compute weights. Should be
 odd. Recommended value 7 pixels
-@param s Stream for the asynchronous invocations.
+@param stream Stream for the asynchronous invocations.
 
 The function converts image to CIELAB colorspace and then separately denoise L and AB components
 with given h parameters using FastNonLocalMeansDenoising::simpleMethod function.

From c120001178ffa6220d5cd9b17f1f98b659798fb1 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 17:17:35 +0300
Subject: [PATCH 42/55] fix -Wsign-compare warnings

---
 modules/features2d/src/kaze/AKAZEFeatures.cpp | 2 +-
 modules/imgcodecs/test/test_grfmt.cpp         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp
index 7988584030..fd15345b29 100644
--- a/modules/features2d/src/kaze/AKAZEFeatures.cpp
+++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp
@@ -818,7 +818,7 @@ void AKAZEFeatures::Compute_Main_Orientation(KeyPoint& kpt, const std::vector<TE
     ang2 = (ang1 + (float)(CV_PI / 3.0) >(float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0));
     sumX = sumY = 0.f;
 
-    for (size_t k = 0; k < ang_size; ++k) {
+    for (int k = 0; k < ang_size; ++k) {
       // Get angle from the x-axis of the sample point
       const float & ang = Ang[k];
 
diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp
index d3f21f16b3..d1610ae7fc 100644
--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@@ -664,7 +664,7 @@ private:
         vector<Mat> pages;
         bool res = imreadmulti(folder + "multipage.tif", pages, flags);
         ASSERT_TRUE(res == true);
-        ASSERT_TRUE(pages.size() == page_count);
+        ASSERT_EQ(static_cast<size_t>(page_count), pages.size());
 
         for (int i = 0; i < page_count; i++)
         {

From a524a677e45e90a925d9edf5cc8e7efde3e70e19 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 17:20:35 +0300
Subject: [PATCH 43/55] disable warnings caused by CUDA headers

---
 modules/stitching/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/stitching/CMakeLists.txt b/modules/stitching/CMakeLists.txt
index 8d11491be2..36d4452c7f 100644
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@@ -1,7 +1,7 @@
 set(the_description "Images stitching")
 
 if(HAVE_CUDA)
-  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)
+  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow)
 endif()
 
 ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect

From 0376a8643e8a32e7f773d82054460dae98cd348e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 18:40:33 +0300
Subject: [PATCH 44/55] fix CMake CMP0054 warning:

ported from c105b72945013b1cecb00617879598c43b82f298
---
 cmake/OpenCVModule.cmake | 2 +-
 cmake/OpenCVUtils.cmake  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index c5325e20f1..362aa6f975 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -787,7 +787,7 @@ macro(__ocv_parse_test_sources tests_type)
       set(__file_group_sources "")
     elseif(arg STREQUAL "DEPENDS_ON")
       set(__currentvar "OPENCV_${tests_type}_${the_module}_DEPS")
-    elseif("${__currentvar}" STREQUAL "__file_group_sources" AND NOT __file_group_name)
+    elseif(" ${__currentvar}" STREQUAL " __file_group_sources" AND NOT __file_group_name) # spaces to avoid CMP0054
       set(__file_group_name "${arg}")
     else()
       list(APPEND ${__currentvar} "${arg}")
diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index b32465ead2..60d862efca 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -276,12 +276,12 @@ macro(OCV_OPTION variable description value)
     endif()
   endforeach()
   unset(__varname)
-  if("${__condition}" STREQUAL "")
+  if(__condition STREQUAL "")
     set(__condition 2 GREATER 1)
   endif()
 
   if(${__condition})
-    if("${__value}" MATCHES ";")
+    if(__value MATCHES ";")
       if(${__value})
         option(${variable} "${description}" ON)
       else()

From f676bfb3d74efec62cdc845bb50f58aafeaab6d5 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Thu, 15 Jan 2015 19:33:27 +0300
Subject: [PATCH 45/55] fix GpuMat::setTo method in case if mask is empty:

it might be called from _OutputArray::setTo
---
 modules/core/src/cuda/gpu_mat.cu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
index 71b1b52198..f21c5f4c19 100644
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -390,6 +390,11 @@ GpuMat& cv::cuda::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
 
     GpuMat mask = _mask.getGpuMat();
 
+    if (mask.empty())
+    {
+        return setTo(value, stream);
+    }
+
     CV_DbgAssert( size() == mask.size() && mask.type() == CV_8UC1 );
 
     typedef void (*func_t)(const GpuMat& mat, const GpuMat& mask, Scalar scalar, Stream& stream);

From 46ed461e2221868a7ef8b9036615de80783e859a Mon Sep 17 00:00:00 2001
From: Yan Wang <yan.wang@linux.intel.com>
Date: Wed, 24 Dec 2014 14:02:53 +0800
Subject: [PATCH 46/55] Add template match with mask.

Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
---
 modules/imgproc/include/opencv2/imgproc.hpp |   4 +-
 modules/imgproc/src/templmatch.cpp          |  87 +++++++++++++++++++-
 samples/cpp/mask_tmpl.cpp                   |  72 ++++++++++++++++
 samples/data/lena_tmpl.jpg                  | Bin 0 -> 79467 bytes
 samples/data/mask.png                       | Bin 0 -> 3943 bytes
 samples/data/tmpl.png                       | Bin 0 -> 5911 bytes
 6 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 samples/cpp/mask_tmpl.cpp
 create mode 100644 samples/data/lena_tmpl.jpg
 create mode 100644 samples/data/mask.png
 create mode 100644 samples/data/tmpl.png

diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp
index e67922eec0..9118d76ae6 100644
--- a/modules/imgproc/include/opencv2/imgproc.hpp
+++ b/modules/imgproc/include/opencv2/imgproc.hpp
@@ -3330,9 +3330,11 @@ data type.
 @param result Map of comparison results. It must be single-channel 32-bit floating-point. If image
 is \f$W \times H\f$ and templ is \f$w \times h\f$ , then result is \f$(W-w+1) \times (H-h+1)\f$ .
 @param method Parameter specifying the comparison method, see cv::TemplateMatchModes
+@param mask Mask of the searched template. It must have the same data type and size as templ. It is
+not set by default.
  */
 CV_EXPORTS_W void matchTemplate( InputArray image, InputArray templ,
-                                 OutputArray result, int method );
+                                 OutputArray result, int method, InputArray mask = noArray() );
 
 //! @}
 
diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp
index 416917a2fb..8afdba7d10 100644
--- a/modules/imgproc/src/templmatch.cpp
+++ b/modules/imgproc/src/templmatch.cpp
@@ -814,12 +814,97 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
         }
     }
 }
+
+static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
+{
+    // Masked template matching; only CV_TM_SQDIFF and CV_TM_CCORR_NORMED are implemented, other methods raise StsNotImplemented.
+    int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
+    CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
+
+    Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();
+    int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
+    int mtype = mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype); // the mask's own type, not the image's
+    CV_Assert( (mdepth == CV_8U || mdepth == CV_32F) && mask.size() == templ.size() );
+
+    if (depth == CV_8U)
+    {
+        depth = CV_32F;
+        type = CV_MAKETYPE(CV_32F, cn);
+        img.convertTo(img, type, 1.0 / 255);
+    }
+
+    if (tdepth == CV_8U)
+    {
+        tdepth = CV_32F;
+        ttype = CV_MAKETYPE(CV_32F, tcn);
+        templ.convertTo(templ, ttype, 1.0 / 255);
+    }
+
+    if (mdepth == CV_8U)
+    {
+        // Binarize into a temporary: an in-place compare() would write into the caller's mask buffer.
+        Mat binMask;
+        compare(mask, Scalar::all(0), binMask, CMP_NE);
+        binMask.convertTo(mask, CV_MAKETYPE(CV_32F, mcn), 1.0 / 255);
+    }
+
+    Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
+    _result.create(corrSize, CV_32F);
+    Mat result = _result.getMat();
+
+    Mat img2 = img.mul(img);
+    Mat mask2 = mask.mul(mask);
+    Mat mask_templ = templ.mul(mask);
+    Scalar templMean, templSdv;
+    meanStdDev( mask_templ, templMean, templSdv );
+
+    // N * (stddev^2 + mean^2) summed over channels, i.e. the sum of squares of the masked template.
+    double templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
+    templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
+    templSum2 *= ((double)templ.rows * templ.cols);
+
+    if (method == CV_TM_SQDIFF)
+    {
+        Mat mask2_templ = templ.mul(mask2);
+        Mat corr(corrSize, CV_32F);
+        crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
+        crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );
+
+        result -= corr * 2;
+        result += templSum2;
+    }
+    else if (method == CV_TM_CCORR_NORMED)
+    {
+        if (templSum2 < DBL_EPSILON)
+        {
+            result = Scalar::all(1);
+            return;
+        }
+
+        Mat corr(corrSize, CV_32F);
+        crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
+        crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );
+
+        sqrt(corr, corr);
+        result = result.mul(1/corr);
+        result /= std::sqrt(templSum2);
+    }
+    else
+        CV_Error(Error::StsNotImplemented, "");
+}
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method )
+void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
 {
+    if (!_mask.empty())
+    {
+        cv::matchTemplateMask(_img, _templ, _result, method, _mask);
+        return;
+    }
+
     int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
     CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
diff --git a/samples/cpp/mask_tmpl.cpp b/samples/cpp/mask_tmpl.cpp
new file mode 100644
index 0000000000..2b6bb77bf1
--- /dev/null
+++ b/samples/cpp/mask_tmpl.cpp
@@ -0,0 +1,72 @@
+#include "opencv2/imgproc.hpp"
+#include "opencv2/highgui.hpp"
+
+#include <cctype>
+#include <iostream>
+#include <iterator>
+#include <stdio.h>
+
+using namespace std;
+using namespace cv;
+
+static void help()
+{
+    const char* const usage = "\nThis program demonstrates template match with mask.\n"
+                              "Usage:\n"
+                              "./mask_tmpl <image_name> <template_name> <mask_name>, Default is ../data/lena_tmpl.jpg\n";
+    cout << usage << endl; // usage text on stdout, followed by an empty line
+}
+
+// Sample driver: runs masked template matching and shows the best-matching region.
+int main( int argc, const char** argv )
+{
+    // The three paths are taken from the command line only when all of them are given.
+    const bool useArgs = (argc == 4);
+    const char* filename = useArgs ? argv[1] : "../data/lena_tmpl.jpg";
+    const char* tmplname = useArgs ? argv[2] : "../data/tmpl.png";
+    const char* maskname = useArgs ? argv[3] : "../data/mask.png";
+
+    // All three inputs are loaded before any of them is checked.
+    Mat img = imread(filename);
+    Mat tmpl = imread(tmplname);
+    Mat mask = imread(maskname);
+
+    // Find the first input that failed to load (image, then template, then mask).
+    const char* missing = 0;
+    if(img.empty())
+        missing = filename;
+    else if(tmpl.empty())
+        missing = tmplname;
+    else if(mask.empty())
+        missing = maskname;
+
+    if(missing)
+    {
+        help();
+        cout << "can not open " << missing << endl;
+        return -1;
+    }
+
+    //int method = CV_TM_SQDIFF;
+    int method = CV_TM_CCORR_NORMED;
+    // Map of comparison results filled by matchTemplate.
+    Mat res;
+    matchTemplate(img, tmpl, res, method, mask);
+
+    // Locate the extrema of the result map.
+    double minVal, maxVal;
+    Point minLoc, maxLoc;
+    minMaxLoc(res, &minVal, &maxVal, &minLoc, &maxLoc);
+
+    // The squared-difference methods take the minimum as the match; the others take the maximum.
+    const bool useMinimum = (method == CV_TM_SQDIFF || method == CV_TM_SQDIFF_NORMED);
+    const Rect rect(useMinimum ? minLoc : maxLoc, tmpl.size());
+
+    // Outline the matched region on the input image and show it until a key is pressed.
+    rectangle(img, rect, Scalar(0, 255, 0), 2);
+
+    imshow("detected template", img);
+    waitKey();
+
+    return 0;
+}
diff --git a/samples/data/lena_tmpl.jpg b/samples/data/lena_tmpl.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0c9fc20de8b631e4a9d1c59fbc570564528aacbb
GIT binary patch
literal 79467
zcmb5VWmH^G&^|c01rHt|xI=Is+}+(FcnIzgAh<I~aCf)CA-LP%?(Xhn`R#xAJ$v47
zTXk-qdrw>4>h9|9db<BD{@Vm#$V$pc0-&Iv05Ts8`1cNAk~aBaZf|04@m0;?yNkPt
zlLg~fnIC4%UuEn~EG@o@*qK>b*n56;V`b(={<i`U1HeH4NB@N|9|Q{z`yU~~!^6QN
zAtECqAtE6me?mt^MnOYCLPEttMMKBH#Kc7Yg!LH<1N-A0<G(_n{>upii};Zd0|f~M
z<Nr7P>j7XQLIa=yFi>OwXiO*=OsIc-0Ac_D3hv|QpaB0b!9v5p!6QHc5RpEjiJ$<`
zF#q4!k52$B927jlzh%HDn2%Tt7>ti18jE2zxRUO6`c#yhtp{wOpM$Q%AX!y|8azqT
z)z0m=+b!#!oe?cPx}NJfEfelpN?tsuD)TmP95$n>t<uNC22>NP<$GF}XGCmhzz1Kx
zhH&0tf~3gwJD6>A+QMJA55OG5=B~b!$X%x0gjvChmd~ZL5A6s1d@+tP@qNP+ZZvCI
zyE|mO_PV^ou_|bfe7`SSdhj^lRD=jjZ7q&$7y{)E6IMy)tcZ9wB+JJQ54?LGb4Y4U
zZXvgiqPT|N_|!&ea0M@l`On=~GcWijz1z1v2WQr)*xTigTPP@aMdbf1q2@K*30h;s
zC2U*5X_Z7g;oG+9#nK}E7`mg&$n*B5ID&gTixq-1Lwqh7<(P35|6ScdDLtsMz$bVi
z>)yLg?`kF;JUnr$*%_O*`#|HnG#}~|lxVy-S=34OWgGCA?1*}A<?K>(eT<t`YGYrB
zIV_iddXN)Ze7N_7D)nU({P$r_a<Q1G0^QN~%7;8tOJDgRBJ-j^)the$<a_u6im3>;
zS9B%!IX#BALiUFDcrJ4uf_;=5zEJKSCZ^6Z{i(P5xty<0jj)x>yG2HJ={BuB1QZCx
z(s68KZ_n{&>>a^^FG$yql{7r((%h*;({b#HdqdwjX$!N}+VwLz>mduRfn6jabr^EO
zbVp@<NvfA=QFqM2F<U<^#X!by<kl!Qn|FoF5HrhCWOUW~tN(tiepS@Q&YpJ7ktX7J
z6dr3^#y7*LZQV<;qDRE74lRz*lG0X?sJEgQ?Fo7BcL8edWYp*W$u@N(7ZIQeRW4V8
zGlaZ@r|jJscY=%SFy>;4i~@g`rMP2<lRm%mta15vrKIIn46Oc~@FNjku8hjXtI@qG
z?%6nQ`8=gB&YdAydY0@`2Lo0e_vMIxuOk^HC`-SFUs-(%K}UwBM_R_c^nf{~&sZ6#
zuznJ3ONdSG!+y+TL{yu;=wjsR%vR5dr=25hzd$F;O$trb&${R2Qg}}psnKdp5$h^G
zblqMnS8()2FV@v`|G=&N@ENUsv3u<-^I6}b9?zo_T*Kkre%#8k5ihpS=U-8zx-9>Y
zm+J}4EqG#DmRJ!>(W?8*Br#9RrYFr3c^nSSP{ytK$u|JQs3tGt7*Sns;0`MxUpO?R
zt^JS|TvQConyIWdpm_lI)aOhoTofHB7hwBdeBRypGxyx6^-%wLk!YQ&!&=0;WdiMR
zfjfHm)iqqH=9wd81W9+aAXM6w(NR=CmKk`Xwm?4W9Qm`PG(3LNvud99et}L!{NQS_
zTk04@(*sUQwkidxH^_<lv!?~SdBn(Hu;21hwpmbgOz+k*8)*(T$^}X`G3|_o^U4yY
zt$YqAF1x6-gy2f><U8jt-Zhg-X>P@#xZrMYj>tGg6dMTNzi2QI9o2ehKy}8Y#&!K<
zVU3<A9gCHit#mnOnx&WMOJNoFiXz7+X`hr7ok0-yhF>>t!<)7dW0u$}vGLB7h+mKa
zsx&`h+NyL?hTT97^1)&D12?Zyo@vH$U4z&6uj;V);RgB8cKco<rUf--D+Zj(hT~-}
zz8_=z`D=uVRhbgv7#DEW6CRKoe$AXMC4hL+#L@3ZFj@0^@|uo|jOArkCFsiuNMiz_
zYt#YAdPqHd2$+4v^}n+E*UVF*cd@b=ldE0$7g<1fHy(_4TJ?3+MK$N<@$aF`7x|_~
z1k08p<J|SK28rAdbfI-SOS8pfzc{m_uDrEU)YkZ9RP`?|*kD{)$d69e?a!JK-x!lv
zCo=&tj)u{!*)3xeJUF1QHmbw2DlF9^N5qK!8QzTrL@6E1ZBmp`x2b$uQ?qQlgk`bO
zgsZa@zxv$U%`~;_Rq-c05%=ZWmT7AXo7C851v$rar0pp8$jUst6guc)%Upvi#^xlR
z&Jnq8syT*~=?r96>Cat$2jBUrBedJHW2GP`j^5Fh)c<*3IP}@CWKWXWe(VC3tav%p
zkLrI7nJGQs-M(p?Kwz6&o)N#+bK&M|<Cy3A?bkEv$aF<Ecf@(3l9TZ;4s6}s+3H_b
zFrZQ3>$}%F5vRn#Jq|a-`gxb;y-kSXN%GfgFWj$g{kl$pQIL-Jx*yw2Y8)p8M1AVU
z85FOUaq!)Clu*BhZ2sovI4^lX8GQ-sIpk*mrR`jqrSd$J2PlNrbLq)o*Ac7uKA-AN
zG@j?dqiNRW^B=0qnZCGsrQ9~-R5$MeE|6~=DDRyocxBdJphht;c_V7TQZ-lBo84sz
za(d^sy6bGyByH<_gc?sXVwp`+-#JtKXCd9O_|#?MlEw>XDV{3MuB_;>F6plbJX(I4
z?~Is&m~tN*gBGcOz%G6(<!Pv~kNo?7xnd-v*BoA=p`EN$`0csHC?Bv!%VP0#WL$Oi
z^j*wc`dVjDs&l;rk|!)zL%9*FguIGHt(5^sG@IN>6S)b_>OrF9St<^6S>wS~ailvC
zv1LZrHW-!#AsmEqopCv!sMqi6@;CH`gN6xG^41e7O6<qRMoNkrTTVk3NVf^*&}CMv
zRLgqcT1Ofp2BvaT6(b!aTJ_<0WVL=PpW@+6KOhpsMe@vP9^*Fxg^OD`9`|fUw%j%3
z&OsV3xYoNSOJt1<?V#1&F;82C6;#VrwDF=f-pUC|xBh<ss*cNhDVN-It<Z9(8pZtN
z8QWnFVwvxC6ok1QZtZ_c2nw};k_~bK@ZUu%r9$a1_@=x&@F-yG@$1%R{iNH{)E4Ym
z%*u{HPDFiSFBu1*_3x|o1#c0B<<>2;dc}=P6(NhC3%Rr4`ENNHn~^2tRP2tqL8w~8
zK*-ZW2hIQHL(!YYOkB5k;dJtoz12{jnbN^8D_l(?18tc+8|1!A6X?zZ*+q9DmFbSD
zx0s5y^`59TWY!MxJ=llOsecbRjTVW;GHjF*-->TGyp;YoPyMj8oxLuyrC%6rs!+7F
zJXr*`i&QMtR?3j)L9+UaJGL#YnE2J^3BgQz2h$2NG2`f~(74Gj`SY%c>Wym7@mhwJ
zvt$`hw#!0-4+a05U)X_C`AJQ1!$<<|rk;ZZC2A~Gqr-JWTiSGVvKWqEE<9pJh#2Q=
zygDLlTW&!r+}{z-p=9N}4iiRk&XZm<GPcg_B@Jpw8J-U&Uw5OQKg1WJ3SMGzR^>fn
z*UgA@QYbU-@EHe}$eVCXBx)YuGGcL1-S3$VnDXT8SYY>rNqLJ}r}d80rPSZpJm)Bk
zj;~>RCA(gdixSjkr$?s*mzJoe2{C$IOm-ZReE%q&Srmt?9Mr_ixabqhPXkf^{o+kL
zd$gm`hRU>!skSa-<GOpgFYgeUA>$r)D&~6z)ntRp0XCv^W}q%o9GzIj#!aUD#Wsj`
zjF_L4DOBuxK775<0Cc~Q`5%@p|CBbsKru0V_Z?MhP2t3?WHWGyJPYYd{@(UO_L9@h
zPKlA=`dve|#~9-i`i!ZCg-`rg^>H{mJBxE@DU(Q!BwJoqf(E@O*e+Onx;XB!o#lUe
z40%TU32d65AjO5uz0d@Q;4Je=adt#huYX1W?NhCi*|)1H0ry&^PdiRj&0|kZN3m>h
z{-Ck4v{=0~qE>32kb)dGzI^?N`4I;O!Pupd5nrWYZ-}>PLNqE_pp0#`v}ghJ4>=7&
z(GAd<P?odcfOFI7^k~3QJGV!8>bQ*qQdOai7|nG1I-TebD!WOHGs(TdrYC{rj1tGC
z)bv+gjiqhPo(UOfxCmVO=uXXy<$=MDlmp)*!8?w4C7B-+yHoV4lGYHP0=r0D8LizH
zn1hN*`wUw>4cjS!h!|#P8MGD4T7n5iVuyC(WXA$N=e!Xi?=p@Ok^s6?y+)EO(;pY8
z*Tg-zPPmlOKpHLD<KFO6CV?@rB}4CZQ2&s(6R=dkJZeaK+6lgthBV;;Q|^!>a=l1L
zeQp53s-s$qx8;kosOQ8ziXYht6K0T0@g-Mq687|7?Fo6wfJMLky$I*(T7GE_eVA6g
zK}2WD>Zn`#aHc!UMD7x9=@&Pt%=m=@&nHS8D&PWD@YB~v!)4%ZpAP$67>PTT>#-&K
zX@Zoc?|FoW*w*r=x~)Pke&NxZRu?dvpd#0IsneS3uRCWkg`NeopO#3HBXCT(8OPue
zS~}g>cF6HT^4SSR`pI6fS>#fKAd-cG24)HK*Zay<yM<Ou#JKkwKGYi~h;%OIc}fRQ
z@)!}p$(LSaHK-;cCde#xt?hG`R61$ZjqxK^lWl6i?mZn5Q334mLoy%WlUa+$H$K=1
zLOJ+&ilaEb-x1_e-5~h&f|@e<ZE%+dT*AIzWO9C7A<$M+z^!xb#}#B7bE2+SqAuDE
zNnmcP`}k~7_Mx?Jog7a&YK)Szf|!kBgrrl@V;p%wHhzI{62e#2`Q_g56LF7NT3qNZ
z*%P9lC~5p-_&1@v0+4>Hfo94y1IVjIcb`cgkl?ehe?^!Gdu=H&Evap#b8v%IrI{@l
zet1u@pKI%^&0rwO{w;3F<*A9bTa)7&0OpT6h&)aD)4YWv>k^Um)0&s^g$#X2>>5Kn
zmX|!IpszDBE?6c;<<OU;K@u!^a<gY_bGm*ASsZ}S@PzlVO`uzA>vY5v8SvgP7^Z9U
zb_;r0!g_1$ussoj;%&w~HaV}w8rUlN$Z!*kMH+S89AKS?_-=#w$zRCR;}xX~rCaS1
zH}cJm%%5(!3vaC?Y%F}?Zu%`ARB-jzMz#rQe`jN`UQ6Dj5o>U^pQZf+Ff<*xQ4|NH
z!**p;X#^}8R8RfR<U98%G~L5yoVQiCojn`WEnnO(R$0bA*pXHbnlG3<f@soTBg-eu
z4>PN&+AYxWF99uvPu`$R40Ahnl9KQmnPv`0wu!93K|72YUDC$YMOef(BS1aUsSB}*
zpWyIpplmr)i$&yAcoNaAQfOC(MaJ=rZR1(2Q@N>MuFOAxI&gzf>>AjYG;*6{OzGq#
zXV5AGG1oG`Fn{7^Aa!KH-?dMa9gSL;I-<4;<WXVz6Uj2{dQTYTG;Zj6@??-XDM4zn
zWpmwZdS<MX+67kCs3cTXNvZqY*eJQD5w*Ch(>zn~VjR(F@UZJU%vZ|4^xyM;LLgyf
z*TSe&X<7!G@_mqG_orNakv@uctn+Y=dtFkE*32E4J<2mLm1?n-r^P%4bAlwi1~<kF
zIe~`takqgz>1j2_)P3I<#ouHdK%mY6rIz_|!NrH*|0lfo7#~gtB^hC*KTd7txJ`y@
zik=fGACub#T=?`kMqE91rZ;$i(ux}emLf<#LRgr3I=hgzVopiOxF6vh^sZ8fLjTNC
zZf?1(C4Exnq80q&Hy2JWRXIG5Ko1~InXXsp4~eXj?Q+Gj%EMW?`O!b2#a?c=tv6Ua
z^ByzhSM>*f;=QQd(Oo%O<|`+)jRr0pywEnvn?&RK5z6V^G`J5<2;NTbnw+A;si7Bh
z`fZlLG;HnQx$@_qbihvFVUf%tYm+`Bc}^=@{5qEspKwg|sg@G=pk6rxp6bl<X331B
z{)~fEoM{LdW`QNtD>6_Gw~GhJOfz+X<6m~Nn}vs`!(I^DhFolri(uTOx&pgA2taLH
z@p&)&&S^bNw16P1H^Y|~UuR*Dp{w^59gpy6H8rt5t<N7XP3_JJ^wlRlrsDD;sk*?N
z$l69vBo>x=_{ekAJwUF^%2D8=zwGvfZNWy<;f5!$bsCr7X(LHuZLCsSzdEl!Q`(z_
zfJP|1fElA!%uC?t>!b|roYmxf9I!B7{}++y2F7_hqR!)idu<jSS_*F4uMH7tXO{4*
zAbWfcB7V+NODbgKjPK$e=-cPs#6-cxG@=LmaSRog*9<iI$bVC=drgk!c}7@EQ3R*2
zPmM3<y>3j;dHF69d#G$yYQ_(a(r)$oB!(JCHg{6+bXkm;EXc?Hy4p$k!qZ~D&NU6h
z5_~lkOc*RZAgiLHlMg>Y-!6Dz0H1_#6;9YvYUx1_PTodT3>{fO!$`?iWP0~RGtFJ!
zmN6|t4@e|}MP-u*RX<Vhh$*_s)}w=x^u;wS(Whm@rY1=Ya0_9|P5BR@#GPkxvp9GL
zJ^p^$I}jPEVhD#{+zM7<@aL#K-M!mjkP=DmKR$l7(5RM4&BXEjBuUD}7etr(&8N0k
zbKf16Nxpx!74iq^o7^DP{<vln3z<$r0t()ApVTg)&eX`X8oS2nXr#uLec7HsqD!7^
z+gX1!VfRAuPsp+s%p|b`?(aJU%WC%+?ef+6@0a>DsJ!}?IfZgH>(b`;ilg9cU8*=9
zu6~dfcaY+*4x2||yePG5rxwEV&*Y+y^qGWUEZ#_&K{x0m^Li&`U9e}BR$OmRFmb!u
zZZQJoxrApUqw|*o<h73BQTu6zjL9-M4saV9T#D^YQpF@>U#kJ7F_%UzDKZ>-C)zDY
z!ZpNZxzy7g{s#9Tw74c^!@`MkorkaUCUhdiCx7dYT2X~R=_;M$p!k_)g?-wIG4(wk
zZy^J6vM(xCXj)XqSwHHn7V5R0SBigjv2u`+eZBG8t?)G-!tIkS*VLa4kCRhrr>MQ;
z(UCds^2H)pE<$l~J~-oFmCnL(<I{1zlEuHxV+d(Jf0pOcq@nA~_<F{qlgSq4n0743
zxS8AZn}zRv_TG=vf4@U+kl%0X@9*$smd&6@^7F0l-7}|HFWI~AP4qI%dTyXSRkb>m
zfhxsrY`V+^I%ImgW^O}o7?#E_Zw#3cg~eHNpZ@`HP0ZAL;lqoaawQhaZ-Bn_xz1Dd
zr+mNZP@AHnx|t_qoki^NpCpZHYaB0rqW{_d2N1V1>9{$wbkK|ytX%}0JEwWA<_&mA
zkECr8?~vBE0P7M+a>$Z>uInRXk`0}MearT3RxRll$z*6qk2mNwc_vi2L{-0q&Q-rU
z7TPLzsI(W&f>VFi4SMG;u|kV7G8o6Vwbhw?mOGbJxdny@3{G+qw=esgw{GIC#^j&Y
z_3s4UK+9mT+8pR&-<+2CgGPLT9dk9fATzb<&rxd(`g`tz{e;=aS?rDBKs7#5%e0rW
z_-nX%y>m`Zi~u#pZ<2qo7l?*(&fU1KEza#oKg)fy<z(B)aj;nG^eY;#n>)B~LBUI$
z_Vsg*`bJS71ji>IAHGasqp4dpwIa7z9UQY^eW*>#Sz%!D@lz<rq!~VW6*6@h@o(cC
zp&I_5{Ke|D<}Et&qe$Tz3)AT9tioftRqNO6;a6;M;mT6iXWY5of>h4ie%!o7{>B(7
zA?XYX<)z@~8EhNelc!>=Dubmr(lXF<05E2>?;WQ~W=x?L+Uui%TM#MB6ZDIG5+`|5
zX1t1gCMi=Y*Rhm&Bb^%U4QZ}RHQD%er|nNTbP&G&09IL68srx35WG&G^YBlI01jGW
zD<&EBaK?>1dfAJ={B?9x*sx@Cv)QnJC!U`SuScE%Ir&8oVz72OSI235v=X19C5e3m
zi%Uv~CVvwPqYRb42%~BjYvFWp+IKC^U<7RoZTgc48>=+m5q>WAEIbA<$<0*RzLD77
z(+=USdk4ucO7#_xN2#k^)k5Q!I44GH-_<C=H2CNr1)y2x@?Ih-BnV&1$e9|Yx2W_B
zL4km5AFU=AfpxTcyjI7U33!-a6pSjrG0)M4{GgqruA7Qt1Y<m0uAI?s=7n$)`DrBR
zq=bKvq2M7ic2{pLc_rvb`1{;>E_83z(lQsa0>3jiCupQ<uS}dv@ObmHk}VF!GMi=B
zx8fDU*jSSEc>6{;xO^?kIf2v>Rr)dfYONpA-TFS=%=8UYje5zM*!&XEEIjW~X?ZDW
zQ*R+&^WApa2nZRTYSp@^Dv5KfBnWymwf77Os0NE;m3SHxt`n0K83#0oew2`i#0-!#
zn(rL`Q$aNeKeLqN5-O?LO@5~7Lj@;Q`S2fBBsyTQAJwnD$<jHx*&|M$!CMP0&31b1
z#r*`(&VdU-P256Sg6yhoxjN*6FP$xcqiWg0nPbC~^7sk2nVBLd7RlvLUKC#f)~B&J
zrdxxj!-~Gdl}exl4PDmjUNn&2yZo2kz$z(@D#6<Kfj(b2gcQkIpOM?(8S<Zh-~5~}
z+~|c<uzihZ(9oJs!;1q1ttRH{GUJPM|7IxP?K$yOUG=;ADe*2C3pm&kI<v<L&XNqL
zuI{Tm!!w~9lQrM+iyXFbEZ4awa2gi1hvMHbO3nT<Rhe|)5G3HJDn>P%pAv|<V}~#C
z6g?TgQjWx%=)E+kGk&HME!FiDWoGz&;Q2)dWi?XwQ9vJIn0}YU#T?bmlQRS!!=jST
ztYi(pPd_ZEL)&jH$G(uQQm(FX8OQPi%3#9)K*2&o!F~WKu>V0GAE?C#m;noii3N{M
z#)|Qoob4-x2m%fzI~CqH4q;I-+z*5U@go@&9Q3lI_$&L0ugvk&Rqb%Gt)Kg4-}ELa
zZRRU15P5ldS%|b}`Q2#G9y;(HQ<XjjnLM`el{9we5m@W+W4T_rQ+JukgW(C-NqqIn
zS*Sa1b~5*d*|e>8zeXc<Z3CetWfFnKcp|Mfg0j6m*^%kyp(a7EdJD-6oa=X#x|Je}
zkgZ3;zelJSjO8pcOMcCL222fG5D+?NQgrHEK&WJP^<iTF0qpM%UpdhOo`s?-D|4C-
ztPWqH1Eh@LJRUc?6)*M61f?$Vf{%#(q7157%)o4k={88FE?9v9q^Q?Hn!gC_!0{_7
z_0`eToTP|}TlX8c?&H~T==LJ$(Ye=LKxjS*d-QCf{KqHvV4Br1jae$_@JAs+-A6P=
zR}^xq#vxZ`j^Be{MILQ_q_Gnu9<*$7a#~x%R~C~ZZA9<fU%xCj6`sM)V$k|H3>j)Z
zT+0F@2$z29iFixh@$%U73g?Mz5J}x)W$u~TAr^%NGWM=V(&*gUBarQ(LM4wminp9^
z$}%K(6730g=QpQ$`UuSnd;A03T2}hNZp7|93HuONdgw{On#M~>TBqE%5xOxSO4Nz}
z4shD|dv~&%m!VEsGNT{xxG^wIn-FnZ>-JKMPEm5zr2^wg?V+RJ3^+~ZAcxPaj<wDY
z%Mzzr4!wea=KcW&{k?JD5o)-gi5^iP?`WOed-1IjpH+&IOYq0*^)B0fv(%2*3Che>
zD3q1(B6!?)W#*~17ncYQUZM{cc98GthwD8S+8*r%D%vNoGO%m+s?KXst&lp0SsTWC
zF)=)usaH?6Sq2upZ-sd%-NnD6??Z6kp%fTw{b4qp9Amh6NQ+Dpq<F<|23yq(xi*km
zp{kgKi8-veb=L>PP;hcN*k^u%ta$A|t3?E@TIB>7PBZuy1c$V2WohY78diO6nf&U*
zyR2{>4jWS4r;`#kq;oD8{c3s*DY)}F3w?_cDc)r|+3{ypqz5w_drMij&A4~!D)_HX
zws4@bu1daI6}_lwM65Xdu|8zJMqj4F$~<mr4kRN*Nie(5!jOVMQ1n#rB%jzhOI45c
zUzbjF6)wSq%?V1uU-W0MF^<fyK#hMfJa^tC4od~ig@}ExswJ#BVS#4G333T_GA(#t
zd4744?QyRAO$E3uyX5c6+moR;1U1a+b3hKr3?L}J1cqh;PSf?{PD=FEd$^6c#j)6O
z-C{hHBqIC^e?@r5!&Ap9Qi|n~`e32F9IuE_GEMd|XS9_{l2^AX8d<XxfsO}fk&u*+
zru-VaYWfaXM(RR+gd`}g_)|_nFC^@)B@TElgxJw~()tE%!Y6*W)gQ8xPluJ)(+^jX
zZf!%ji)Ygq<|!*46GDG=G9=J^xX?_oXLx)>%5`Pdle4sQ2(H`dgo<xDroLZ^?QcE3
z>t%?Fu9D{Neux2*1Cjr$*yz221Yy~I)0;$DY_sjx^)@sK-nUQoyT2}{U_5B7bNI)A
zY6a#B;r03|S`n>q6N(UYI1ZlNe(s?3xkWL>-tYQzN<Y15V+n|V#_uK9g1HvghP<Av
zbTWb*@8o;58a%Hvu4?%0=4WASGVv6TuejhFLv+yAZ+&;(#E#^SABydydw2FqyyU+`
zL44o;8cspMJEEFJPz>=&92Lv*1CX1y|G2;`gOvsoJQ4vY#x!7p)Y(kh0XAL@tenTI
zij*z0vvgv|aaaB?gEfKPE1x{0Ko>e@TEDW7xt{nv)#*EDleX+ocUFH|8N*08ObE>y
zf(BROZZFur<#gnvVMvTf2itwSCfb7QA42~GTP(owt+*NX$)jva&R#^3C*STDxiUYh
zwU{{5sU1@h@vo_^Ic>3sju^i&lK8v0SNquYS4D=eI69w>wW*^yh+Uc0L^xZGx?dvO
zE8)QfKbn9*5WU!g^y43V?yI_t+TuRL#O&xdtF{O2T|7!ifbe4>OqId0C;VXIRD<$*
z8Z;1-iLzaie=`2|%4Y+^mg2n4CoKpAD426=$i>U+h*XG<6azWtgP{rQPZ;ced^H!;
z7iv9VPCjDr<hAWqjO0hp&>xTzw5?1{A=>j;zwHWvd*G8NVXBe*2WSz)Fn)wU2{8JX
z04XooNW)2efWNM)1NJ-^3dz<wa}yu_q=~7a^%4{uVVJG+|Kd)p=ZoQugJICt8!o;U
z#*Gtjl9c>S_z-ebS`FtZ<}D_|CNfycO1g5bf&TjfhWZVOKbg^GL7a;VthJ~jLG1S6
zLIz4WDLbp2wG)37zLx6wyVG`!L1D-GxG|C3jw2#=BrRYbPwL|rk{ov_toZ6>$h^+Z
z!r~>_!kX=D$m_)X(;;iuDwT0q7x9>5%zel4kU!kZtY?`mrQ{b>tc11TRz--XrLgDn
zesLg5&GKM|&k7`I6T#GBZ?ErmpB<jxX@%mqg7XYv{$=aiwqk)blwb*JH9$Q^&do~0
zo^Q;7+n?xhWB5TD4pG=6zUznFU!ioo>rikh={+QRuGMcxSlHV4#d>@Gt#bFFWXm^H
zcwioG6@=s&<~IG)2JT!xe{-<arMDXo(307>4%a*r>dLu=K>UFBRXIiW@;Rc?Y@f4M
z-iD0_UKpCiP!{q%?y@z|zkDJt!-Wv7&*G%GHaZHsp}UWC`Zb}QUKaf;9L-LG;y-|y
zPwpkg?bi7HB1N?mP`Xby{q!eF7|lh}sTrABEEk$Hbrwa-FdJJxWd&00*~`*0W#C{f
zaXTu9b=_CBLIr<v^J$k?BmAEje-`!7**peXz6SbKqMKa#t<b5Q17!#m*LHt#MpWJ@
zMR0Y0R@gX%EDS6(imk%*#J#nUq6($^dOa;iez?WReI|)MUy-o5+fO^va>*2;nkzW}
zGUL^C4=IMBN5jnLPjn2McIP1e3sm}&-Dz)%?hP`8-mvRbl!-yZlBOW5TV$2`L{Vo!
z5vk>hZUy(<1lBR7<E{?c{q9}u=*r%h=~Z+1Ti??>S}3Fk_<$btj9!3)S%_Zn51_-+
zzj10g1X&*7?nr8C>NGxU=K*WG<XL(&R)nx!5t<nopM?1Yu&0LtWpc#VGJ$Sauq}9C
z%JrR$sVT8`Q2hqqKB4XtO1dEzHCRSXTqXaT(|wR;93m=;ny*H<zU2>QF2Of@A9MDx
z7wiB>G85|ub3QY(AD!GcCp&V>R_ci)pf~e-{F|5BSJYRW0HK@urtc0GLGHPNLvm)Z
zATu+VeaG!-$^!hI-L?=_lI5}IrDDY{C-Tz*)y7+5O7*j}nz}X5%vBH6#!nSZiO(W%
zhfHFIZTl*5srr&Au_XKoHru}*t#lZOORDxxdKgB%X&A2uXBL?q5)2L{cSg7#h{MVl
zw5)dhHRqcyr;v&Z5kAy1Nb}hCR{M<TEzW2jU7eKS**?L9%gHqCoe;Rr`alzB9rO>N
z%#?dPUe>VWUcP18lGtK5SIjZdYr0dk-aHvqS=1yP{K#E=;wwJ*H#*JX__DEJx2Lq^
zWBBSQb+1##PKo$a$=#>T!<D)J156F2F;3zmwIxkANEU!5-*Z$azkI5n6}39$^UOvi
z!shG5pm()Zk1X>_mM=`MA8n_--+p|mAQmdDB=Qu}C9TLmq_+^cywD;20PoI>MLZsf
z)VR$G7$0h#*w4~RsBiuOoZ3wSJq_oGJh=Y>)M(~hZRHuw*`d=T%`Ie)Dj*Tf20F9<
z0IT|cyF$GkmRU8{zRw7o8C&A!#dbe2NFUj&(4!B^tk($zbuIS+buo#wD}t_gk-VL?
zSntiu@JSvVmFs0rg>oxl{r;G;wKOXV8>2*jfUl!^Z>&`Qkh^~Xxdr9hyR59bh9I>v
zFv=%(F^*p$kUjER(;_y;4<D@FXh+2ojCdx;Y0g>&@sAZ_Y8?P%<5-A_zh^K>|55Q{
zQAGNF_=awTs(Z!n<5E5z#rc?KGa??Iu||>Iuo(8n?CkL#(lTzOVU5y67-Gvd_x8fm
z)n%zAd#9-@&~zv8b(Szl)2qb1qE(Zv8!bfVYiJ4MxZqzky?$$HA9mehf@JN*uYp0X
z{{Y5$;0O=xJ)<ebS0|z0K@VovK;6(J3Lh8qXb!%zrRXTnohBacXsZo=|BEAIy?+4i
zH+hi~PWY<aDsNAK4kv?DGX-kkYUa{k5f4-v+o`|mE0Nx@4rd`l$X8blezH#QNQ&AQ
z@Lh43AJduzX$Qt7^wpJ4ZOYL3Zg86czn${XogC>~-`mGpqJO?tJ2l6D6@n^K4lA_Z
zRqD4W0G~6L8k#{6_I|8H^;l{EB%gtyArdJw%h@o;8;%M(I#!FdI1^_UXWqsN3<5V@
z%PvXvI1SRT2l`jUZeDv=Og}F4NkxVRk5L2moax?rc+61k5;&upe(14JffD@-*RW^q
z%6Iu}S;RP<N&UY(ee-OV5cnZQ?v-Ag*E4F1nK%gU#AdOs?6deJQjU3V;kExTH;(Q0
zw-3L1a5a=u-w7e|*<xFoEt-5r*(QF4;tbzrpdWM$fAxI<L(3jF5798?XYkl(WFck}
z*3Oe>DE@h=OVC?w?!jJ|y++lG^FGsbe{b|8+hrDkH8)wDo$?5(E>BpS%RhiZ`Wp>g
z)OQ9Buh^=htPPRj)DgKiqoVa32N6NAl%@)?+FQiFB~#d;`(B-szecZy$$-xm)$G6`
zeh|*6iUg^$p2?r=wgSqJ784}3xdAoIJB+>=GlYm?9IzBNHV*S*AT#P?66%K;K!Au!
zeA*kXoy3p3{s{j7U=9IvT54iik@)Gp<MYXFX1&IsQd?pNRxfuMgKRB<>2a@dTTH&e
zqpY<Lh`XYfluM4xhUI*TrK%XzAuQuJ9=bfb(Yaulpe<>7F&{#k`mo=zlMaUe+Rqe;
zOVxzRG)f8s`7y6orqiP9{$+5h^1QRcH?3i*2y~741aVeN6Ls2+OpMiTdaAtqFxfEo
zvi`2=h#26EGe#h@-ge(NjXHh8u_99&Th2cTyJw}hmjq-Xj6S43ltk^wn<|7V;zt?J
z=eB)R$cLJuC8B>sLCSf3tXD-lJEAR0Vf{Geq^geJoPKN4YSNE;e=e|#up~fNXbVv)
zO`B#Hu~Xvrfb{kpIkcoRI>0A@J93(?K9n`o!IOs_8ZN*na-%k-M8ZZ=;BX-+;Ha-r
zJP38vJ~r0hAMFdabm$hgVB0^v6vL5Dr!1V(OpyN_&HQ-E+>-@lKBAAVXIgG;rLW5P
z2Y4#YcE%1JS{=Gg8ngfI;(9PCS3tLNlv?{1-mgxdt6@}GhO;vK$hT6#z*20W^I05P
zloB)Kg{?yp(8P1rwB#Q=lQf7pNQW#`kV&qVxiz6jC84T=3wo2)q_{3Wq(LXZ-1@MM
zQA}`YE|>gx0lUkJ`5G2{6E>`3lXR3EW$M{<HcBOB>zH$(*LgM`P05@JU?%d&o3GHb
zr(PS?bJsO=S$E%%GyckL&ZtH9Af`$ZS-)~A+!)N^eIbr=S+GV@WrZd)^KP?EhRqw_
zvuk1(P<|4{)Y(OYVvu2W?5#a?FCRQWl}?I2Ggq;O{wqUQnKthN?!>&LRE91jk}w@x
zH{*?_L;md(1NgZ!E%SuzSm{B$z04spx;+XLno<4)M1&;~`)A@b?$<secbl^G@f|)J
zJ%exPKGzC)9Zu4-x6sU@w?mVjJKTQ?LoU$}E&ER}bq2;&!k*j0AFs3YKZA}IX7V*E
zd2!)8sh`R=*$hP2c9=+1C58Q%**VYbxqFokKel(FG_6?z)P3{lF=FCp@^0Tcjnt+T
z|Fc#?9f;0j8rz8AAdRsd{+2J%kv{^z9?WA28u(~mvG7tBy6M(QQDJ?b=$AA)JFQp$
z5TRBo?#QM~iL$Zj+lMUwBUs8zb+OJPXTW=g#>Fs>A=9(#?a`LSEh)xTC&8z5{>Ou^
z2dNO3RX{8Cbz=Ur!In1%-4*H-oHac8t_-r4M^E-z`5`G9kma52AE1+|k#$6xaV<*|
z&7okpLbncue#!P~UJ90I=dS|Wn_w2(q^9(5(=r?_h`mPn?fGd#JPK}kqq|k`od=jA
z<uB`}1BL2~HLk8Mm*w}j8QYf!g{rFKj8FJNN0b5!GgUpOG!+fm9k@@rZq>xN29dQ+
zDi6k+r)s4n%NMk-_V)IIqr}=H_Z*z%dZL^;n2L9FSAkzUs|zj$9(H)&_WSQ#|JVpS
z<1#I^Ps1THcWTuiEswa8+0yR`lZ)MRgEA!x9LiOTXUD1e2rf-I@PAwN4$HtSGcDF5
zccg;ngp{>yvN<^-JSkJxnu^qujz2MlCn<*2TWGx}$+219dL+#VvdR_FSzB8xo-=jO
zIE{V9aW~S@SUcw3zZ&_wQBsS|Bi`Nr`$;I@ieu>cCt^M=YEix1<Mc|PTVT2^ROg;j
zAI0OY5CQ(dVnovn7q4vU+sh_@%@M!NYfJ^}K@+G2oo9(O)WcOflS89IJu9(=dj1zH
z{MSDG-!`(0iApo^6PAb6BgO9-s~c}z!_*&^`kGAhJxd$8NfJ@Rj^ih_@H`Wf&yo27
zrq%}c!Yk8LIt-0J_M8eQ*vpON?bk+g9yoS%BAFRYX+8u*RG>6Wx1Kd2h5(;r(g*hs
zFn-5z&1N;4(ZU<on3m<a`7H27aQN(e^sCVBVsPao$H3C5dX{d~A70+1T90Bj9YoVl
zB4MoXmH$;~ZF~En7Hdn!R7>OQfn&OXy~AP8sbe?N;-iQ_E=k}6#iiy5-Mb<mGiwp2
zHJ~8^h&hGIgx1e4OR8Y>v6j*l+8~TUJH?60)*fNoa6QR48H>LHvs3A`)qJPr`;tsQ
z=6b~#95<?2(DLW%Q6a7mKS;AnvXk?}3AfvE*_2=fO{X_>tKOQ;HwgTqE~F$Y1bEF3
z%==PU#YrSNMfgZcsHYEt--vKu!OQ9T*a$@Dzf0hMnn*TbpqE(lGo1-qLm^2+nd1%h
zC*|i~IFa$#XR1SKtYz!?6XGA8(LSP4%7%Qk-e$?}5zwp$_G25lz{m_Jn3U#4$hjNl
z%&Blj_+F3bCw>&;HmIU^z;Xq6(J#E70J(okmdL0S=fuC9xv35%ruTQbifxQuIA&Q1
zxFSt4g;M?v{mzDPmOw|-#^3L;A3SANE_;}5F+IsYx_FF1a5SnHvB`w|KwV8Gufd=C
z=`-5TVFfJ|O<KF%3s+`oom!E%Qn05MS()QN@5Xle6K7YXep+*BonVXEu$I&F<E_(H
z-j|9(y-CGsX4%;*B?cSyWGN*&dZEUw(I)z%hx@u@GnS^o-MrY|S4OMV%cg+_A!)HW
zBj)}iVP&bS&Ou!5v`@B<ap~B*72zg0q*ULBHru@F17qLGJ7T^oi=uPK024J#kDdx*
z=3cNZe^{J%2Mzw>=pB&qv)7b559j5J3a`Fgpzd(6(bjEawU07ZRxh|Bme>E$K4J_H
zUte-9c%=Ks?LKoz1WSu3IdsMG^yO~Pwg)1Fg%kRvAiC9NVPb)*el%_l{+egd_in8L
z^Uh1EF}dqnu}o!TY-*IWer8sivVjT4M!KLf5XYpJN@^4qB8Cl%x5VVdntj?-rE#?C
za??3h33-tZl0b#y%E<TU)KsKOIAuL5?v-_q+dso?L87!dS0ekg$=39q%xh@G;GVZm
zU+=jBNRjvl0BJimxDT#$a)e0`m&*S2g#_H^U|vQ>TsK!%GvKd@$gQd4UHQB<Aw0p0
zaG;w}rM+YkMUy)FJJzoYDX4+F`Kj$Vi8so|r$dS33-u8G3{NJvY_VTCqzXNm-G{F_
z^86a~yYn_nRD;fU|2hjlW4fCP9~STn|29itw~9zd$Rhf3t3KL+;TMTK>OrME;H>{(
z=W&@fPkpqQ(17I<9A=2SK*<r9%d#X>Ry}z2;lF8nO|>{xi=Iqf5?#=%;#Z7OTyD{8
ziyate6qjGh`1)+QyZH}*JXdaC%J32k+_8-$CvEP}L*yq(zCHQ$)77@hrrbU)!i`so
zLfrv^hOP+lUKzL@;G1@cMaPV;Ze~>e2cW%(b68^Vf#$c1uar0wqT<ObWGFJ7{qf`|
zUTQCe+%e~IT=K0lb0@nnCOt7JHn@v`#y7D>K8Z-<Oh4~H%0}3;EscL(aE#|<E?7fz
zmt6(EPkr@$kCsV!l(C9>f?D{j$hKC`2OPV*fnt5G+$=l9+R;mb%ZW>m8+HO;qO)sV
zlFUo^*6j^v({KAnAV%PE<qKYYv{3uSaMv#f8f|}gKFgvdT3yjm*?kC-BE49BaA>2(
z#7%>KQHCVDfix}n-S6$@3)T>I|1e@Q0;kA}7@n;v#GN{~t~jEdfdTZf%lv^E-7osS
zI&+<%ThBRPId?W1Ipp{DAbwbje_RWE>nh7?TO~r3sWco~uHNX7c|0ln2iTrj;wL1L
z3MNgI>?h|{C;fg;sC=1r#%#fDzF-qOZA-G6BDGUh(WGczOF0CUb5)8~!EvZHo9vKT
z)vK1XoiWJi?5t+b!%_ixu97<@G|$_*eOXfDk91Cbv;v>C5g=`rapAAwFW)&6R%mXH
zIljHRZ(_^X3}rSp5J}4K^~R0Z!9|s~He9Ac36`F8E(O}^AYP-n)GHYVSxZx3h+mYr
z5I1^BTA0jw`slbn-ljD_TDIV0b7_&NXp(jqT{$FT>oxd5?e@)~1;QbCz~R<O-m!?Y
zY5sj!NU%nj<#=n(T^|{K7}L#I%ln&+sH&wZzR4hpLKQu*2ah{%tJcn(bId9(c(X@y
z)>s->Em;}D(-A1SM|aFep~c!2lHAV2ee4xJwKBr@<N9o{B~1_eg1_PX<g+KQCbBle
z(&k}ti7t+ed5klSMMc#!LAjAkL`Qn@E%(u{FbLt^*DHXDy6FKvJW&$D<BM~j{B~$B
zVd_z+v>-xJA|&wmJq1^X^2J*phR;6Wsq)(M5vT%%TU>*KLDu~2yZT#UyFadeDVF(Q
zV~CBFNC%I5XllN;&5iIM0B`1Hdst#PE}le+_`UH}e(Ou71h$2@54ghKCv(EVOssJD
zeJ$YzE?vPYOPO(;*d551&^!tkPBI!VPHSr4x@K7a`<@P~fDmT2TjE}baDwxi&;Hrl
zMFGIt3FJuoqo$@fqlS$wOZf97gY<qh_oM*+Orrj#L9GJ^z=EIV5aYkq@2aqaADkhF
zuOEt^W@zrLOGrm77vWF;M2z^y9W?M{8ExBtF$i2AM+54+r0<y-KSy?mzY`GIooiP!
zlvlklDk$vf?5*`QyRpTicvQfyIt%)m1kWzLZ~@L@ozH~3DdS-(>Uoy(fOCZn$QvOa
z3i}WXTTS*@UeD<<)XA)sv)xvlV^&2^kuI7I7{d)7GK1M(?ZLlJMlDzP0>Ur$8&iOg
z$Rj*_`*C3ORml2pJ>ADNs{79Td~9u=H4_$2e0ohm@L9VtHEz_YnTk&o^NUH0HU4V`
z*3hZAL0U3+OO0^eXn3zyvrMAdcE}Bph=_q$E2GlNc-Tfhn5ED2i3CWPmi8eN2JL4-
z*Gib!$=gwWkI8oxhDX{=Mh=6OfJ$3@f#yTYbBw`;5)o>tpM<;r0DXdI&_a{N>%_Vz
z?l|2QRWxsMikbli!W6a>tDE`pQ4#5D#H0OHOw=6vzuNgwD$b&)Qlr)m7hm?hj?qeL
z=w;%2hWvtv*^6=*Vg<_UL07eox>wxi7B<`}xku1}Hr5J2UD>GLeULynf3?HwP{fIK
zp%u(WC!i)&e*4XRU*+gZ%<bDzflBF~3TiR`@R#}KtUn;v(|>@nS05)2(2E$PG4DLT
zx-Aw+79%6Jf@7kerSwzR<@+c%r3%83-rXp^P&0KntolXFU4>qbd7KQ~-))PSvBqo$
z@G+}(<4T|OJ0@fmO!gzSl8NGA$GWr=MHWs{4cS_3r|i(*soQL2rHslfEm9K{XH&>q
zPQfzB7Te2?brmS}l=Q;vZlBa!=o|}+<a9axdRt0ds(XX0?w=hd)F8;vNtEvIw3@H#
zUWobPqOeT?ZEXRPl;Nq3J)7<wd^dQGDK#TXN&>Y+^XjHjz%r-aQ<+RT-{wbkC`b0$
zvY6Eyp&Ok!{O0P4Cdd1YA;QVJtdiK1eyKNPQHcK_6(?Xi1V=p2i}Yt}YdF62%Tz5U
zp2Fi`R$Xys?zc1Ew}l3`k5h^?u~0wkIRaZnR^odd?_l-qP5uF9H$)6ybnJV+Pwwwz
z5A{rtv*|DT%1=n-vX1}WspjdY{AOM*6OmbeSsm@|Ab)7ck#PZ>;{lh~R+wchE+1tk
zbSaX|n!x(RZax_e(SC-csmX?wj980sx;7fF%P+v<ul}`@?OV+?txW%MY-B#73(Jmz
z0KcBzh-*gHRHw$CN!{<^ZO5`?yp+1E)&)uzsl=yM@J!DqRTY<i^uMd0fgT0jF2O^^
zl6vkC5t6t~!e!fjnSF4Qzs$zN-C{eBzL{sAHUBm;@D^!K?q<_Fertk=%2*|KOdOKC
zZ03s|@`_Xa%O~_RvWfi9Rf}&q0S0DjU6H@9(Trzn0xN{og3`QjxklLdQ48k#@znP7
zD?il(`=ZHNHSw?o+BKhOb?$YSlq}b@)FPlNyorlkF?MT7BLz7v!F1n=dupzOWQ}Jw
zw&qM&6ij@}<L~z?v8gKtc&`Tgr@FtyLxyx?PKlNrr^BFa79h{9v;7%K<tL;OnD_%h
z%D182;$MMK3P5c&w8S6sPS%f2Fm`js-$qA!L)RD$iKPW@qa^9R+Y*iytFYB{*yz>Q
zee2dm5NHQe<K6gLxmgJ4S-fgC(CWC@B(5a41;p}9h~aEX9WvI#Cb2BBi;f55r6j*C
z5DoOB-UMttTys+R`nYc}qJc=g#+c0iJbjJm6m1UFY+SjVBk*9dSo)1$)ZDAc)2lrd
z`Wq6O)F421se&R+T~zeBujh~Q&DE^ydo0_az2h?D(kZH$<uiekuLwsxj||!Ahu>$_
zow{eZn3>a*t&_G_BB)RHtWyRpZ4(}7$=8qkw>!PC#4$kXF`DtUe$e3DP&fBQ7;eOW
zy*w&<qFNgJM`!j=skds1bmP=nlF_6R{R9LfyYZiV@{u-$s=2|H3J#y4ud2;G^u&5L
zCQ`2Yt*tp(S-%yLqSE3lxJHB(VHP#e&_o9jIDQM^JY{y4vXWs~buqNo++5PfUjgN9
ztLZ)=0$CxBZ5kW(Ac~{fG8QJ35^5jv?v>Ezv~@|~7hMR=*&94{q!_aT-47`SY$xSK
z%qS)uoeB4$;ry=tmM?ZpzBa3$VR`|&Vz;O)yTtF6UmNpgdkYEV6=-p0@BGR!=7HEm
z_htg|u?lCo04wkt#kXh#KGpprwO`md^Xv70$5|Z%3I{rr=l=md4r(xA0-&Kj&S=1V
z+(i9<uAV|c!(g()VqlSpkh7^^fBj}cAqvOt_!)=t|30JfkxB?kx|SRt>$E1`bS)Xv
zK2OTqZdw3C;G#@3wctK-H&#!(biu)G$XF_&=Y^^DMF&JwuSHZPnV^}Jx?=$@D%|V0
z>toKJChfM(e8gKayQ>!nv(UZ{5+_B9|J$F?s8oy+e(k;#RvbWj<1jz@UEG`1H<}^#
zwAN67+PZK8#oVTFLh}(%x+P(rZc|4f<pn<&49`>nOol8uR7EQ82g=?r7tB&Py<H&2
z=An(c=$%SDpO%&vE4cXI(mj37S8)pe0ls9#TJPbk*;Cw4)~Cm7DJ>no(yPO~O&C^!
z76_{~anmkUQI(KQC{0#;QLE%*zT#L8TZo=Yp0xhBH$X3yXeal$Pv6x))Ujk@Y%tVk
z=SjDzRJY9n=C|;w@5obdUEG-lE-|3jMO%vc#LE`g3?{l=O1N=wu+$r%aWYFP9Z9g-
zMbMw_xlLWGb$jDI;tpkl`%;pnYdPes35k^;DM%*qW)$(Kt={!oQpbxaPABkOKZc4b
zBK#?aKiPG<i(gBdi6@N&;qTdz?ldM;tWq-%b>53TEuEES<NHrDwa4l2eU~d)3}8%r
z4htb4Kw_=alN;6)Ot#Btam5-)E1oV(#S*OD?GmRNMj_Iklgl`}>KLJ4OEN0?$Z~1a
zMt~6+WifFM94R{=DrY$a`=af~G|d)NsSRCQe5Uuxa8KW^n%JYEX1$vx{n;vUu>`L~
z`HiY>Y2~|Mwq%hH-dVJf9d1iu+2O2GFCFWyp|qDR7gM9tj$)6VP(m8X)G%Y5FN#Hj
zuA{?uwtktPTGUTx)PkuwWo0w+roB|NLvyh+ZRQN36%Gi_T0phh9m`89i@MVQ#<*_c
zcwWaK3!Fne;3(SAtWr8<CsH&%{)<rWWHD+xTYkNT?sfT5mvpeZtju8d<S|fenf?^C
zL{6J5Z7j-Urg{c1Rgw=6+n$zsrO9M&_D(=pvn|*)pmcPM^+cldX>RV0BmC}A%z)6}
zX60T@lgT%Myc5a1II!nwMeCGLw!d<Pu=$Obvc+xK*f#<w!vJQLni;Zm^ocJjzdMrr
zo>!KuFd~+;!5xo9%OcA%^^Q(d@p*W+z!USCQzzmrmYb#F#f|Dx{Vc%{leX}PCnj6K
zIGJg)aUk|8N!_Z`5LLndjeYmBMo6vmwzdIhZP*SRSx1u)R%d3f)rreA6>{n~tlRg-
zP)mdJvGg+_&3*l1Pn5yTC=@XAW{-c(v$hy`D8V7{7EhCqj32pSFXd{gHfTl#V)Sg`
zxp6l;TWwq$^}&k$o3z$XsuAvkdVL69_E<)fcWG82^C4lBa184lBLC>kdh^F+bhWo8
z_*4PqE*vhGAIio(OgW)^x$SEVZRA;^x>S1W$gUF{N`~zm+RSb)j6St7x~_gy?Pk5d
zH(zU`krDDJ$jw00JWbrL<F;BYd3^Rssr4<DN`z3LVeXFjweY@X{7^kh@O`Vf2MtRR
zF*RnY%g-<?Qw~)NC46)CF|}?ONpYYq^D*oHoZqkArA2#N#l7Sk7xp6K9bqV8A7A-i
zG2>sFO7#e7<4{j@xaRdiZDPf86TKaDt)n=jIj1u=u!}Z*=T9_eSxR|4n%S%x7iumC
zVz!clN2OEbPW6PdoAb!*HR&B4EpX5JdQP2GCU+`Hm<KYKlBkAP^`us1wO(+i&97B(
z<q<9Z0WzzXSb}0{9`77)2P`<PlYHk%rm61pD6dt7OhuPj{{imVG+Q7iSF?B4GlF&U
zDNfB}<P6n~_RvMLl;ZB21`J>9%i69(q9t~Y9j+jE*1ANkQtsBLm;OezC{{^MDol&u
zzmP-_!<@1kCywXn)uF-R2u;9&r?I!bW-W2DY$`2@xOif-#88se#(di_ZU(VKg5ulz
zzyAx|KqJ2Z9**?sw#U`Uezsl6WwvF=%jad1rb;5nmeOhT$@-^dgH<$_5=*OoI7j5E
z?qfWV+Qo;p_KWPPl7NJ2Dt$V4DaEKOm8)9^Cofa-Uh;p9P287qXM{4E>f+t~kjYmj
zpCa2;Q8@7_>Z2&yOsy}gE{ANC{{SBgK^{x=hDWIVlG*JA>MG>Z<XJTmlp%PNUnG^U
zBqrM=?wTn|&+*DP$Ab)x4+o9DHkRy%xT}**kz}r;C|IocyAAx5;b;0PG`k>|KgUs7
zGDL78{fut!YutnN6*6h^Ufr6Sm0wGwaD4eova9HaYUGpfY3SNcPx064BM}wZE$&NZ
z&P6WdRmrExe(Y<d8g%(FJ4Wo9=!Q0mz3uq*PS89T=%!`;k{?8+ag=@KOqz11rSkYH
z##J9Fk0j*$l(12Pl)Ep*roSG!+MSQ;OW>t~+xa||Bq6JmdYM-ypCsRQMQUoA=w$t+
zGApvA*B&eJ>!o=ppVXIkDSIw0-ZqzSZY4G9xl<(!iZ+p{%PM+fWKZDJvVA*?xb^t;
zGUZfw*)A~Kt)2-xA-$6!k(52f{YJ6;4c<jdlzN?t{l!>PP>t8)zbRbsF5QWB8-0%j
zN8o!QWMvGJmm9NCt&<57?F-X8oK_jZI*JIW`PKgbf<+#th~Rc4hK+2W(<wXFMy6b8
z*p5enKH{&(*(?(6k7UF)Y5l~#Q{rmhCO?5n{S)t^?lf97mW&-)6usP;5*<j3=#kfx
z>D*d+a_Ij6&rX`MMzU9GBo}*A_Z6Q03Y|x3ZA^|yWTiC@<SAP6W~Zh|h3_GDu7u$9
z4&_MwIx3nW<7nigE5u|vBWA2tb<=8^BIm_($zAiPQ$xW|-ioikvb*bMD3F&$KFXOX
zK}q&h_b8f&&V3IojI5hKx{4tG0E_AV(Oz7nS&<e^EA;NAZpTtNWl3`N8I{N}km_Ym
zciE|U85CSj#Uxe9L)=#B+@4EZ<77}uQ;o^P={B$A%ech0(@NbDz}<;Yz^WCcWnwuY
zxA!VO#kI=r?H1WBsJoSNP|03PvE93vBWcBve@;m<Q;lB8wUgjXxd!|cDybqxMda<8
zmt<3SWNab(iD&Gq?->+Rp?_1r%{fyg9F-@|q`nA^6nbtYMQ380ZY`{$lfDV%2t_=R
zq*=w2ZsVMyuxd3t?228GeUZs;S;L{m!$KslAtH<w?#@oDw}RZ2(MPLXs=GNqY;snQ
z4@x7Aq`h4o%6*Z1Zc~el?eb43TT}NDz|i7X>LmNg7U{B=Y=%YdY)osBC>%R2X2|5@
z#x@}fB8^KsGM~8>a#Bk7E6VQU=?S(<DX;ihwQ}~76SKF#<?2IXFwsxA-2IQ_)OM^i
zNnKdeO3lTPK6X+Yxx=EP?V5O|?;<Mwi4wdeb5AZ-Wn7TMu=<LN80<-UT0`uq8<Fp_
z66Dh*kz|terc-`}v%yhZlu07|WSJBCz3wOIxbjhtqT*gQ)UPIrMO>7X?uwZ%*3yZD
zNpw<`>Gn`}C6XTEtZhjYSD`XQiiFW3W}bYYe_hr}l2NB&6yxYfU6R45?k0;Ut&)aH
zuJdPt{<{pG3HwS<(9<-&88voO^+>G0jHB#ENg`!JM2#!-v_EPywKaBDpDYxYCu5D3
zhk~Q%uk18AD`cUPqO_RsMP-W362T{U*j||a%`9ae*z#$id76)5PuRAXWXgz{B7?Ih
z?OVsB<Xq81l*<z;GPktQ<g8K8WlQpR8Ext>QS{YBnH$sTb|-$#8uDsAq&kWbB4tEW
z8B5$&@U~=s*3asHkB3c<B*_(hria{CDHVMz&e!abIjiyVMe%kcX)EoeR*lN<w1tap
zl4Q|U42QWIk8yF7V|cVyhtufLt5O@WCP=I8W{2KZD6Y!+9`foeapxPNq&}kkkz7(l
za#B`_FI8)7wvkY(i5Pb!{ELw&ma$0H_cV$=iYRZQOl41NG+k6HWTWmY_aWr-wAXhi
z%Fl@vcX9Smjo7+YH2%7dLou|8qN?bg)E_EnV?wknPuEzJUeZirE2Y5Fbx^EQ$x}PM
zroNo{6x!QM15;CsIyCh7Lua!lim0i!mHz-4a-nZpxkfyy@;P1HlO*=8?-NV@#8vQ9
zDn9mmT%J2At}(F{*+$gkc@e&*r!NTcvlB8&6GdcT(Y~76*DGB~A;*Mlh0+_jEvk1D
zP1QtI!YP$K+juW#KUYNW#Ea;s@`!2YRo-M{WxL*F(NvM7ex{3%?USaiq{Oe=B>ZgO
zXMy0Q)kII)gzecZ?SxFpC2}33O-kfqmj3{u?QxX~q|H0EMYRRk)yXa>ZtP<D7pU+<
z-cQ<#^h>fhC6tGebduVfeSH*COK&1S>5Z7l;HrtDpHo|D(2C+J2-C-ukN?B~F%bX)
z0RjaB0|WvA1Oos7000010s|2Q5E3CVA~Hb}AW;P{Ghrh_LljbRfkct9|Jncu0RjO5
zKLK(R+-4c|63RpNqC*$5B=T=;mjmu*21&)ClFieVZkQL!MjyDAO_KYODy^<dHNjO_
z!b?p{e2<@I4wi`Oen*`S;(0Oqw#wPuUnILpE%QZHc#)Nwk}s)WCSj?1l?SpI-ooUf
z`iW%6orE-7J7*L6lNuIWqTUQNwNuHn2TRk1*v3eX<u2jzl(Rw@^5jF!Z?N}7i56U(
zo(h!V>S}1g)s!2ip3G>&()B75-;<n~AA*l@E;41rEbQL`qTcNEzh#_ud06s9d<>c%
zbyt!y(={QOI$oSE$CG4Gp5+(RJ_bK!$Ww9>)4?Zpa$mz5^0Kgy_{py$&PwQ8qXfNu
zDC0ND7+N<xlzWD@9>Xn?{SZ++8<!bze#t9jiEw#557}!rkE2JTQvH(2>?OHU8Yv^m
zN4R_z9&Kvb8!j_t@<e_KA|$vNhwQ5s+SAE}qOjCeGn}n<QHH6Qp3JCs4}pw%N4VqO
zqyFLPmj?uwNiIr!l@-08PNV8n*8B`#CxX3CMw5|*j`uM=r;?9&@|JL3b8kL)ETR7Z
zIY*SniT0A@q`HhvI}K-nr|HuF0Fj@enCa{3F!9p^5!sav;qWp;EF#w#eA3Xrd2`9N
ztVQZqDt3}6@?v3lCOoOb)S8KHD#$|lCoK`VQde!Wjf_WVkw>_BJfqG_E^?1gQGB`N
zxf$DHva%UVlA_g0#Z_h_>9qA!a()Rj!TK?i{-sCY<ivJmMRgP7DIAnurr7d-8C&}e
zw(cYKk}Y&q>Zft?Q|=?YOjReZVa8DIFf{!-SNS~TpOUQgNwQAwD*cC&k9jPlaACg{
z{g6$QmxM)l7q|Rv66B@HPk5sjKCKZN@L5JX3oJzRQzY#in=5vgDEAWJxY<?OlV%(7
zBa^w3_(yC`cQ<<rE0#7qG)$w(Qlg_u=NrCR;Dfy+^73ORd1)r@^0`^`?mdbf!+Q;k
z#C|4FR;GzN@GX#)&gYfA$gZtB7)5zqld0=U?}T#hJ_=unWWGw0yK?qdeIqFM4}pxu
zel(dy-UCM@-Iaxx8L3!btnoD5rmQ7pq9^Moz?Gn~tt}MEah;0GVe(P#9~m<m?HH72
zwTE$TJ@-FCAI?i-e7xm3UD_g6$+7$oZL-Eo`jh9`TeR{~?<K}~?jlo>;^du`jyD!h
zbBwsjlAX3kXtHG#=TRZ@E=uy(QL*#vue5TZ-b;-Bp!bPN$WG|)WWEKGi<Z$P7Z;@C
zWs)a70(yBD$g{yK)w$%G(@nAaYj84$aQGfr_0<~@WH*8r7Z&V$l&Na!mjrMAF~QeO
zMRf{`PSWJ3g`Ay<uS=1Cxsh46LKo_hf&S7jv5c0Qb|vkua#3AF;A1h~3QFZDxWOWS
z?faKYk}z+oWl7l>czSe_E#SsV^U_F2u=20K(MP=!<2-7-6ELrKQsU%ye{G7EjpfNw
zqNZUf`t>zS!E!v6=c=Q5A?I6xlzWH3#$r~gB?o5N2hc5}_VDz7$v^rr&8XfYcjUuT
z^l2pd5q&XisPa$QUxLLR;#>@6wME2^&cr&1Q*|ESZVytqi(09e3_7blZ-h1?v(rqi
zv$jjwSF~j=;qj9r$y=tWNm&~7FSmkv<6{#W!J`co3P~)y7^lQaud5}J%BN`}mvH#W
zoHC?!Vk|_no4?z@KC2bh)*`JGvR?)&SExx}e7xkctJ*S;aV4<HJ0pK2zb(Y}v4-7Q
zFSmnss$y2c-Crp3R+m(n7BpF`mf2P9BvE}s;AD<TCn;axp+{QO$`qA2eY!7AL}Ggg
z_M^!5i)7x%0(m&+KGJ0_;qjERqlT+mp0w<b$x%~}2;=SH*1Cml>6G_^5nbt|Rp!|u
zFDD!El}^H3l)H!8F_YrP9;{spUwaq|Dl4af@o%=Cji5_<MJ?ck!HRte6W&=|LS%Q!
zqW&^vE^Iyqbir$p>blQU@?darjhK{U(usY$DI#BuqK{yVW9mrd=7fcnUdBt3pCq2g
zBcbZkDN=H(qZ1EJ(t=NtD4*M-E3L^cmQn3O<i$$!e#ntSl7ietU&hH4-sHF_r7DR?
zoSfC6!Y~+BPx&Rw{{Z0j;6&HkrP0#Ix+rk6tnS<PMG~5(u1>R=6jfiw%7i&3!3|f~
z!&7$EA_$b7tRkn|rFSG;q1s8=HdNm`5&1DSwY{=Ps=tksDe_wOFv^^JRoNX{Rmh@J
z+*3(zVk^5R_UL{@e`62BE4x9mV#yeZ`f#w>6>`OY#>uNP9G2-YF>{^#9J7_VEUR`e
z*W1FY)E9Av<)aquO(I;2D45CKRu<$HgknDvBtw!GYRsb)oxBm<o(UD?;~k7d^r)}5
zfmfSke<6q3Y{xzk<U%+_SE^Sj$0W-C0Ev<z$q3piQYcN$+>5ry^<>djYB0MgQgV~^
z7u&$b(Q+gC3SBj<W1sR{C6#BUn`bOrA~{!o#?c}j#HDzn39=yG>|7HmT1wvsmfzdM
zM@1w>^&j~LDq5>CWVtbtv@Ahgky+O5C&>;=bfX96=Nk!?7lRKMPNd(-h^eP%_V|e&
z;-=N^RC=m5Eb-4|ieh9|<%--DIV0RbT_Xv)Vq4s~Q)P2o(xviNZcM|)UCsC=VmCy&
z_9eL0wlI*@Q4`>Sh}x(93!8}JEO&B5c0_p))r#B|IU~s(po}KbB0Y;QCMm63F<w&C
z$j40^6X2`7QB!6iUQWpBMxQK<U9MnPGAF?vOlLR!5EfocX7jdC%0+$(oS14(r0k0r
ze$qc8<(_vjieKtkORl1&{{XT>;Gb$M$Ez!fPspT~f}^@H(e&t|Y>$&4+j1fkB4R%-
zOJU5CF%OcbB^l}9k76k+Q&%D)SBklej-IA{G8ay?yq-xC-X<nHoE9ua$Vtvg97LZC
zMnAC&BrZz-0F{v>%{MJ_yWN#JAX4l|Uj%s<1xx5=H&t3mN6AW>rA<M2K1adeuN#u~
zVPkn;;FkouURd4X5f&mu#C}F!E9Hq+xlo5CCA3Z~N0KAmSa9%K+aEbtw(900j+b@p
z!AAEd!9AmvFR~(8BqI>-QIQ!=oa;+W1-Xk?Y?)17Rw>K+uLI<VCk&6tRS$J?JUpaS
z?YTPt03)duHif6jfs`Y3nJx;Qr-K*U6j-o`m1We~dlYE8(y>a6FtM^BP{b`sk|^}O
zY@oD3CCeqr!MBp>y@|zc$Ud79vWq+t;Gbzeb8JngbYZc6uSEGPc8`M<%u%ax77;${
z9G_05rFxPo&g^?`H+V8+H<B>xRIl)kQ6*<%dy!=OS~t^nINB`mcsw#|apd>OO3vt|
zSN%mV*w)4^+EEuGB0{C+$KP`gN!F;MhiEMe!F|Rq##Q8^`tD&YqhjmH9-YPV#7p3v
z^;vlzV7MP?2YE^)*Ha!>850Xr)0re<-|{8;6A+0{LzZ7lR-2O?ypiCltV4+vc_8e}
zGfo+JBK(MJamBZEFT_KE_LLiSnOXH)+PNZ9exwwPU*=*FeTvS_p_mn>mHtUT2us0;
z;HBFluOvH+DqfyVSc}++;Ns^LihZQsm>7S^RqrqCT#+zOXA=_dT0#*gWK@=^T#-d+
zrjZ@wLz2H~ONkYGi1!$E{81yg-Q;k{oJVt2mgL^0Vmla_>84?SCCZ)5zp)W8F4b&S
z-mJgmLNN}|W3|M!R7k%`Pm7R6e$tncrL}HDN|ljIu?e0}Ie4)vj#u7UV_w!UtEs=y
zKIiROVcwe2B>LO^<r5V8MciT5s+T+v*%2#t6#SKYihbyy^+6A9hmt&8oLk(Iw!&N7
z#Y*xp{j!kx$%`e(h`lmQTep%iJF5}B_b_|ikzNlr5m&H}C0^!1+C~%cp=P%m@=8rd
zVUJ{1caa{PJ^pfj#mNzrVQAcNELE1}YjR*Ac#-6fBHXLm$i2$l!sNs48y*iCQ+@`%
zNjW^Ly)hfRF@MJh!13)wa;Gar7E&E8j6-zfRF%vfEflULWFjWFB*?y^zjG<?Q4~9U
zm$7vDCw=dhE_+zQ!(B_h5sInmRDSOya;Mrsl%4iIr7PlEDM8k%qKzssQAaN*mOrsy
zxS{z`MRe!n@>y~|N5NY4Wu_#pg{Pr%#>?jR&KXkjg|QiF-1>&~T#u#Uw0|UtKO|OB
z8r*!9`<Y7qqGT1KEUmF^&+0ONvQ?o?TA4ZOclRu!BA<D|DQQILdev?zY1s^0L_|vx
zpHW}Em}u*&kg~SNf$~L?F*S7TN_S&<TwcT>5br}OvJ$aQTA5+HA|>p2CSp5`ByU+E
zW#G8>LfMrb#d&ID`wMXq7jk5ijJGR0I}(|OTMZ5FK^5JR|HJ@O5C8!K0s{pE1_lHQ
z2L=ZK009635d$Fv5-|`HB0*6gGGTEA6eBQFfkI-Dp|QaRH9*l7GedHMgOsAe;WSf{
zq_X1h79=)wqwz#kV`PMr(*N232mt{A20sGqM%Y^Z%|7URqS?O3WE_!ewb*^Zruw{D
zVh3#YW8TkWUwt0qI6$nWZ<;txt6to>Zm7G6qURrQW_Ci=X|~?-Hb%?svcB2bW3oGO
zw{%UpY;c6?PShUMlxWZ>mrV0k_jez%Ph(%$a}G!uP`zrKbk%CK+b%~Bhd1h>#a%@E
zgYIv)O}gxk#kNQ6*&U;{7NWy2blaGtLk@4|xQQ~Z!?N1#j>+vuxEn!QF6!jb)UNYa
z@l)izqYHe;?Aef=M#A<ECfqBJ1thJ_0;HXN#k&V?zVl6*O3RT;l!MJnc_;$GdqxLO
zRV*1D;Ip&ZGsE^!c@zjmrEdV?a@^22=8KK?-AU|fvmU|O3#yNB8qp-DZz|<>fI|3>
zis|GOXckyOzEZSObx_~^km{~K>&<w5+c%WGolm%}-F89zeckq7x4C|!Wm=USGw9DX
znc9_YxwP1NY$arB9fR(4+V_xmj%-pRk?Nt&lhtaivU;s<hfsv8Tdn^9H98?W{{Yt|
z8h$IsX70NKzZJctbJF}h)~Y*qLZc3g)^QaBL`s{A3_Jt#+1F&10cf{u*kGXIhhX+R
zw#vTH)1oe>+^iO8R@u-g&}^HMfL8g+)p-4@#ar%PUt-mJHc#A(YUP8;KCT|IVRgXd
zpM}H-a}0I@9gTL|vT8iQ3m|{9ZiuVw)k4v3T81wY*#kg!TTO$odlTEXrAqNqe#NS?
zg{l4ONBwyy1bq1^ALdg=o&kPAWtk(8LO^I-Tt~-ej?Z7ik8Mv;)GViH>=Lrh$?bZj
z5`1HCl7OR{uvyTpRB-e`jT$U~sn|OQZk+Z<V#;~!vhh<Zi9q{Sp?>~oPm+K%Q*kHe
zR1=mtAQ(4v!249hlg)HmuN{{wEU#a~x7pP<1Ci~NW2$ZYM1#gts#R`x9~CjO0?eeS
zLfj}e6#W$?Y1-8gE$%!5F{X+}%~gVzLeQxkzT_|BrD+`0Qw8-LxA>=Lf5XTq(0&U0
zI-Y9QD&(u|-Pd4qPa=*x(bi)}69R+Sx+%nUL8REJ>=(G`i3H>tb~8CoO@M%dK)L{E
z6}tjG<VqD>IP1E8N=mYob!}C(>avTKB>A7*jkR6tDelwiy_;bDBh5!OCKmvZdag4h
zY>BwjZi?$KE{dipjmtu-rMB#pRE3qrMK+gtEYGCok0mi>>U3SNdt|LPW!aE(LUmBp
zVYgMUWZ53wEHi$SuA4%b;boPT)vSj-kvgcRC-7I2dMuigqd;5CPQ049<A%_ql8RS%
z;HImm@C(65*miYY6peZ8@Y!Du1iLJ=&17(bHS82slW{nJ>R4{P+=d?sL`!-otN|Sn
zrIHMAk;zu2i0`rGr9y?4+a#5gAyv(}Sx0L`I|XX5+0|CWyZJVLD=#TQXL(t#t(3U=
zC>H!mmy+6j7XSgHn%&WD%FV0#t_7I70sGWAWNGB0m{K)5DA7Z;Qf#ajit$plR#{wz
zHiLbYb9m%X>9qc9B{r8i#?TXeA?cCgDWA=5Fe4=)V(;9cp=+|aZMT3_+bT_#t9s_y
zWNOTP;qBF|f0TdR5&FEkD=gJdkXd!`=%KfceH5&{<+-3NHFCAV?-UtM?LbrGe_laL
z?s}*uIxM<uEvJ7JZ!YmrR%ueRJj5&TG<~H`-OY9f0<yHtH+M?D&E6f5-flV|{G04Y
zHy%oBTAxLgf)s752e`(x>?HvzWeROgU>ua6cvsJ5_D<R?7A|gYwK-W_$BMHqRJ~eS
z)seOXQlYvcW*N`H7SgpYWc?)!96y5Ly0zA9-cC?>SfPT-T2@nI)g9%dR9XRaApAk{
z3c*5+o=AW=o5U7cZIXpdVF&81-4<jW0?kuiiU!+k##fsf#C?|83RD8O?rqc#)NLKB
zm3@$<HN8rwKxo#5Bbw?|lo`zQQcjC^PrJ;Z8Y~ufS%Gl~ra5&}^hTj&XK?7R_9>=#
zhb5Y@iM(=Ar;5sTQaLUm^60z|MQu7EWd&>V*>!8I?%{8WB852m(6bZsP~OJwc2~te
zN_5MU>JxW5O6$>dS&9(R2s8yftvhFBmG;V`{P2%}K!ER`;<C4rf%82UZe4n)VTSWi
z!ER{Od4M$YLC@h}(W?3=8UzhKE~|FlE{CGtOt`2}?&{J<RmH>>v*5ZYE5l@cp<B#;
z<PAGF;$F;e9q|<}Gq!e@bL6w~)=JAyxZZtJFf4Z-DE|O-DGE_>P=Rp2><6=Dg{Ri2
zMm|d<*qQTB5#lHDSGs^<qf&)7XtPw)kU`Hna^yS|i$cu)V@0in;cn@Rb?Bp3omP1z
ztH)Vds~MeIh<5{d1p><YsR}g;+7*(Jk`{bN6TTEC%;+>yR|=EZy0P|bTe@c@5vl5h
zIyyI+su*}M<8t4Ys#jj9H4A8iTBqL~kb4lYpU>o?CV6#7pXPdbB4;-v`+~~NdZA6C
z(6c!3>d>=4{{UcsZi^^_?u$0A@j<lsdY+13piVeyFwcp|`!@K(%4AiL+^Fu$n17tN
z7|iAEapKFW45wYWx+A=#V01z%YGdY}?Ze-YjL^Mj3jFL+?iyO=LY^2$S1u(DSM3^2
zJWkiZEzK`zei8YmG0ukc8Y<YjglB6wd|k;Jj=6D?v{{rvLUkkd0<E%p7+S8-RBWi$
zf4qr4R{R2j=D_Lne3VmWR)xJhwNmSJ9RkYV+5FagMP{&gqRbl$zOv$|K<E=g7;aW-
zkvz#>r-iHz8x7OxyROd(P_wvLEUi*~)FIe+Y&vvQF(XQh1jk;ATd1|a1YGf^j6w0H
zhvEo)Jc5OhpR;GI_OZQ6sLw}>i)V6k@egy}b&`fkMNLJWz%{HATyzNGa5bFVB|q<;
zs&<IF1}=YLJ*(_~7{7{xtU2XGIK{R2R{oWU)5IZ%rgJgeL%~`K$x7t?%kTY{)Nmtz
zi06J5s+BRdy`>jUPC-G4ctGyNsB2d8p$%xcU%}pvBw<*1o&K<^u0ckthRM0_7brA)
ztrfNo!Be)~hGIq7cwRgZsKYFHmicu~-n)gjl<Z$Bgodom)S~0hRi|LN#sKYQl|Hlm
z#RC)l&+}Q2q%wa~Kbrk@{_p<)GW~h}?f(EW{dxZF{{S*=8r2(Col!LE3r|~iIJh1S
zpUdWjZQMUY(aSX<W5m=58inf^`r(a@IhDGkO&l=MgP)$rRvQCl8jyhL`w90Fn%bAh
z^3YXEZ!iv}Wf7IO3QWLR8LG`y$IGW@BQ&ujw)5HHaUM(kCa?QtOL29F^vdD%wlH~D
zckf@Wao^e>``7ENclL+=^@hS?@wKrOF%<<eDjeh&SQ$Sry?QtL@9=grGn(}}??3%F
zbns8YR3_X^pS>4SH{^+ZQtq;L%C^Yrg9k;JjeXXtJ4GJVf-0>E;$!7A3eNNH?<J9C
zU`5d}b>6pM*gYTr0Gg%$02p44{=57gf=0?U+;5l%s*dHVd)@JD(N4%5*7SrL5Cdkb
zUf#QJ*eczbuQ^5S%@>9Sj!hi40RH7}7b!Xx2vJVOWN5kP)kgKe5pWjji}!|k{=w+~
z0Q}W2_=5Cr_21y^IzpJyZqAOfZAbqAt1Wk!RT>}(u`2yL0j)xHg`eMC_6j|j)o8RR
z)SRKWmntqd$lQdY4!qGYNL3ock*yN=Sci&o<~Bj*J6?}IlKnJh4-@GeB`jMgzXI|A
zc()iatgJ_%MiQnc6^1u%9fk5O2HdU?rRfX~BMkF!+SJPr-m5ydq4BI3f8&d=?i;&L
z6|H3E`ac1P#FkycjUxE_7ow<Py<ycc-i@J(sLGm(ubF5O;F|n@qj6Yfzq)Htb0_Uo
zpN8~SGOKF`8fBCm;QY2R))h=Z^lm89qDZLy%d)VJr<Tkc{v1U!{ot^meGLBq;X`YJ
zcvkAG(9}+U_JV$GS^$lcDhioF<o9la0@G#t7TZ0D?4NMVJ2iQQTMS&>O-TbULcfEC
z1H3Hng$H69S_qcP-P1nSqN_K3XlKuw#o}Bgxisy5jA1`WeGJN#U+7#Rr;Vvu*(q4%
z4yG(b^YLDv^tYy;=@)uK3dX;2=-Q^6@lAQQQZJBYA9BLtF`kFU`aYw5DUpoEVjo(m
zZt2&T+<AB-=qi}%r_t_Vrf}2j$n|X6d9OwP0NDYG&iqw>UpP75XPybmc*?yRF_HW(
zKSlt<??@-tIKKY!{UHAUwqCjFeyDd|yFckO!IL-SzeXGA%zbP&@z}RNcIeHPt*7lM
z`W5)vrvO-3aP#co;qz(dvYUq|0;rSqzb?Vf<l#%6!$_X}2_GV#BfX(fq$!<%1-y-W
zQ`yyBY1=66vbvNS)Y_EY#yP50DB;9bXyfr!g?2_w3DByPF*t%m7c%mj3?Z!_YGoN2
z8gh@!s=kY3veR`ZQ~sA~pRHnQ*4)z+sqY?Lk%LB#gTYa#&r4$(>^nDfoGZF%x1)Ll
z>Nu}RVrtWMP4k-iu5dLXGkL|Oe+qvI;=ObK0GQspd8Bu>(#h?>`a=<i6+7aUM>98^
zDpfHU+SMIFp_Gf>Nn=KUbP24-`cD&y4DO1xT=@G{EHzwJN711>s%~*DWa|Tzui+oz
zJWs9v0I%LVd8Bu>(%T#6sDDfHM{Hs-{PdJ~%gk!L)dGH#92W9Bx~AH73ddjpHLE|k
z(`@^KzI~rqBiYVhGL9OJ@(v^7vpm2ZRXxUMeO{<Ko5;3++E(`;Q`%J)VZ5(XY0Fxu
zynP=8#{U4SX?ZH6$Jf7OD?)QV6B~3|Ef;lnS8;bXKBvOcQ$DPt=e8??(zTF_mJj-B
zqX_!T$;V`&(FPzr<aP$qk6^XV;^?bx>F*y#pmy&|73(yqI-N&d)ZMBWY)qF(C4qYy
zc^Q`|2vw}y)uvk2>y@3JuhE`JKJLtW$HikzjtXV~M2(tku3S5re>I2^{HwneqQLqq
z(jTJ!v0k-i(tan`YB#+7a*lxg%N^)HLVk+34_^&Y&GpCQpO%|g`?!N^$KHlF)-ag7
zK5T}j7OrsDbSn)?ooqX);%J;hYvwz!(X5!XCgR3B(e$w{#uo4D*DY@B4(FWlX`-n^
zJ1TD6Tb+hl=;k~Yi$$Wun9_^?01b}wS}hPTB}6NQ=<ynl##1NxMv?a>4>c+YMzkr9
zcVMopqqI@mho^;~YfmvFtg4lLAm*D)9aiO(TGqHh<h@JK*>eHyE1$_#r;4pLs~pXH
zjFM2M`c<pqCe4Tl(xfs)ogG#h^PImo+?!3>({#edzH>Fr0i)~-JBC2_dp7eUs?lhQ
z$4}ASe~Dsk{ImZ66{5gasNmMK6G_f-umdn&oTF6XOELAFX}-HRluSIE_!(H7D5?5B
z!Cxl^0-4PCaQhJb9jWOC=EbnpHAO*^ldTM%mb$6vcjK`5derfo{{U+EYlcj^r_;>W
z<Z&6Bq45hvqS0dvX@{x*00~FrwA8IOa56m8MyUC$7>zQ8M-d&+3d`Uj0Gm!vLkX>g
zn{CjKpKFL&XteBKuzLZRhK?JD5y=<OrW?`3NhsnmIoQ#8cQi8Sy_T#1&x)r{5l?wv
zEN25$DGF_DUfQkOK_!ym;EilNHxL_@;ICGx6>>K4KIGS97~x&ak^{TF7K=n|y(9fs
z_?2@S)OX^vS}hifMWWGYv|22A{{Zc3{{X^OYj<+~Y9VN}UnoI*))db+{wjF}6u~ns
z3p8k&gHRp2tgRPWpRZ${*`C4K4PM@~9*taK<*m9Duy__6qrUL6R3ZkpvIMrxGUBRK
zr%yKY!$K9DbJuV%QL6V#ikA0zgdJPiImd;<@{KB3W>v5LxY-PCV>O?5mvve#7K<5f
z_D}IDhBa|oEf$MKqS0x$Md>3S{3UPr3dWf3?!UyMI{;aExllg|Vto<LnJKb#Y$;Bv
zN4bErxL@8KnnuUvmQ!nP^|+-|dj9~V@t_IW9hRLlE^%J3g=A6(i2O)8i7nlnuj)dn
zh9ilOo2<@WXqJB#&i9&aKw+-ARA|@kRCUVHXtYm16e3r6eXEMKG5sZcWe@)VU|e32
z!jC%5Tz;GMdwlUM{p<Dr0Hc4kGxx98{*C_B&)&aZ`ZxPiKYIOt=-=&3{p<Dr0Hc4k
zGxw}k9}VdoJ}SN{lc=$jY6tg(NsSk!3~9Lk03T<`FhZV=R2`@+AylT}qsaT!ywguq
zYDo7P&Yithy^@Ng>P{RsaOkp9XsZ5ic7Znz+PP`Qng!}iy+_&4_(xRCJt3BfJRlpm
z^(cvr%M6~cDVR1liS)^F@l-LT!fG=s<Kk*LdIUc-^SrpeQi&A!evd@wQhs6qxa-*H
zt#xcH{{TrJ`-lFg`7hP~0Q#TguY~^p0Qo5_h22^!82K;stiRR&0Ghw1GXDVh{{ZtZ
z{6YTd{{T967YgT8eM6e1M{48Plt4k=DEPv|K6(X~HM*wQw5GGc0ox4-dx<*+(z3CW
zbRAlxAA}-ug}i?w!k>t*-X<I5ocIOjQ`5jKJ_I`Cdqd(@=%3`;LdM~~mL8_{zFHNT
z{4M9=n+<^OWoO^fV`<aoV7R8x!s0SLmm$rYr%=P<D`#`KuLJDuU8*1jYRn%c!Mb<N
zcPW3iuF7BSuV`sE=C0;0UAGdm9%|@$!lq}sQ)@TjQ}Dq}9gFrep4|4zEu`$KTuDy1
zBp!9inKzG)ZmMJJv)Ho09%qtvE%rOWKYGJrNWM|{AoehBvD8TWLNsekHN+I$RXTG<
zowIz<rC&~?lFS@ijHq}|)kW`@xc<wyX|qxA3(SC@_NtWr0#!P(dAfC9iBE12?ef_j
zleD^_%D^>MGdgwEG)`JmE_Xnigu<M(8E-WeJEf$2)i`!-%WSE>wT%kKpO7ilSU753
zKM9DW9R_l*Sgm$6&R(e2u1%fO?o(6%-`RJB$u}tZDj@wMDd(PwwGWAIzn}P~GoXY|
zYy>^btvdxmzj3#5PRnX^#e&T90pEIN6As|oO=l1bGcB6%og9u~;<&Vas-_<j=&14k
z0I~stEk-${{%WQh3h+fZpB22Vn9_Chx@88k7}Ftor6}K3ZO_8g{{Tv3ZHJ-D;yi#=
zF_nyypyUTdVu($iMP@vl6}uMMeWuE!wgR1v!nF5pRFIESse$xYMXw~ryG`rc_aMa^
zHSB~ta<8)EuvY4uWzH$M8b|r(sZ}ae)Qc}%s3o+A2=Py=OS(k)ApLBePyxW}Cpwt(
zLYX;n6#Jy}OlC3sZf7)cOs*MCrd-q4!8>{dN^=6*qV`f5oOnl4pu02Pb3CEw>>G!x
zVN8EL{M?+vCo^GRm6f$9gVt3>wCDl*kW;ez3pa0p^pux<%0<^tk}&dvc;DoOHn73!
zonE2PBT@G*y2^{bK>&V=sL{wOMDs9<j)B1a+dRKv^6bb3I8)-p)YSYTXMTd@r-EU;
zm0FrMhy(3gx>{&FP^5m7a(v-esOIr+V10?rtHN4!La1v?bq)(QD8*Khc-6fVm>F<2
zRUaO}H;AudJ(%u(_52@#s17R-A?_bUE~-{;O2|8^bRI+S{{SVX+w8Di!7GV~Z+_*(
z4v04n-BiYl)Yy1g-t$tfn@Eil{;4VU?}8OG(a4d_fgf@6_ZGgwQS3N1#^}w;rAD86
zu*~9_hZ^cr1p7*rB&G*lt+`LnX1&3~!83Q?Ijayn?3s>hZCptnE7i?1`c8aD=D8g6
zH3ub2Fo%~Ka;r4H1$Si;&2JkU_u7kp#GhWR-18*H!)1LGPL~KCh+kH#a#KC76&Y2j
ztkn#Z){gx{m8tLQR+g2fw@n`ss(GBry#Z9u*Zk8wSdX|q=Us=eRh2B-uw_vD0jgyw
zkj9_IRewgI!*H<Nj^VO?Y4m`GKZ(5qxu-Umg6&GJOH7T0g{1dqYwGG%O7*EaC~>>T
z$sAo{mV2Y4R;q8R)oQMa*5MG|Ya_J^#SX*}t`>@VC%AVgf*PesP(FydrQoW3{{R)6
zq7V3-92GAm=k6TuaditTY^3%I`Jqzzk8uXHYVjyKnU;|sVyV%=HPNH4gQ?H)0z7WB
zX5nzSJkZQTK-a}pc&EWpxk{$|lS+}9=GD4wM|!<hG*uDPMVbeY<fJJ1qh<K0R#2E$
z@ug*QyOoGq_Fh}5Pdwda2#*O)`u0sTqN(Hm0Q9Chsy5_56-s^(qJ6=CQ`(of1V;E1
z&~21JLupU+x20-saFqW5z2vLE)kP|JdPK3L{hu@&J>*(+9n>key{SYjH7es;SqiB^
ztRf78d8e?>^{uj{im&u|Lpq^T4&2R+6TKf+hUT-`q3XS>4p(U9q9^LzJjRVZiH$Zv
z9tugfg5s#<YJ@1xhY?eltGI;}>Zdl1Mv_<k;|%EPG}!MDpma=6@IRU$bSjfiQTVUs
zv7u*LtyZgDld+y%iAOG|hgBeS1!gj=5tga@I}j18jQTgoH7g_k0E(ylT_N3ScKDxK
z#Sz2su`&G<A4#&;b4+yGI(Qmthodn)AWAeb<hCtNpbIT5Qy-XK*M+;ok7GnY`pYYe
zjTc!58kt>4jobb&G<_fG0@u-b-Iq^r^+yczo10Hion=$`r5`kBmdy}Fs2<qamF*55
zD87K)RPfOkK8|{S6H(z)8WmDLB<c872F1(5b4OTF8m}$4TI>VGHacyYSxJ{H=S6U`
zghVShWFfLbDxF+TwW^Qy1<J%?A_0F0=r%aes@j>@KyNVR6)Eo6HE}9M>9_mE9n&G@
z(qoWKz|^GNK<<}wPb}iBTb9Pm2vVtfZqWQCGbqf%!r|3BAkEE%KE4sd*6DFnWOu?l
zmPLFtxucwOADYS)Tm2MKLKl)hQ0=;b+1F;fGCiTd@+sKQMK>$8G}CC*8BS;+KK&6g
zs8jmm@I%gf8f>y9F{~WZ2Zq;DZQscpMP-x?nPG65S}fWl2yLgl?&cD!Q{V3WUW~V;
zhGMwF?;t4{E!GNcF<s`@5}In<as?U~dQ7HYYYVD}5PhT&-!%F-j~n${r=Mp1JPH&l
z6!RH~xJ>kp7>&(y#KX|)o9Rp$_7$oF9EZdeTsZDdW`8v3st|sPyO;SU-8%e}r{=QH
zI88F4pEI^S$@HEEnBLR7+CGw)xe24@r;`Cca-R=y(;6T)ILbG$C^KYAoU0?ws2-@<
zIld{oTT#M8OsbWsn4;#?dmK&NIwDg|PUCLm<J6{f%oZM*9592x(ZR@LXkMF)p9E;U
z@e8raew$6grpm)B>GfW-QsPL2%K3Qd=|2=KPoVKM%d?&BFRJzEj9y#R!Ums`tKvU2
z(;AYElSIt6$<cc*ElO$e2m)+YQDq+P3K+~gJ&kiV<W)W(u{FTaw)v^;!$LJ2)HZI|
zZdUQiV^t?>Na~wC5l1$;(|op-yTrJfjE2(spxmKMYO;N(!ly~SQ*vm=fSN(?s*nM1
zHmXhy$RjGud&Ake2fS$^ORgs{P7RPty`PC#J{W%R2cD}3gQkCoYqLiwRh^rXpa!vZ
zsMR;h5D}^kq%YMmW!Zk;hi0}KIO8)s)<*~+kgC-@-`flv&@Kww3yN_%1(#$g*b^lm
zB;obbc9g~kaF&8ciX>gj<g&NvAlkfGs>C!jeGr`dur55%sDG19a^7@I$|p0L{{S?{
z;lcXjqIDNuT&dH**Bi_`gPl*Wf~n~`*j6!=j%GVc!o^|e)2UY#iKrpOX$kG-=R%Tu
zsj+J)g~iEesW+mZeG0o<+^g*y^Hlz`iB0VSd3FmLRH$A%ZOsJV1XG}MO;%c{>YCOb
zARQ%18lp;uuy0A#3WeNvx^7-V=u!BLltDbxMc}&XjvQUvxe6ZRl&8Ih)Vtnk*vu<4
zJ1%MXhcqhG!!_({gB;1%#T-NME}>GahUB~ss+J!QkIH_yPk!)XIHQ9_<zx>fJ0^5e
z5bWRF;k_c!-FEHYHHUB21xL@cPfetwJ0(Jrqo&D(07uCZnY0d4n$fHCAQUmOR$Srl
z@Z_@veO6@GHMXePRHyj1_lkA>qd2LV<<S>mgLlC3(>D36)f^jA=GqV}jt;dUeIa5g
zP-%TC*&R>qMusJ@`cY*)0`_n7@!JrKl}f!z4cUKtiJ%7%Zg%;FPI)wb2tnC9bW_Oy
z4!aJ>UMgIzvo_6d(R{Wy%WRHHl~#djN9|CuOf_5}4?ZHK*%mtCWN1Q<(yUfmq=GbC
z#yGUQiKKlY%z2?h6UR%sWLyQ44OfJo0u@H$sI~5(-0goU=Bi@r5t-z?qax&}K}=g_
zKyKtMpp&*cdiIM>geTy}pp@P%Rm#;_SgVbI@*6S*3prVNDBF~5cFJz*o3+k%fGTzR
zllCfykSj4ltNuf(sQE;~aU~qVW~kiUBs6%%lW^5XwFiKnVH#D$eI`HQcQA`932iZF
z^$EDfQs00sC%RLx<nX^$kQEyZkO;h+EW^0NW#{iBvr7>cs@VCV;1#xNp~VI^<;#0j
zw^E5t;#0^hj0-cAn=7KVSGiW^b~XMeZE*OkspZwVoSh#@Ov}11Gom1y6xx3paP!%^
zf6~yVy7`8_YbGk~JTiYYU56qEDbZDHTO2p3o1cm&>{d{(pi{HeaMz7vn$Gcm*thX|
z4$boHTVtwK)M$fgjZl<W*Ku@2lVuTgnad+pmSUhNK%v)X%?+<`)wfd3A`l1;y9jVu
zfxmPQ6j*sV5tajmleeY|bUhR4>h|#u9;g6)tjkYOor}TLIhf#F+51zP%4VHfCSQK?
zjh58`_OEGB2;{dIN^Wa;o~t|V<pXT&w_{IpQiHU6NKT}Ffg@xcl8x-MIHOLR?z6bb
zQ`Kg0l>qFLvb7tm<Vp>cB<#0ht2`AtXPHtPV1XQwBh5k6C?Qe<l|bbAJ?#;wCx?kS
zn$vj7@5ye6_O#v4pMu}vr*`w1qo-D_<J&Z<H`)%NZ}9I9p;5sS8n3GMY*1=t3TkG0
zu!RG$9`4<m&>&Rf8zn-~X5_bYt>UiPl`8AScq!OcVxTF6#-_OQ2-IgpHkQz2=Cq*K
zHtO>-x+|etc&TtZ$zp2EWN+T7UsuAff~nl7+{4ZmT8^~A@>KB@j~?%rI9{(+*<Ae}
z(c?t?HX)Z`y4+G|G@Yb!ltDW6Tq0Ds`_iLqZH~qt7CPiRWoffbvY|rJWd%Z&4La;+
znqeOZRVU#nZKs#%NJgjDMx(l{vo{}lxYze5QfT3;5*78tS3h7)>K_Xb1BfFbY56bV
z3N6a^aMTKoZezRV3UGUJ=GGSV*PaJ>g-a6ljHlT+LUnv0;QCEGmbdL&W}Ulxs;%bU
zAfT96P&UtEP>`&lU2v@~zyHJlDG&hw0s;a70|WyB0RaI3000315g{=_QDJd`kr1J=
z!O`LHAn`Cj|Jncu0RaF3KOsf>LC=waM~P*}4bfDZNZ3lXYN2Vva5=!#QA+AF?NKUL
zss&#a7*S4NF@>**rpRm3E7U?%p%JT^xaQ`t_Xm1{EWxxK9<v>+lx#VGBkL90>mS0V
zw-?x%mF8qUp-~itQ1J)sBE0#YCbE_^i%4~Bx{OFuV`v5_IU!-{IicLhSC|Ua)_hdR
zY7L5ivl0IQe8r<${vw8T8_aH~YAyW6pkJ&|gIk7&GQz1~Z{e3gbirX+f(H=-dwNRV
z<FiuTHy4VcWtEL_4cPG&86lG_z6PfrA!<g=%^01fyr?BBYb$i=oM$+R)WHGf7VOqz
zg2ZyKq_Lp$94k_-AEXswV>KFP;D~74RI8U}7&*20fR|)c8zb6lq@c_L5418^`pX(|
zF79V=8v+_)4(2!LS46*(yN^&JR-law)WIG@sn;HuhLBz;q7AE(UslHu3oAac7^?Y>
zydc(1RJQ6A47xHtWs7iLP*oebsi(g&18sPKDf2hV^A)!3!7MpUlh8_*o0fX#?K4_q
zh@z=6c=eQb2XS7#;kx9ixLUd1ix8v+V9hm#J*APVq{qy1R9lyl0VHDE5OGRIH!gSY
z5su}Z1x^Nz=LN#^pdj0n!jKa3-JnyYYT;bmrlp-qOw1C@GRU>js4J(KDkDoRs^&49
zEQj7>`;LE^gi{9+qCmLuUB{h!%Qd)>%p2(t=b3H6z43~L46a?`U>_0Z#NU!TaW3&t
zF^$YN>vDp^X0e6CnS{;DF#)S(wdpb{V(K<(GLVz!C{xRsMMURE7g4Jee9Z^SG{KXP
zGb2!CWgZZQ7s?&P<>8g;?dm=i0ocxIQ<-f$g}g@%VzYOwuTDPl+#}m8!=3n^KM~OQ
zgsaY?v+#l<vVbFNFu7u?Twup=){|1h2$qV$Z5MYaP3EC7w=la=;?o&QOCn>z8@ow|
z*QtC?hNezPLa`pGsH=&JTEZ9e6dlCH#m%seWkN7;UM%+yph91grhihY+{92Hv=>^f
z%_8KGnQ`+fZamdo31da^4I7x=ZsD;g2<LYZZ;43IK{T~9p8aF02d*I8)_hE1<_<0t
za>3>rD1pia*xcjyEMOHyL>k<%mOhayeDym)T%**;ZF=<t<@cS;;sY-M{^BGO%hKdL
z!104Ewd)+KEkd?vm<&rAA;R-0vXd3Lqwh4!HvG!4aV`W-Dp8C};=4YI7c8-hpU*hi
ziCn=@9GQScz{m7GzzVR>2y((V5nFV<M}IFV$T{X1QBs48IGfGp=2j-+RNc#2PRQj3
zQ74)<T-G8xKbeJg#5D+4^BTm23e@6bS7fs=1Lj!e#6r=AGglj7=pvYCB@V+k)B=HS
zOl{$btDc!~@EH6+uQx6RD+So<{{Y>}!hzY#=>)XKIFMwh>z|2!9CvcuBBovP%<_!6
zikisv6Hv9X3<nUx#_kNW3XFlTpo31Omod~=TEN3$vK<>H#r5zo`0FEyv{LPEUnrK>
z?I~)VYFfIbr3#j}3ZZ}_5;ewQc*n9|ok7bLJlxj^UgZkT&{t>`PaMRmAG|2;U`tz9
zF)3I5%kwPT5Uj=IKP?b|I0fXyXra1**r+7n*R&l)M#yM6ZiBhlqrN=<08lNOt_(Po
z?6spAJvxK#Nq?AHd1HtuboYbeB(Jwg7%Kccc@mv<%-LD<I#g}l$427bTRDk$nE>Vr
zeWJ!^6eV#fx<bTvjl{C+60-qGi_Cn-hTbErI+Rh%;Vt${TbJXv#BPAEh#gg)!OYHe
z;MBRL{?f&kgB2Cua|_9V=4^)d8dl}OBD{I`gfVjo=5g%>)sVJZEDQB63iIs~c!Qxl
z$2Lp)ZVCpKU2m#bn$h1<rnPy4d5gR58o{R#v=NsO>dTs*yt5cfi>=?cF}B;o^8OGj
z6kkt{<!s`z$lzWcsv4y^WxIZ0dSA;LfP2W64BWMtFM%=7(_xf#=`prqDWALw!!6=q
zwm9Mk#LS98+XQjAWWiXL6=_lhaVn!rJgP1Gpl=^BZC3rnms!-}q4OI`;wPjreZmpq
za-ob$?cCvW86m5gV~L3!L0<T{FBPes`stYKXl3aFh(MWQ{{Yo1%4YR0xQ<{XwZ##B
zZF|HN8aTuNJCy{<&tz*g3JSyqp~V%)`>9r6NL|W><9A`}+breAOMbn2L<^;9OZXrF
zUALQmQuG^Ujr*`LE%JXUOrK^Tmqt0!DunXVT}u;5V6vl!iJBHX`IK!Lwi=e^=7r4x
z;%Fq#WwzRt_zye^bt(HLSVALvs8}2UZNv_{3&qCJb;lBd^6@f*OVnJv2q76h(*m=6
zPHH%rQWglQy2L;%7$6<b?k99;H!LY|J!OlNO)`O~1_7PlGa}l0i#dEva|~c{6?4oP
zov*dbYQs;03b{82G3cCGaFpSD-lN_q#(5@fGtc+p2vpWpYti6_(&DTzc#hC38hWCn
ziEdrQ%-esJn41u~)$Yu)#^XPj(&%xK>|A(YhCv=m{Y;;9#OuB08JTA@0$!t7voPW=
zYH&kRR8ZVzQ&SD{m}o;+2fiXaLvqy_nPD}bNsJncmE!t|MIMaBEmbIFAauI7nU7W7
zN3BN6Km=(J84S(5re6^lcP?6IlywBk(yn4S`Ie!8_;Ui;9-h$RzY&I5XHQZijIB;n
z+Q1iW5QdL;QyEK|jDws|HF`fE%%p(o`u<o7Xx@83ar7(oEW%sA%&@b{ehB*ryt4Ys
zw$3R`8RQRh2xwa<?b8>a@+P+j;sKnRV!<C-mpa6k66<ie$L41kWXfczRksPw`^!O4
zwYWhMc!~Tcn}+rFfvSLCKzWHNH*G1F*6XF0(*$5hNn-luQ%YfLTz5ONvI!}O$l!hB
zXHFm?P;At-0}~-LR7@6iE-J{=EXA%_l<OA(DT798`iGMdrXiRW52qsdmae=eoD};+
zjz`*9%&Rw;iqB_LaxvfMxQ)3t+x$Vm)4WE3xUlW@m%8T`m+VcnQ)fr42dtRS^(^EW
z7x-Y~QrGZ>Dc_#p%DftSfT@<`{!qsOX1j-xPcZHPy%9zyA#l*RN%)tSXPJ6YG%1dr
zVkD}ltow!2wmeGuCJh7u4~8{i8oGw%FMzhpjEPnz>J)~n=1^AdYepr}){slP=cuwM
z+RP*vhpJ;Mf~sy7TX_B>OIEWG*qSqP((`0&t_y#uo1Sjw3hQuEHxbGPAY{g+4Ls^N
zSY7cA89?*U{ll2I<+$rf?TYsJgC@SSd3u-g39$JO$5SHqCF9+UVJYB0lmg!-y;*n^
zE=|6ZZEvfe>w=n1kPYq8{$d%e&A4NDi0Dj1q)Z8^ys)iz3M$jrOmW{2GU<z)MJgV%
zQDBPH*K8aG+2T0{dyg1QRHkxgQ<-VQ3Q~_iZV!CBX$)G*hGjjCI3PAXl9Njh5N_FX
zm>lX8P~x;q-P5vCo3?0%gHEMdishV$ly?=<RZAX{)@{tgnxmd#Zy0BUBm*7Fjb>cU
zONC;$9CS0C$8!iFDB~ALGNwTXq10AB;uYnS?=3G!69A33{r><GhIM^Rptv#&6hLKf
z{9&gjU^5me<A=X<bgUBByj!_g7?gqGeVs9LQKyCGCWE`KNvgt%-mt4iw**nQM-hXb
zp0bR0rKP8irbWL9D)%pPTCE(C(3f7Q%^+ee8;lC-`GMiNWsqG1b|^f5F|qhKda%Do
z+X+U;Z#f0mb~OsYSl^jSrmwPNY3n0Yy=73FZP$j2y99SHZo%EPcyRZmSaFIKx8PDF
zNQ*ngOK}Yzv=k{G3KWL|rNy4j`~BE^b|!!BnPeuDx#yZ>t@At%>)Hi^d4wCJqb6IU
z6rm-T?{eq^K9~g6Lv%*B3KGjNFms@>z&>_m!%${_E#aVi46HFKO;laxTdv0cP{zQF
z=$#!sR|1_BVH!a2G3T*0nlB*}f=$5dnKTE>%Io{53ABEWd48$6UREpL=ZXd|i`!3;
z2`@i^f<#!RQJsz&HKovK0@1CWQ}8SO{eI?YHTJx=DuhkZ`UrctVZG_~{z>9#4H?a-
zOA!^{w1HxQNqR#RNn`c}xnDUuq>t-G6e&i_*P0*8t$*6ugN@6Uo||$o?DO7or(P_2
zoXF(T6pJK+qk$^4ymyi4w(N8Q`vH`m3}9d`kgnhZK=@8P5LHhyHZEk)cX_9s51fv%
zQz+v5fxYstLg$n(5A%uBC;$|7x_wSgP%r*as#=t8oA|kPW6M-rjQbFS=8`M#+(JUU
zbkP&9hDzbgck=DFMbtUrkd58!Ln`@o=tJh!KcD|lRvV<hHqY#SDx++_NmCvfA*1+#
z<I&=My?Q{5<A&i5Tko}&n-01iuK?WWM4Qv@{MGXL-4kAFzc8M!Xy8OrNX87QRxqk2
z@?LyCVExJ;B-jZ>(^`~TQs6UJY=TH&(z2XUL0LAI4Q-AIjj~8tK5+^=Bb@~Zt`_kc
zcRJO6dKvMs*B$!3R}HY(&Ut1@`tr4F<u6<8;2%a}p<j3D-D9i7A8cbU%Qru<psVsQ
z#7UemekU?OY(~CWSYmba9i-Ce*@c?2i3R_G>GM3tqwQF|!1y!t1adb1S*)F)(~u53
zB=~D3Qt!YAH1y<{rh)ugW2{`fB;{dKU2v;B7~OefY|*1?<~u^I*t>{h)4o%0PJDjZ
zDFXkf*W*6&3S}0$({S0TQ7B`~t*>R4E6=~=Vx-V$e!iG(>$iSKJ`8fj+Jz-k_IIN>
zd~eBJ#+>TXUoQw$lCdYrGIRK&cIK_-j7}}b0_lSi1wzzcui^u#iM57!iO7yf)=)of
z15G6CMfeA|>$}3I{t^SYc+b3+Nu21Oo89w5iUFi~6o0Pi(c|M;H~jl)UUprKEK-Db
zH|n>%fiB%n=E_&-tq$+BW^t!Mg5EPD7Pri(ADYN8dZe|#(wW7osa3u0jLm}Oj8$>4
zMCAc%2!eQbRV{e8z@BUwpUd5vxKPU|e-o65(d#BM)=9r0KHe4O(h<Tes}+%`6i2-1
zeCMHwH90c_Fng3?>T$p<%R+62!Is3bgEz#scTiN96a6bpV7pFv|Mez+0U6GB*5}A|
zB-3bUUYGTogfM27KMccJYlEH@bJw((ww<-Aj@#dz=6t&bc;Q;wJHK_9lom0=N9kZE
zAIDkg9F1Z;>INm}$lUXOc-UoG=ErgRJuqjK=ho4V?Jg<5__6)<^vpL)@-6%_?oHeH
zLdqW&-bUgY{Wf|2brWzW{W;t+zTkV^wIH3LkI{Kyr`y$;cUw6y6c}Yf`9m_9x?rUg
zy{kZzPjvHuJl9jq$0sVOpiO8=3Q6!IvhH!pecbmrbg4~(&Zf?iYKTYxdnotxY0b?)
z`P+U-8o+#Y`jHs~zY0aX^8sG%#NnzO1fstJhvZz9@AyYnQC53n7Gb|56Hu07HxZG`
zc>V9d%6p7-C5u3y6CGEiT|-o)GleFV^+gP^#aChFRNU*B&7Y6}mwTbE*EFHdy=wUS
z>m$*HW>j0Zq#U@MjK%CvHk6x>HE%}1lY%n~I|_3KDjV!1;MnpXN<RGidvmeHV1<@&
z78Ck3ns8y!cWN%e<BlhR2rMNY^Jz+GC47M7yz<XI`BI&2DX;UF9AU<cbId6odNE<Y
zUPP&RU3a9^#_T7@at@%@$se85HAqt-Ql^(~B2;r)oA%w+FAYx2cl2he7{+mkHyJ&(
zvk3|{1vuWuB--B=PWTRE1T{-6o%){OE+!pjC_H)L%3znB<NX7Z1lV9R37`$i{#JX;
zDN>FvtgMjR#JUPp8l*2!UQ&>2@*aUC^3nB`x370DJAqS6V$D>%32l9;{7p5BsB+&5
z)`$jy33-Es!tH9}NIpm4hU}qFMqLZ=$JlO;F6f8}bhLr=rQFKQ;i<(?)cBVTSZq%o
zoi$ea%RxLTNbzyb9mX^N^V~>NFp00?>07i164X+c-!N8vLEX?{lAH?^UzXTCLZMiL
z=y+h-#^3IfMMmRIr*}Ix2#u_91r$G9E-BhUE&Ev0`Gsc_*BvE5QhKs2BT~|lS6#!n
z;947UuL-dWp0}gd@L|HgtcwiCdw;SFCd!S-AyR3T0aVhk@O&HN(FCM(#h?Hyo$>Jq
zkyCN=(hQ<Gz{jzx>yIOmGG^<{ccs}3L{xuJD+heiDIwvUF`${zlRR=wVV1p<bS@Xw
z=K6q@iaYlv;$M)pj!=8eZk)UOgc^{>eB*6NXRC}rh*Ywdg$rRlFq`j1to<u*r{B~~
z%$v2J8-nd;z6}?v&9XR8)Dn>CNzo@6V|^2xbHE=m>v|-PEfU%{)@G5djGbt;!S`3N
z7BqWU?w5yXaRKMG+eqafc)?D#DD0u?F+&F+yK|#uTaZkbk8l7r5p<BbA0QAk_3uBF
z-`(O$9{+-GSW$ckuq&96M<<gcA0QstHH@*-w5vNfxMmJR_vyo8ijrbv2Oz?<87)Ly
zVb|cAh`w+uv0spo)Fjt6EI(QGkb4)ew=#iar_+Z-o8|3v3g-;Fouv>jvXpckF&PKf
z(g_(8FkNYQm_SMU${;tuZ3q7kg^XxvrI)J943i?wKIhQ6&GhPjKyMUvZqtJCj=z6j
zx~AOkmwb!|u3^ge6Mt+0Z>v{CB2YPXK^>&ehw}sT&N<sMH`fG~w@24|FA_;V8CL6M
zmkrX<XEe^Pm1lR@R8|x#@Em``*uZAm82<sl@SC&lTcUDWr5JyAXC|&$)hl}S3o~-u
zDA<ueeODwchnT=JDid9vfsjoab4y)zK9T*hu=U_%?GzIK4TCzr?gllyA*^EOfQuPz
z0Am3>`t10zrtq{5{iQ?c&{zU|BQ;uk5GVL-*73g1q=wL32LFz*{86F)CxXWE5+&E@
zN4<7#1UxDT>k~~V-cHxu=@IhP>|Jj0K(JvQcu9TLptX-ZmF8s}l~5Z{qEHL-&bw!B
zXsfF^7$1PPaCL@ERr9&+nLuwc#A-y|Ye_wpzZ}uNs~s=N;n(hsPHzveA0fup>wq%l
zkFnaba?C^%=v0NUV=87nla$NC2aVeDpUO^i5{L<H-2zVXdf`93RzVSWbUeZFt3qKa
zNvfo#WK<(eu2RIVjvhF36wof7h6Y^|9o=5*0LHtnBHL>-Q8AScRHHam*Uw6|NE;6_
z3+5>TYm1<bL3RnlXgci_JccVEaf}crmZv*(t*-Wh;s=Tms8qv9LiNkisn;&yKPLlv
zE%F+T9|LvvX{N6@jR+;^ytBS6o@@bUMAyojp35S3s54dh6+lv`q@xvm0iK}OUr*@-
zaV1V!Ydu8&<ckT>3qNQv_K_s<w&Dpm+ANiBw8Y~~7Yk1VvCj8&g+i9hbr*oaMH!`m
zn9J&5t$Q@?wD&W^Z2&Q}cse~d5c8=B*V;A7_NdIfIS-|AcfPHF0Q<jZc3B3~y>Z=+
z@<b)Z*Qrb|(YXeciPw9o23D5c97Bq!fap{4u7pVs;CU1;%MHcgbhgwki}(pfsLK+I
z3xV}33=D(y19xMT5CdzTV`-BHafi>q&BT9zy4u5ELVfQsaQBOg@3mvE!i2E{gyc*E
zxVlGjI+E|?ucO7z%pbF|?cnoV!ARC;?hhYoPjA=*R&lU*smZ*z4FafZUHNk7Xz5^)
zxe&{|$PV_?)XbzdIY)ON-Xl_}-aCrmNV-3)eWoSdC-Cxk6APq)g+Vtvq@B`W|Gmau
zPcH!+pW`4<rU^STX(k}e%PUINk|gj);Zwxo(0pMWP`EB&9>(6Z{tjs=lBTA3?Eh8`
zxzrQ^E1;z*yfE3?xT@h?Sx@s|!fKSCY(t2RJhqme+{SeE9MyzxY&D32CEK?k@L9{*
zTbr}F1>Jcs*s<mC#L*|Iz3jxy1Jhy4HI?_*^n~CGs{43_wy%%luXX61HKn{vFd99L
zl>D^ltHQrveX%y4GpI-VU_O;uzh%YOWq>_zuA}i`DfZ<qh&79igGGPy=KqO8^!J!R
zYTGDl+lUaBEu+i4CUNsrb|7r9RJv%>by2G=i3;fqo+2+@892t`BKVD4$N?G^+}N13
zH?v{-y8YbmhK1q3u7Xj=Pj<Qjw}&4lwNWSQH<+?Kzv+L`iJ8ZY{{n`@J<ROLR6Q*O
z5$d{eHE9+U*1$OfTOgJjZysb1e~YD8tN3$x^M}7l3)(kG<4VQuVi(31zVF<pV>%;X
zd@DD1jY$_9Ch`z#f^LSk-md}h6Q4{%t<CVm_1r(;10i91MpRuDm@x#;?ANainmw+t
z>EcX3eZj3dfUq3A&81r|O>6w)L_j4ks&@h>bN_J0<7K<LOwC9Cf=8~>t$K+}(6gxq
z?XfA;zG@Dj+9>+5vN6LE=Sf6-=My^}*FziLp9Fh%O%`voGh1YqUT60t9`Q?Pd$ahS
zphKbXW_yW7K=2{Y4z9LxALfkk-uBTsph;nnlqES0$oqQB#}u#*NAY?6hojZx1L8e_
zl_QFiWt=O}?Jd+W2`Q`9`Ar0EN2-QOo3E1s#nSQ1Y1FuIEM~KS$qaAe2Qx(f(Vrr}
z*_|6Ob9rg-frI{>%8108hQs;4xDSE+c5+H*+ImkGp~S=vsm-}ytCu*uQ~}0Osr@eN
z_uZy*yiWPxu3z^$mD--}ch4Ux(eIwv))|eUi-SDZ$0<El0R*xbCSQJ(mR?}}K`Xs{
z{PwVH$?l-ZU`70@Ia^4Dm+(q9^CQ@5YW3xd`qrj$k*>O=un5B7)pgYa)?}#P>Xmk`
z4n#}3RBwbsM)~!mhPc({^8H<dVh1-oquCaP(7QY&h8P_3tY_+=%Wt^hfV?B>bW|k!
zaQC_KWKe8Q48lo2u?(WK0$&f{HG%A_hk?}*B+T;2(NITcA42iIL2)9M+YaH;NH4K^
zPm#nMROEpG@0Tf~>gktUW><Qg6e8N;Uz}5FAqBs5mS=1DfoZ}-D^%jw>qNL1G(h2A
zL9;=gmcwKxFHpP0K23S0fnZ?KVlA%Rcy{~)%;fEDnB-``e>URmSaJ2b*5D97;LDqr
zzzdz{G>(*t-IUD$QW-ne)pguDC>op1FTpduOjPzl_}nI(wc-p5JAkjn?FgEbUFrqr
zU54hCU7qvWjFwTGb^lZRJZe4mOnNHd0HvH!jM8!PDfKc+ugNKfweO6lEIw7OO&vp)
zum2VwIK^3!ne({+)G&6N_b2PkFqP(rZjCVx(TW>MrdU59{et%G9j@`V)Hz2h+nO_O
z`BnS7_iDZBx>LUxJxg)Pu#T^i$G>xGXeQ{+_2{4fhqB+X{uU*Ly&~&e!Nd4Y8H28C
zOdn%e+RasNwW_UvZoB2cL&+<o*4+IT>1j!qQ@s;I2OoOdlXlnjzG&9%nVmJ?0<>sk
zsmEM5#;rt)G-+LVn+{YNfE9`z2%-!niWPp(?J)-@&i>6g`f7&SvPjYVDM-TbK^bp|
z3&Wh8xPmEOXq3^>G)f)gVm^io^{4F99-qKA>7e1Ybq79`ksw0=s>=M^F{#wP>9gPW
zGiOJmOw|v1XMzJS(Q)y0T)3<QQM0JTx4Q^)05MLGlA1g_cH4%ioLcBm_3nG^(u@fz
zQiqiXSc8>i(WFo!C`3f8hSs>qt+KGDeW0eZ<CS9>66TPBAdh}SC&b8UcO@JJ$L5JU
zKNdwBn#`?_KXd%j5D38gT-{~J$<P`Z+fsJV9M0Yoa6oBU0Qx-*{7oow9~lJtM}mjP
zzXSUBsM-<!v>#2+%3Z)eX09(Jhy{fF%h|U!qQSdAGcr)3xNrIw1E%R6I(526QPI5r
z4}~v$ef*r$7Rqb@msNRrE)nUdCDNjJ2|p-t!*Awv0z71_Dya`V`AlYdP==o#V06%j
z#Q#v_9C*fTN#}>{-z_sB3P08CKB@mm{EFoH!ZpT@y-pfSdToXe)O+&_TbdPvUW|#b
z5BzF6G*2ahWJ2d79qe9+f7#LgYu`TMe<;cs2ORHE-A#^-wU#<1C~uR-(XIO%Y6?qj
z;gT2ZxpwDFRDLec%yqQBfiz&tLRD{HX*uQ^toYvtSo#{2LDgwNrA5wu{Nq;%(*4Ct
z9zaX#e}!*HhtI<1!+r4R4TL$0^Hwm3xAq(~jWN9})d{h&^PW+=w?P>)vm6H`@+}>W
zb}hofwQ^I?LxTThjLEIDG}JV3Dm@K56X%d-LM=T>v!$Z@v8eEAR0d<ha&r<*%W4A)
zm2ggyns4I*;8lA?%bg+UcgiE*F*02k9X%59>9WOgj`{*=SYP>j9dPt*aQ;`Uh4Sdh
zqju=ZrPPDUq>tCtBtY<YvFaBmYQ7bpr(<$oivHm@EiTnLobBwCVqT0=d!H1lh<F@h
zJ@|dIZg2O=r#|jc`dqOU$L1H)WY-VaC=vCkaN(H!s<l(D9+NS*3HagC@6OmPh~H3j
zMX~YuuN{(x4t($O(ER;PEv6t}7;V}>{@YCaJorJmi6em`0&tEpOzD8HUt&dl?3?Y|
zq*?Q7Alz{Q>D@WRgYc#tE_9?y$n#OREk;r(2n%B0?+3M7><E>i`r6?Qc)JG$T33qJ
zOKfuoqcjY^<U&<Z&}karxe1d#*1QIORN*^z^h>@AQ<U|x5&Gjd@PuXKroR+CW{(M(
z3e#Le7}E4{AyYFy!&kX39tVWd$V2^Cu6QD1kk&_KoHb3|`GKU@03LT8YK8?lLefLo
zucQ-bZ&l5yTqH1qYtRb`PdrehbA#j3YeZYWe$9VxTvxlh3)|pPkujfW1EqNX1ELcF
z9-l8`NYVaA9`e%7KOm-a%!q{-Y@9yrwfL6jbvxEjdH<2qItQJ}K83*4v~q6D77Q!3
za(Iku4a=r6HS34xQgtYBno`d3WDMr=1MOL@(Gnydh?n}3JXAFO$mS4xGkQW)j8{Sw
zlJHv|v$_@mH-kL*u$RFh8ALzdE!-Y+0P2qikrW>h({#w7Ez-@n)FMm`e}FI{e^xdo
ze&1AkKa~cQi&RX*5kxOu8gVf^d08;i;_Z5Ie<F&ZsC<Zb8D%&Deqv*bx~A<n!SX`1
z6lfpZ21WMs`#sgkr<8u1k80fACNnXo4;gC3r~8-x`O_Q15q6JShpY?Xt(x`oqe!9f
zT^vi0=N)w`hECp@oTeEW`-Br&#@6mGncXLaEk{`nFp9HFt-*Y1tnb%DfY1Kp#H17x
zVJXZ7#(1>I9q>8{F(Rw$2m1jCTdGN`)YiNuEp;cOzIVwvVK6SUvSk-!!Np4#Gd0f0
z;=%!td&uLap0cly$P3x2U^9w|-&cwAodhlE2(~t4#1|(3xaH(;Nr-KlObxdc?xX~;
zpe>evbOZAwj;$38pJXvcUe~-77gR};%|f#SD@kJ>g&t?qwO>hjX<W-JnPCYD55`$_
zHb_vz4a)vi48$QC@$*QL+~a__<x3pl>6c}__1UrX^3we@qyJ8)zm@O^Q?OtQ1+{?>
zybPwXL=1d<HyOFmxDTp%8@FN3|M8iv_;rkrXCAe^2H{KwuBmmcp7p=<uRlOPIu@{E
z@=eI}gN<+&c-@ZW!F#>OZ*5;wP%6s#aIwA#8(9|DD(%*BJ>fj43|g=_&1<O}E^n`$
zu}f^)onY)H?B`I*l8;5uR~`j9Ah@r;9U57j=esruRj%n)=WElulI|bxPzn5HT8?sU
z_TDDFJ=&m$pe1OctFnIAL;g{5uffFBLNzNSodJFARhX)5mR2Wd{dG~%WJ@rEUWhPM
zzL)7r8S^NiCcNtM(`sp@x-)Q!y(94-MMGL>2w4lTrV^4BSUyoBDt)-v#@M<ty?hMo
zt!)7ZEIt-6j7kdekp}Tp7xQZXz(}UC@_v)5=9uZ#j4@G-O_(A+hpl><e(eW;!x9@4
zo9WGS`gC-p(c%>2ah2R|<@Ft^{H^b}%xg&fHNE+Z5~O;eK%p~M77(JO<5$5RDN-BO
z_DwkcTxA@TkvjP8+t}s!#x!s+1R5f)QYC*Cgyf%{up-mw&mVZ|j&n-LHw)N{w)UwD
zSZ9>-Lg2uDS_JhIdyysok+-MQnyaPpwnaTwb*ziG8Pwf)s?3%|%q-!FA&dYJBIFs9
z#FAn+SY`3mDKuVG)pwrPp}*1+N{d+cz|5)gu!6QwNnIL<tby;fk|LpgAKFtyGvp$!
z_`<BFlae=Or_Tu5?PpK_K#htwFB9Uei-NCO=^$!(?ANEo!S_076Pn4UOvL`X?VmcK
zJEjm8kQ+*8PI7zFMEvLX5ChDwP%-WYCv#CJymF%hav)D4)!aNOv#H3m{3FfVn(j$L
zs#PaJ#Qqh}D8b&8o;sq)2Uo9~K?JYCw66l*()Xa_{*QgkqWhU9Zsx~RxtS5x;!5AG
zqzNKCI*?Z^77xP!+(g2F;GkC{jT?unbqut(izwHbyzQXHj|nR39bzUeXtx5D9_D&T
z)@}|RX-IzwFZ<Q*)#KPa)O>8`#`m7=q3C0Lfn+*!loRE$V}4%pNO1JVcG}Njt>`kK
zEpBOEOw<r#nslUJ*-=QYg+@q>c&cPFit8uc09$r$utj{Zpl~JTW}pZ@kb@IR=-<=X
zVD=e-_d2a?L^$Is=_SD(`7x(N6(3p4IXuES)H;MV4!19R;+=J^yExvBnzu-i?`^<2
zms~ZH+aUt_LP&V?sT|3t-$GvV+5u4F2#(Vc$+VwP>7Y~kM@?qC0{C(mZ$pM;W5;zS
zqP8H>$xrr347DNh6i)Z-r8sWGGG0>O*_h~pIQQkqgX4jp5xh!_nEFUD$c7QTw*zP9
zWpowDBc&UkDYEl6aMIhFG4L`f{<uguE~JE=SHa&Uk<VI+;aaM;BD+I$Fsg@ND<z*!
zNZ_bVAPY-wrwENPJT?Wj2_Kz<uU&aP0}vV1lpU8@B-y2fZHt_x`syv_u0%fLn<NMc
z^F)<ihg)8baV{DV#J^YQ#7f_1%@8ntm|9yjEGmToM&A%ljIl&+w!(@i?UY@QZm2B{
z=#bVk8Yem_>iKwnh*?2%SM6TvF!8Ce{a!5>2-kN^ak(I74eisFtO1`x(%d`04d37`
z5x&d<DK-@jYvj&{hB2AA5<tEAlG%?nIiyk@jEe7yZAydfeF|q&{t~Zp6LwzyL>)~%
z{^(RS0fT)0hjC6ISmsz<$&!qoVi89;D3>c1d0=nRL?K|m!RG6wM~V8zx5~VuShL1@
zG~ObSt$H(h0_s{QrH}EhwRdVYfG};oxJ#4v>qbK4{KIJJ$D1`8zhn9B8Uucd@qD+`
zG0k%sQ;0`UW69tl8kd08k-vxtxuvWQwr0UoNfKINOsyH74nrv_T~LDdA9m0(M7T{d
z!KWa1qg5;PnE%8SBaW5BJKW%3I=*DDByqb{g-(c0T(fA?wXla5H3MQrs$gMBboH8a
z$AjbUE;r;#gfOJpU1732#)w$I{8+_pH*?1ME(%AiF5&b5D^!c8c<k;tWTxZk;G}Kx
zgv@9iG29Q)&ak;e;OUfsOoog|yC`kC(qF%Ku*3?^5#c8MBR~#|qE>avPHqG2l2Y-m
zLfh}P(DFv*^j|8=#ZcVu3+X}lJHXI#7uSxR?e@M#wv^172^SIh-x?e?V>YgAR1Kva
zJp72&WgO}+-BFQyl%(b6sRlQWpji6R*Dhx^0@#YjR+FO4anR!3$}w&CvAGPF#8079
zI;{H@Cj5Q1W-_Wn4kS5NL4Q)jl@NDHYB$99W23go2?QfK+`tIJV?zC(hm4o*GU$oc
zceF~?I$aMw#bCGQpN}PSZ+`0-<D1$gXVNQi8ID3Rj_h7h()C2X$xYW0FEqQ0!Zv}T
zmdiDN3slwbru+}Z8?&;kjd~l<u{jRfJ9;BF4yq1s{=5{}(_NRdD(D`g{+2O=2U0K8
z$YTL>EQhfCtnYVdCmAGHoYT4GBC3Y*U1nQ`W_+mGvy=jJ@h3-UKBgvu{Qhc9duT_g
zb2y0Wyz-tjM|~3&W16(X!IWr^RfekK_czywt|c;F-4sV;1VJ=0gNNh(?YC7{#ODDf
zfR7lZRB7375!hgRf<PtXp<LS?jpm7k!lK%e?Nh}wnB{n-%seUcE?tc0j;9k}Qr*G`
zKYIf8VN66kJv6=(tLN)|4s#pa_y|Sc9MoTVA>62y)&eZM0NEq-1IYm8%tbRlM|HgH
zw;Xi;-bTUb@7S?6#*Qttra+0g%<?qZ0(ZjI8;-liT3onlvoQopiqqTP_&-Z)_QfbL
zgR)f!c$U#J>Y~Rg``gU(CEk#!E$-XLYenkAtvKkH{1CG2ve$Vyk&IQ#xOSSMZk**f
zXS^oxppEZ^q(lPhAHB88^06Lwp~iQ_`F>OIp@SE>b@E>B)2%RhQrD(76%maykK6I#
z@6qztLTRIq>{?T3D@aGvZAbG%FkIxUoKGkI11B^e?Q)QN%6=s{@+@Xb=8%L2q=Pw)
z4<bSe@r-bPoVxMV@To|5E-gsDVLGt?28%L&+9y!6z8^dVJF00&4o~!?F0pHnfoLaU
zJJ8iy8?Nq?MwdvA+${1`l{8?R%-S<j+9Wt(td2=V(Nx_QJPjcVpwxq2l2;qFr1gI2
z!N>XsM1E1XYMt6bXn>jRhB^Vn=Q<4rj`%5pk*n!XX{=?!uT{Qo5sXvur+#Abt}1|*
z<n5b3u=hg}TF9++;i;)tcy*k++?A3!7rZZdQWceu9P(@4{8>NDMtllaxY7Y<NZN7P
z4pR2OQK%D?`T?EVQzmR5S0Vyxu*Ll+=?`K-?t`;xK~~CI_7d|?<#@IEdh6a1oNW@*
zdZLm>>hJPwZEq!$Iz>ck6A*UVXc^i@A#9$;!qy8K@n8nyYg+brb%UIR4ZlyyJ2$YI
z&+oq(`H;(&RHbQ1909p=+3ISDrba}$N7s>nd9MD2`K_g+Civ$Z+40N#V3ynlr&EY0
zZ#9}<sm$Xy-^{LrX+8-;;l$TYV%(o|-=e@xzFYyJM9h$nQ+NpYruw-_{;nR8o6#2~
z6ZvLil0ZJ3tDBD1$vw7nZ3Bf+AR<jT_4o<7ssCOnK7K!ES|bi(%)Yq0`71FLjIin!
z6d~`^Lznx4I9Icx=S@N401vFs9K|Mynhy<sUPUNhK2FoZJ%xC?Z0MfBy!HI$Pe@C{
zXZF(BaNLEl{qGWEZTM5Gx1HTtN{9MLx5-~aR?%4rl{qXfAq0ab!hQVLhX^#G_i>PC
zwpKhWy#fE6x#-!JXiJt43Xa^C$%0u=Go-mBsJflupBR?o@^HcEKI)<XISx$dv~k_j
zpaAC?Qw)N?%?60@!`{eYS092SI5&xp?QQI!_In8lh+m-^cnNfAnTDSXrq6J@gv^5S
z%|bWudJy|lVP&NUIF0mya^J4_C*dGUJ8ui;-I@``#4DN9-Z=k0MK@=!Da*{3)3~}4
zYmDx?^8B8HePLkr5WHV$o&1TQ27qSOJ4Bb;hkeaWY4sVgDQ%L~htB8u$t{4aoMoE{
zwtW1CnJG66w#eAqps=ItY?za$_ziaW8q?98NTjn!A;W&~UlHJE<dT3?9D{SZxNgf4
zg?t-E3huNwt>BSJCx>S&JhCzXE{>%ebHt$Y2g`~nbvCohCt{7XGN<*o8lH{o-N&RI
zE2bZg8+8o2U2uPfE>F)!^L}~4MYvHL%Tkw7Qtq2qPQ4+xpFW5o)aNO&3nWS?jKA!W
z8Gr|Mfpn*FjK*49gAFrDc3XtBsFOZ;dZ;mIQPDAxH+H`Fio;{Ew^28i@9r62(vup3
zmq6sx&WwJ%wr7?2%lrX~N{bgK+)|`A6b*ahE~~X3y{$>EK!SmHh8|Os@d86%`x2t$
zrIHV`|LR-Cf>A{b;+?`okf?E`k7qJ=vTDm+EK3v_ul0KRa9qgFQSa!KgafRS2{cqc
zgNvq{C#@3wkMSu3@r!)KT$O)_@fg|ui+Q?nyrK4+G$;ZjHsX3@pp9H^u+j8ETWOSD
zVhLZtX&9~`F=%r`ucEc(XIZt$pwiS)_7XoUC+~(I$TzZOMa0Jr&C;5q6`1)X6G7SJ
z-xBpw$aR~UcyC$vy~eLlYNxpk#w$?W6|Dn}c`}B(H^kho(DD!>$=S`YG6JjR@^}Yb
zlIA~o#3ERG7F@(+6tg%d3Yxs;#fec=KfVIrm-BIt4*yd{9_TQB7Ln|2rg7<@9EG({
zMXhsvq|M`vfChH}f_TBaOuZH%9hNUDO!b{WM@oVrG#d1~<PWe6>q;?3!m{VzAMuRu
zhu={CSP{ZU{!Imy%CoIfB^P>rm=H5Wf&`%NujJ<ybat)n9-wQkrczkNOi*u%Z+@g5
zF-s^+vHF+g_+BLBBuG-!Q5Fe`cH?E9);gkHYQolAFB~JZ^j2I7GqV3sNKhh=Holy*
z@3`S(S=tCE$?YHA>8!qCf$`^f1u|&gn|jjLXl$>3z?iBo=Sq9j`Gs!SCWOZ@h`P3t
zue8eC>#8Pv`VE1|AB_6VFfOgiq1%=$1ve%1#8)?B9SUc0o3?QK63Kw16KxZk=lkYe
zkkqi=DeiRXNRTzNTm>)NgWiV>iyVhu`D7-ky?=8{gVI(F%;_iJJwR<xJIJ0sye6`^
zAx3KAYMTW*Nn}xroH1fPjl1#8W<F@gdKs(MGlScNe^1wQ>WK^aX)4$H5M<x8FqL`J
z43ODFn5mJ5wMTrc?z45^d>i$(q)43{2F7b`X#GIGc>wnm%Z-f$L5^=_YUujIlnN9t
ztt!ag-mZ=lT%@h^25=}+2>8(i`@7n24+|zePNq(RE+&oHmw(oCI4I|`IVj1=nV&{|
zw@mZnwNY@b1&Z=iRH1;ayhkSoOPk8*6m5wK^MoyNkDyzJS<^M(OP}K`2h$Xx?@~o?
zLW9#Xb11$%vujS;BNzzKNWi{9#3JUh@0M0pZf5D<>$_^%s!Jj`D~z-c)1lOss|daX
zD0`PZ4}s!m<=pJ}ePutCc{2VXzBb=mzudkOZ{21j1wKn|=f~FX3^Gf$pp30bpGLI)
ztfvW2#5*#sP$!`=fYXS_Pj;8~d0!P2Q%}4-Ke_&9T$(m#Ut3zuIZZmBXGHsKTdn<i
zqkT^r8A$_h{V3O6!T3mFf#@wzC9$0rJ;JxVWZ&Q4TJL@uR9D9qejH1GJc43`-|R=_
z1^_G%AkYK+S6+g}a87bff-d|bPg8QlOg2;NdtDA584tbEQEK5g$xlbJ#m(I-YpDiY
z=z;7(*|W(@m5@dQ?)J!V7FU7{BKu#o>Aq*aNih=q&VS<R9MnVdGT09;V1GC)b9otA
zGT`4C2Xse4_Mv$Q0AkBxbtLx_^@tX7@9!7i7%CC%NqR|+a$ENc@dBwXBySdEcWWn%
z<x9~u&qYX$d_idR<+IUCpM?PZrTT*MKOX}iB~pfxdG2L(KQmkSV_Pv6-Sy|avK}rv
z?t~+$$(XirLw<RSP>ikb1FiHVu~4gT5$w{KKv7HWWuSZk-7pZTw&0UBe+}R4YrH=A
z+@t!Ds(D|NndeJ$X^{VuYO8H@Wx4YV8D0)RS_w*IrhP0|w@IqXf8t0?4JarVfHsE!
zu<@RYHqE;uda=BwS%2i6eX#{%tz0m8Bo=>^5lbku+%xyM6{$_({rr#D4uII@rI|Ez
zsCvTrGey%xhW7dP3rMVGne3Ff7PzMV-F=`+VZh?3P0nhjm%+KZ7f22fh|*xm>2IqW
zW>AMhZj8uMmpuk6%-KE>=(6l|Ndi5yEgp!vS}riUc}TzP4yyZb{9@Jj98-q4e13JA
zVr!S<QcjT|)>&j7bN&7EkhjgJ07*LAsQb-^Iby9%>v!v9h1j!0@vri(@=0XaGnRt;
zu;cjVjI(z|Ib&GU{Po9^O2T%hM42g55HF9ZDC7x6Zg<6B6Xl}+UC&;s&_q0{Yhs}F
z$^P$pJl$JO-yuBvl)C;Dfq=)p0Ja);`hMrVxSN?g5EB?Us5~SOmmkAO-`39bzPx@=
zZFfgbW@p9Z)$XLu%NtNqx17gep($8Hf0gMGsss}$W(of`Bo58Cj+7e^iU>vQ3C(ua
z=$>zqB#5F3C=Ox@rThD6L&Y!OQ2QX9LBxa#pCrxv(FdJvvz!`43JO$({BQF+KI+t>
z3A6VTDL5&ZpWEiaUQ#|?&Lpo8syaC7JY?yLp%xxQ22nUVswjiR=rC<S!86?%I{qa=
zBmzS%zT2y8bLA7>l0~d46pEv^SG6@PLrie+W%4}+6sj{UoajkQ5Gz&m5g$h}He~{3
z;DdVw(@%K^pe8dsc0y-Zh0D;U8MjuW&*mK5(<8a?I?RNGc?+0IPxO}|@z;?t@J#Pl
z5EME|U(cz&%{d@xF7E&tuH7tznLZ`Nk$p>QhBeFOS>-T3KK*D4Dw6R%-4K@yJs@_%
zE?@Jp7Q;Sq^^U|XPlVy7rn4wuu${$-qRwH7oxYat5mqH_=sf4$Di?aPIt5OH#!F<i
zO2kLTPNQ%M9vZ7yS~IaD@2lFUoP4<QRw+j;v!IzdX6QZ7%PdBpy$B)R6iH%H?-pN>
zhVSy|!jbs3iIZ4foEhcpg2opwG5``C`-D@KS~pZ?roh;rrhDWSE$A96eI!bw7&+yY
ziK>oaT4UAZ@wu@hGUL!z?;;dTd3vq8N4Z2YXCt~9toAbA7YsML%e#&(_{Eec_SE9{
zCac5ZRAXhW567g{Ex&^AHUW>NVLfN2@2z!R=&P11&X2w<6TPmgb#?2_A4>v`0-x&X
zz6OTymlai8y>#I?NnQ?w7i2NkOBL?_EG_?U7w+<<4EKLsxGMng|C(_BlTHGRYk*NT
zR)OtGeAEZb-!K|X4X2itMlZPQq8-A08=};~jjZ?T1Gu?lDyRus(3Ak%OhS8|@>bw7
z4l7|cfZY$(jehS-ZD1M5lb{?hr5IJu9RO%&hU}n01yU@sq*e1{v>ddEe4g>LqiHWx
z#38!uU$c$(SV_1^0%;4&J66|Wy!d|9yjdV`lrlFyCFqy0{M|7KK@l}XZWR>?1R_Po
zH~qoYa{L`T>^S9>t*y|4^4Bz|ke}hfPSWSWt=Ab9E9M_}_Jp(Hm?Ao}6+?~)io7IN
z$PVU6F1=z+du*8z2~o2YoR2#}f<UjB3!jI_@R(fJ#fa;RBQ15M^{7xeMu`QMA)cVi
zmK!T~Qk$Sfl;spVAVZYW9U_1^BoUdK)w0DRS!u0~q-#1i|HxLw%HCR6&PLF(Tvo=u
z$efKBoq2_(<y`sQYUmc*pxP#Mec4u<IfZ#&fW*7Xygmk48kZuR&4Jrq)d_;4m%K-$
zH7~-1R96(Y=$uEvCY9(S*g{qIzUf?jTk?Zz7X$E0*Ngt&$h#Ge&C^gnQR;$>ICEAb
z#>=+@E_6^rC46^0=iK-ENX4Ug6%sRw(K_%aG$}I_t}+}eLuo%+gRu>318I(iw#Kfb
zTbL+Cvk=R?-*$1h5Z2bi_D3-bkF@_#A~58B{q}i?r31KeXSXiIS7|f%>eAYOV{nNd
zQTOAv<0tNP=hHFfzu_>%Lzyx?<pYbxn`k!-CEFVp3@DHx(nAsK5XvM<xu>uY<wd!>
z(|}akKjHFk!k^31N`GA+3INA2$#}>p4G#!Nez;c9jZ+-{V2d8weON|SwiV!4?K01a
z7;EL=x5p|NKHfEPUNI2RCBNIZ{hen}vc4Sw8?w2-J7d^mxI0w;re+qAp3CrXQ8jNX
z^E@21e)a$ctvc0KH)g`LSvtuzv0{BwsGy1QWbvKhPPS|%YV_jq(ipfyh@BLTiqEBd
z5G-m7wB@@B!V(*uSZGt&A4O)phsUXbq|+bPdU691WM!3-$zOl{mD^CW9LYLZ0%Qq~
z;szz*XNRhmFz15>zQ!!&&i`<(2S{pX%n_Ns;4L_ulvnR8%8>O9EqNCMokW{|RnHXS
z=e^zp=3PmYzaAj!%4~J|M!G|MxrD0{98aF3rX9uxv=xg_QHqR4eg#Z@&4)$f^hDMv
z55xH3v<{YV0>GNy;{DzBCY{zr|2e^c++S_QN9>EfvPzlf6RFVi{a<X(5Gn4x@hP-C
zGRj8M49b+UMnXc1ci<nknLmb37+nf=JhY_{!;qgbh6N-l1pep!Gb&OJ+b@xt9;L2r
zirW)>=Zk;UDRW;|Pe?kpZ6C`VH)WdOZ5)|)VHfn6<2s$e4S!*<&ZJ$io;`4=7Y7uO
z5R!uJ1#``f%XqPZ1ClnHJL&2Y+u0Xjd?t(To}_eC<O5L8OzEBrq|}?9hri62?u|=<
z{}Mrqy`qH$5|3e|X)3osqirDugFO*+)D6OyWj9-$9*!q?-0<pxQD{%wV%!yKXLZ2N
z{gFdAn_StZfj23iODwlzr&p2phvbN1pAU)=w6gndTgb-dKNN{vJI{YRF&cCI14ew>
zwolSyae~Qt#pAJRZ0+S<d0__@9uCm0RrdTUCg<-p$EsT^0E*R>@mnlmy#)BO^A`;3
zji{o*@A-`)zPZ#OWtVn%fUUvZKuvUfUfZ33UBt*Hw6&@KT*Uw7PhbW=LpYIwh_NU$
zf4j`ZNCPkJsEN06$G4{IAj}R2el{mKRe;W4SQv6)cUjuK{Rm^SO3a2h8g&@66zD+6
zg9%_S8SYgFE`fh>BnD<H_cM{sdvpYDu`XCWywehjY{Z^bXx;H^3cvD}+U~FJbs&;A
zPqhgT31?k1_aZzcWHuo)j=Law{BSeAAX@Ad#^#}qvm0Lv7iAtsOWmzf7~CU6r6?m1
zOhL)T{KV`A|DM}asZ&AWZrFVxP<V33Q@T0BQGFQxH-GlC^a~M!&GMrEgMie9+!@zd
zerW&vSm0x<m&s!NiIr-wBi7XikkT{&UuHa+gE#%r9Vi9!Vw`l;+RA9P<pD4-Wm!xJ
z>YYRcy{*5v6|OCn6hBL9oX`H4^G^sp&<nbJ7SUO#zt_(`O7w8PQ<PI2_CeQQ>``N=
zl-52i1Jym6uHg9Z_s6jc6I8SbiM)2Pf&&ZT00v3n3?YN&6qV+Ofrfxcn4H`LvML}T
zIgtO)luzu4qjBT?4fuPNSR6%p@5p;s+dfD2KNovIpw~5zXzQO(B>X8>YNn+^BP+e|
z-l3Na3wyM0QZ)0cN$(M{m=tAPNIP2M3-7s(+Pc1_zL@{LQ)AjQQ@k@};QgW{w|YSI
z@}G;_U0mbocD;2J%+{u2IIG8@os>Gi|HFO!$0n}<uJjM4EFik^N8b~Jxvz7}Y?hms
z;X6Qw7D%q`?_G#fBeQzLN;2CR4*!WL5?6D?<MhGk;dh>3cm#3L*DxW^rxUdQP!8&S
zxGEbIK!H&Ax>0i1>-zI0%xq#4z9QWB+SW9kwHTvXC3##US*!(d9&7rB&*eh{D~IQM
zsxl|P;NO|9mC^P=SK%Dev2S<&h5n6X2U!m3P%QI{kK*_02oJgBwJ)XU(5o4-Ma$pb
z(M#I}?iW*wX{4tMRrKlq`yDv6)t?c~m{G!#VAuEJF*S07xN>C2H!s^RP+^Pf;^{)n
zBq+_{Rj5IQr->&xnT2->0#5G&0)mzaa|9Kx#k>V=LvIs@*3TZ5)P!sEEh%uQUIxh^
zy1`i|O6|@3R9#JD!?XN#4D6cQ?%Tu$W;gA_=Xbtn25IU9W8c(<JQ*fL3`{!QhEeQu
zTX{Vp#|B;XucHOnZNYy$wwoV^z|x~s&u{G7X$g@ge%wI?DP?3(o2Aj=dNO`6xXgLR
zkcS-ViDg1Y<)Tb)JhCOHdS9#Jf%{A0&{mifhZWnRg{M&o4qBFDi?ifVuk>`AAyiIG
zsiCgo5{dWP=)P9Su=t)5_bXxi@rS*kWCG6?5s##fN}Q6xEOVor=5&1fP%BCR(0-pm
zgv-T4uwt&c#>l=fc=^45n)<p~+;=s+?bERTP!KL6;>|xD8#=8&hy%%|2?t42Jd<?+
z8TgseDdfvom*P(GANV-yilHZJ9&h_Iv~=;WdtS(--$EoKOJHp6pA95@+dxLf<XQ-3
zKYU8}p`0jVu={HlG4J3zAKm@AMlieRb>$tx2>jzQeHIrQ5`j>nejn@m5jS>;LuWn_
z#&Ga1b%u>@R5#URD4Pv}V}J7-;gii+g^-K7Mbrd^Y1R4IVLv`$@p|EA`6nT?7BI_j
zkjzL2GGA~s<S1KZR<6x*r%~&`+3#pCLH9j-pHtq|4x<S0-J-w|EJ?bZQ7Izgn)xjS
zw=%l=%C+!(jtNV}TVv95mt=%ZVkjcu-AnBi&Y#L!JbeOFr0;){m2oh1c~<X>(CxPu
zpI;{Tg<WpHpoH_6;+CCz%}qEW3jlRxxUK-c5OxT+aG6FRG}6Mwx)k))_umy=t(O-U
zhkjs$!B^dIoc)0JGdgEG9ubID_=5NM*t*z~D1NPc+{vVAW+RvbH)1r`Q%RHULw0A^
z{B>UEidW5&&Bjxh?v~@?OBp}d|K>#*+%Sk;F}#@6^43VxmA@seO%yk2wSdjDsz@XT
zH1BV5SywRiZs+pBi6pEigh{A`pW;Tr$KvQsxnUF!NmP>r6V=&&C_<&Zbp)<?3K3md
z4&c-R?Oq*G$DdOz<UbT~7Y(mYl4t*(P+OT~oBTLIqUy8WS1(x=J;vP8);PzC9&bV(
zJ0hrGJ_t((gq#13g<C|7<xHc&Yf%Bd21|R=$$gN`A;GIYbA5Du#<c*Q&fD-(o&rOi
z1cTd!|4_Jpnu_0!DID<GX1GeLM>SsIrNt7l)|!IEOdTVpayrwfGlfq))N8c|J9$*F
z=YAW^+fz5UKW%T|=Y~teqh3z*k3;T$Y+c$`OUzISiJStCoZ(K{(Xui$p<A_HvlJ{c
zssL+DMz40Xsibe$sx8QsZ`Ua2{C35`NuncX_csp4J?u5Zu%rA>6RNTq)DqR7RUP`-
z-9cD@)fBElcM+?o;B@??Wm)SVHqa;zk0HrIfo+@%t34#vXE?^CJu%<C`+eSSeO~q5
z7ULGd<D*LC{y)EKRO8XwElt<RdiI9V%}sz>%muFtn;#rkkE8_7s`n>{eKZeF$w1Hp
z!D5D2fpa$k@N7WwT?4vhcw2Io(K$y~q$(ij2JgMPPacCA#Rb{SCyP69jSpTsdy`=$
zI8*aX&%R-qPzB0*(z^8ruGkq2M_suRa_`k9-M59f87te6@Ig#4%rMc6)!9m1BS>3U
zilZUxuDlFOcP%&RihlRkF#C~Hg=h|<k-IYkW`uS_%g@}neC<3PDZ8BgJ;_w*xLQ<A
z1ks;#|E}ONA|7W3*1{CjXdwh2M{9<F02}y@T_j!^oFK8CGXu2<f2*=Gc<I^%2Pvg5
zQBrVuY#9KFlUTl}wsFULu9ojUB1+g2>&8*9pxp<rL@3+vwBS{jUp09tY7P3CyABI~
z_N<1r@amKXJZgi|H-xrM_5#DM$XP|nw=ia*Ebp|E8=SzS05`2N8bLez0)(rZ;nfy%
zo3#RfN3#yeTfsiBRHWfU|Lb=o!5GN%8y><pFWJ?>s&hbB)(i??v_ad8-ws+9vkuNI
z`6(q4W2H;btkzy{Dw3IhC<e@FvM9@Fq%aNIHqQ}A26oIbL4OI~wh%&U15y2$T2k52
zP$eKv<fgBr&!hu>M-!qy6w8IQQTTrDth_xAJ~`uAtCQORY!cnrle;j<J@-M1kQnv{
zuiDiyCJ4F)eiNjU33*>^dvr^npT@O}{MJs6iXv*P91_t1ph!2jjHKk^^bG-Zs6zie
zsr$2i_K<Jr+BDoB`TlKM-<(d5cEAo!y49!KhrLW{!%8@|?oC6BXKM>s9U1%&MMPC&
znR9^Qn$bV*BsWfV*$|6>V2~bOpyYEl3q+fOmcwTy^8)nY#=7yS<S?!jPs1T)JoE?z
z3SD*;u!xC7Oqrr45HwN<hl6uK%OnlLPwbc~+x-kLcoc_svd<9Hs!UVk+DrK|3jSjy
z6!peR3QE)$yrp!9d!k!f6;*9huP;u?CqFijCSD)0B!-9QJ@HL0-pSwi7`|l+!b*fp
zC$GYeQI?5wJxYq=&Ha5kS^sLz%f2<~tX2eU?J=V5s5%_0%kVm-vT%3rWOK@&q|=Ic
zQfO+>LBxUjxR`0pzZ`t~46kHah&64mIoD14;fw&feXw3Jl3e@uEJFU1Eq2%ur2%lt
z28~1gqG7H~AkNNhogtPY9O>ESolmi{*SZs#OJgXObUD)6)%QdbtoN*jsq3CEY{x}X
zHJ`82y9H~IU~rE)yc1%~ny8S+7-DTLeFvpi%VI^ognLI0@O<E{?EtXCD0jRTYfQM1
zuV_<|`e?<Nth}bSl5Iqy?Wpi_CX@(_xoBBZ4)6g&Fia9izhD1NR*B~Oj+y|?L45~~
z4QY{nAz1&JPQ)_ajmNExaTX>K2s%ruJ9<G_vPgS>yLd{Evy$bp5e3-S+CWg07bh};
z^N)f5q2!tQD(?8Fk&kAfr?u1^fHw2!r{k+qlv{t~(6VPoQlBNN)wF}S{CXMdC?MfT
z6fu=;0zgnSeCa>}O)NOseAT%B#h7tG2=rz_@wBTY7ZHZggXn^BoovA7Iuu;Zl?!MZ
z4k_G##L58?UPU9ZMY}Av-t$nq1Hx*{aSDn$mJCixFP&#*fhyofYP`aqGS)ONYQH8O
z<>;>17WSfMvF6Bk?EX4m3K|KcD1<v3agMZBk7w#kuN*6JVPm3h4;Q50X%5`>EHW&d
z-MoXRNS3{fum~ip3_$$N@wrk?Wq<7EHVmw}Ea*IH!pd0Z5k}lCq>Y5mc@@LwPCfn{
zTG}6V*o8Qlw^~YJ%L(|WqwWW*Gp{pi@@p;aU|1wo%WfS85W_}4T}MNtl+t^)?Fws0
zmIH$WIN{_nzlD>~x{g1BN+-`8Q(AEyP~Hb#)3NemyP`Ft<sXPh^p*&ZXm;t$1zMpA
z5LQx<Ha9cSl<mgb0*7c=`s}OBCgdl~-ROm+Gx^telCVTH=Ns)PEf~PtSlsp!=GT1X
zpF@WiFYU@ySk@GL7lVf>l+iLQnWa^<SRLmbp-A%13khuCCX!&JWd+HPGu<$?#<GAu
z48>&GB#bVHQ8R=iJ&%O+)8UmcQAPTMyIb5trhI2H+CPtKKGawVjJLel{}xCbc*Ssb
z8QvJ1s*A(2^VyGnE0WPpLs2VICb<q?;f_2q{eR;vbac%B|9I>2f2^m{7C`0;>j`-E
zf8(wH$%g@4m|fM2<ZN75f+V;;c?POw=EA?C#XnH4)ge!SZwoqMwNB#7$-eb~0uog?
zC^aMD^DL~P%h?)^(jjTwQ2=feKut$wTtFwI&+rmjl!{4?i?xCEAfzIPF`J3x6JzK_
zr^_sc&6!?*7K&ww?HM6$p$d9<w8UgUTH#WoVhHh?F~*K%rOKyvPuCT+0UET7%XTei
zNIS{Q3#Jm2sSP2txUL}aSe%dh5PA(ZCv!ZUf?74)cbU8f_Sq({#JB^+BiXpbFeUin
z&A8%~d6-?%+9+{;G}3@XMhL}n93;54q+w7uEdsp`sr)af{%Rge4x<G|7G-Qf1(Lb-
z_A|mTh+%y8BKikmI405ng;vcAcuZXoPe1rwdG;3;?%l&6tq2aqR`SEBJy!0@4{%Ma
zG)*Fwt}B_p=ULJ;dt=5LZlwre8^N?qR-R53t1e1DN4aHFw|QCB)h39+K;U;$ZNSIZ
zik%X?B$w}Um@KrqzH_{35NickpcdlB2&%J-2nJI4M-SCz(okvAQW)WWmGpro2hT*%
zs<^(9d{`$W#Q8zNC$27<BA`v#D6AL|NnHrB%Wz8iredcVSw`cMx~bJ3o@d!YXB8V4
zy+Q~pXmq?nE2V|cMZhWAwXMD9q-KZK4&@iBc#Gz5p)wMOws0LsmAAT`<*tF65(%$s
zp#jVv^Zy?J#6Ua0rkEavW=%r{$4PY<5mKgT!-gZ(3#$_G97G*#dT$FTQ7;i{kK}cf
z=Q_kXbD!2HUUl{A?X=oF@!PwWQ7xw5qEO}wZJM)G7o;@JSQo7E0)?!7;fqkumn>Cc
zVQ88?r2*a$p#>KAnIENR6QGuH6BiPY*!SD>3@Tc%QL-o$X8F$10*-U9*q5Ah`SbRP
z)1BYJFl3)U$+fov*nnXfoTCebZ%Am$y&x##=N=_Yrj}(-tB6W^5z00tX+*dIFtsp?
zc#CEtYl(~A7|aT%oc;BVw{dfCIqwUoEqQ;bsdLrNepS}v26*SJQN+OvGDwzA<jFEd
zjLJ3FDP@S_aw;mTf>GY{2upD-RfdBVvkHkCiqMB(g_J^FY;&K;+dSvuW$_fc2oxH%
z{Y?vXflZS*0Et^Hs>BA_<JfaG+Eja|%mC;UHZ@o!e?dq9Z4Q`=G)V9aYiPq*)x@<n
zg0WVSr(HSE9%5+cH=7XITIhD~>GKLb`S9b!qeGm@DC#nSmR`P}pX{{=bBRjzl>L((
z6H<|5<jmDHfH4%&iHKOrY#Nmtg9zpaSxPD|a<8-$%MysKIOpbCSo4~&6=MfE-Uw!S
z{{TH>==bN7;f5Fr3g=H~6}aF=!EIAWgc0cmfvkFhZ!1>0exsXASag+J+F80pMkPma
zL_7i&2|&xG5o)~!jiTLSS4?mS*E#bQ)OpP#IQ0GVd7RKWb8*i90BEZQoafuDEy2#8
zsf4|uZA=q4I>lCx8-j!vd%O_6CilNs5NM<5yj-viS&EJf%9z{7Dvr%ZaM8J8Q@~J#
zQ5>ZdZS(&CeTcjp_j#n)b$&VdfvL`a*x5~a?FN;nIq3}R{{VTfhFz;U&xl($4^&gQ
zJ|ZzFnt+zIiIM*RA|j~GbRpU$f>Dj8nH8w536TQVFcGN|F0oU0BMM*;o?ma=y>0_6
zR_=I?DLLQII+KokLpZ*3KCBhVr%rP+hcLU#<^>dK9{lFg%xSl*>9LkKh9az{qc2}*
zUB+7WjN>x1U875hq$Vm%YLe8Qkk&~<aDF900)Xc=lq+<JQ+;8@yywI<RyC`f)b)+k
zKE8SF3)Rz}<LwbpXWAueXWC>fgQEwx{{Y|2uk{|h_LNn0`Zm(9&^{*1F9UeoCYnmP
z{RObpEWIO4W|E}!gdVPQ(mPOGCan&8#I!`?tfOo|VbA0m!OrsP`O3%IE`y$&`@=yY
zn=o6b+=Kf7(X7dWQHYj?=`jOn;VxE!D+4tqnVOdhsF_?P1{)C)H!eZ2I63Vt2sM-k
zS6CENrk;8DgmV(I>C654OqN3+Iof5x&*TeM^Gr)s9Ok~0xZh|1t@-Z)W%@B&(mDFy
zp&U}nJ7Fx^CdA8$Nr=R`x=e#Kqtu98pu2u}`a~G#H1zKX+QHA{2FG~J9iYX$=QWD@
z#VFUF-x7n?34R~QYF4lz&UT0>*17E%UQapwnU<LmK$Ud5YV(`LZiD6}tAfmJE4<XW
z<49?9GWUwxjRUl$79uS&#OOYG=^W7Kq;&^g5`wI?%*m4+=b3#>J>bK;Ke#n0?Ta`2
zN2OaXDO7p(jdyQ9Q#`rnI7>@qLJ(#LL9zM$Kzfz?j!KLNd9;CXB~cEM*u@o07sNPJ
z)tRiC*lct0;fZrl=e^_4QYy0V<{Sh3?W&1x6{ErXfuB}h8o7Ockg^F-Hj3MqF;#y<
zdc&$Aas9|zF{_SSl^1`<?xRo{d44zbnLA+N?T7|n^Qc5YW$pc%wqB4cY|FLn&e5|_
z-QB*Cn%tt%^c3NiZ8xT-qQ<(zW@?n)8U|_kE8VYH_nhW@Mh$P7ja&Z!nNS@m(06<E
z`@zZPTwIyF23~V9N+&E8-GW@RNaGBu)*T?%8Y>e|GSnb$TC*-jsgF9I@D0_cuHTOl
zUc4V~?7>za=7u*p_F<zP59fcAB@-cK=XfZ9tVHb`RI0?yLM@uH2nWO}BW%%8(5|wq
zhKuu$*YX{!pWpUaI{yHjtJ^&J^ntK246H@A=957LW5jovX@)Gs9c4x^Q<xM~RtKa~
z>6&u}i=Lprd-#hK`G-OO0GVDj?q&g-6U-KoXmlmoO08xvCTilzQq+!|blf=lM5LzC
z6%6ew3ukyh^_T5c?e78V@8g=o?@#lhf?vNrB?L`UZuFPKu?-@~nEC|<Wv;^M3XR4l
zp=OLkbU^^__5T2zxT}cUo+Uul{ePJ%Q!+JZQ)s*#XDwD6iP4y9=4@#ZG?ddBdzmcA
z!H<aKrW7I?xFGD>!82I-%<pJE+OVM3hxyjy1U7j6M`4=QTz8ZLk4UQw*<o&Gx*M#q
zWHId0WGXuW7G-ppuBlP(KHGXgiR$~p7d<)l{{S*U=d5<GMleRvR!|P&JDGWmrOd^0
znk6o=gprqVQ4!GbEV9<L5|ApG_0lP;cYQqmAuz4ikMnLq5bYm`qXMO6*o$K;Hbhq8
zxkE%L2WT#-jA=|C5NUGG+$mnBz91oy+e~!r(kfSn-cr|Sa{mCEG1V?E&&0Nr(z$l&
z0M=8i(h^|OIAc})6;|USJYnUA@6eC1q3xQ6tI{f~qPD$QngWk`M>6Yn=kxs536}0s
zgQM{hlNOul52`Q=lw^S3CTPl9)?~pfsta)yQY?Hzi>&_upV+mXG1&1d3W@@0ryRJH
zy-m6I{{S|V0cqLzm=4!FL`)fmbeguq2|%>5G(oZ{8tJMWCNbh#HC=Feb&iHTzj3OW
zb{a$uWT{<r@&0ENGQ2+#*mQn=izPw>X+qNl!s?l7+<b^P5K&7AfO(grS`mW4%F6{b
zgKZ`)bc&|6FQcJ;+kc)`WnRo{+eGRN*`1Q8#L-dc?JkO#7b=kGLR!k}jITm_>V4n>
zP%gY~^?o7D=)f!y=32(&ur;&otM`6C&3Yjoj?AczjH9<M+Z~zu2FIjVOrtCXMp8#?
zOqyWE4uJV()zGh42vYSPX#*=Z;oHo*mAlSL+P^(C{{TCKd`)EUG6!;F_P`U&=^=)`
zoEoes1U5q-Mu2fK7p1~M(j-@Si%x%FF?vSU4AuKar<)`5P|`O;$38#Kf!hA@F7s)k
zJN(Pxv_(Lvh7b>`CP{*z)*-SpXpXR!(=5Q)HDV?O?!YRdWOw#s(?lno`Q{)~Pk-}5
zvO7<Cl)FCCu<He41(01?Oa_aVIF?zKgGnq_VM&*CBN4E~a%L5?oT$_rABYl6i#g(C
zu2yXzSF5kv^AlF~fD!1x6oW)E+%>3B=vu%-wqnZQZ!;zA_?aZfoIu$fYFQ^qfIv_n
z%+^HSP<J%y1zXe?`S+TNg3&=2X|V`k@4uXTM9w_KT5eMR06!Rs(K81~RU*wWyU8+A
zMO83?6no3X@g0M#TAi!rI*1g#=6Hdn!!tvG393Izrc4|s<bqJxgEwo2tGX*ENR3!i
z-Y8CoSTI`W!*k{XAuUI=&j&70$Q3Yg5Ri3eHC#$Y?tcFO$V&Djp?I1Dqw_S|-{vZD
z6Gm#xHi%`F)d137!~qIr9|$?SL?G5}In2$^F%rS)^Xt-Db@HCi<}R#MdTZ~GNTQ+G
zAV8E}IawRg7o!GKRR|g8U{Dguc4Bh&iC3DCwM0a299$Zqw1Y9s5Lq)McG;9t;}mrL
zOVJn>T`xKOl_9J~AEl9>(O<XkFDfNnrc`%|Z0RgD49dA`;#;ksIG9T^bb!-2ULm9w
zQ&MEqBEk>i3aE_^VuGPYWzhn(z}DkYFoDsI#KG1JcRz_zJqI0~j%>>Vpo6%TmSChn
zTd@;pl<PHywLbEE!<-89v>ZnI%v+TT?i|Z+c&iL~#HtF#Xo%?&HcCOO6sl+f_iG=Q
z(q&>Q4|udG(see$7OgRM_U|hOODizu;%FcZ3z&46Kzqe(7=}@Aqcehv;%<urt0}1)
zkY<7kXOb+ivK~9Y<DZyUOuUm81Ovg<{dI+sA%82+6CjXmc$Z^}lq12I;V@IrPuw>?
zH9UVmP$S><7gIMOwh?Nmftj9=s)#EBLm%{ieuDT%h2C2REL=Jup^D#g0Ktn*jiS!b
z2h=FUlIAC+9<q!MW}vpAP;UvemQes44b$t7d@MzEe=sc!bCj_@kcJa74k?(Zmr;Q<
z(ghBUe$s*IJpm|&VB#X~2;BjO)Fv?lP8(hb=zB#V2n!m4>v@I%Vpw4ku0}GbmZVI~
zfcq!?fV<cBN-ClYb68fJKv~d;-d-bU@pTopL<eUNu9BcDgeRmBD(X8Tp_VSA1tV-?
zCR}=%l)+^5nz69!%7oLmr*BC@(=PBPL$YI%Fc|M)KTprJvO9hluAJ6oQ<;Kh<0@t}
zV3%ZjL~9>IOzO5`yS4+^im*%XFi{rXDlS~^`YNO_V31lpBD!~q#X?Zh+VTGYSY5X;
z4y0-kDVT|J0ijyTGxhw%Ty>cTq-lH9X1&Lv3tvjvfz>k1zol>Q8zV+M)VA{Fi+Xf3
zv_$)9WwQXs_8F4$z@(KCxd5tHR+R?~YJn2Y=$2MH^|<y*&JNQ70iyd1c&4abPQ<Q|
z<_dtXbK)xxR2JzA2A4^22%W|N${IpcOiZ(0(#H2j0E}^M5U?VYg%E5JJxJ?Cb4Uid
zdYWSn2o|*CZ=)cMluF&=XA>+1V3(P7wiuU(e0}BCw?egwwTd~NrJ&kroD`6QGqlAW
znR|y=YVNY)MWL(>l>l3nD*167uQ{2G9sRQzb{{ge*Di=p03x)<4q{CrYU7!`6s2}g
znUwCz%+6;BLSTA`s6bU)MjuH6u9D>)WdfZgfb2n=i(<mmHiT`;RCMRJyu9<vj>frB
z=a%y*?)ZY#wPF_QFL`uabS8|;0+CT}v!F`_%W9DY2GOoJV6^mt?G2(aM!Yoq!l%gs
zYhoCrOo(X~>CP~VFgft!{>y_v<8bc|$V~N=7hwD9{Rw4F?1kz>IAx<MC5*6kf+N~e
zw{(Oa>`;WGtg#7lsLk}X9obNKC9`1ONa;{B1hEVbIrB^*9a!-S(T8Cxx`Qo5vZ^_n
zVidGPEi*{WCZx{27G!{J96bi1t*i7Tg(RZrgflEyOqPQqy9s;Ljht{St0sVyU~`OV
zw>n1Ib&8VAV{G-3WsSj9TEum@j3KNkaWD&+PoTPvDF|k)r(Rt=OO`4Y338Qi#>)=o
z!>p-$C(;ISE2K>sf(~<7HnFeIPsA&vs#BsqEcKTOiY+nM5iN;B7R#F%v~?d@s5_q#
z1vQCSwBAscNirC23rmfKfv>E8dJFC>n3W7SDDkvw*%gLRq0TU(-ta{j_p_rdyu&LZ
zb(M>Wc_Vg2Z8eicOlsa6aTh`aGb}oz0RpCC4fK_f0j<Go?<}c^eWKAsy1P0aZ4wvF
zrK5#?Iq?}5VU}%iy3FoCD&}2hhzjCtK~2Sqmj%``K8_$_V8$>@DKT8flmEm3DG&hx
z00II50|WyB0RaI40003I03k6!QDJd`k)g5h!O`LI5FkJ>|Jncu0RaF3KM*ZlY`+c>
zNWdRw1*PSFcaDwb7$Qc=zPkRfAU%!~-#6@Kd>HG^{>&7-o?WtG7jPH1Iig*hP4kaQ
zU5xJ?mW2NOF^JwYJnrC7xE6RYP90Qrxr{{<M4qdFD7uc$uqA{VKCkzLqEn}%gbmbh
zeuwplQA@IXVI@~}m_S_#XE-wRu_wkfe3tmesx35j49f^2Q+@IYEgIu{>mp9J`(!{s
ztM+gSJl@zco(*@#Q&*~MSj2cMTgfDsx3+X9_&sF)aYns^kB9ie1yZ1wd1?lw<KI|9
ztcHho5j)7fh62-ker_jq$o9A`PJfdH1Y!702cDbWB?Jv$t_%mWpNuC%K{jODEzV?_
zay+!aIFK)<AlXQ95&kfhbOppPHD>;CAPeZ<I0O@ee>iYl1MYY2ns-|#w8H?qWG|K=
z0dDVIvOog`S%pfp#RT)NFpa~T>5QteRbRi~1VvJN{V|rCRC)3Bz)H&RTjM!tw^iJ{
zm@!%z$1@lTnr<f?eXt6oPen3_lt5JD2<_sYn2-%Q9(B$!27wB?c>V7HbDEXt%|U6Q
z9!EA60Q1C|z=$Tpx%z!`X8{&ZKKLVUch2#n;0K=Z0ZR_QeEqRC0O(J?DUvz9ADmPo
z5TUKR>+AQ(MLjI*6OszLcMbmlTF3yBJ5JsI0C<H^>e%lA1Hgx08L46y%HFVKh;JWx
ziU5X(#N+doW3X!oD$>g))9;X??m#*#h|qA1czVchfNs+PNmQJv)*&D++`2JchN(E5
z{iB1gWUr6sHpnDF_TTS}Bv4-QK`A9Y@r)Jt%Xqh<{{XqLI?)rUnhB<<_r^2??9-A8
zYp;xBK=<og)*ZG;j$1&6%ZbVo8XL=sQnPdMiU4m6bvVl4L$N#cGKt7$xi6ejKH|Dq
z;{*dh+B%%!HaD6);s>W*5?~W|$JBE7wNvW7IE1%u^?<b*WaH=Ygn%$=4eJgNiq0Mr
z0U(rEd}E4fklrfTtDE%4X^XI*M;$0Y-D7Xx&P;j$gtWNL8<e1$)a#s34>0MV_4vx5
zjw;mn{niu#Ea%mc3PdSdPdLcjI2``~onlCr4k3ca01r@KAAEMFPaIENS`{tVwsSoi
zeLsIV%pe@5zA&^X9Fg0z5rPp1nfSyY1&Z<eF$w^Yqn@>u39=k+;1W0vxl7mYSt?6S
z0qgx?&{h+AeeyyBOUKS^qzpUsafCu_>>PeBC=uglC}7kKd0Yvm_!EZ_luH0O^v16Q
z86Q6y{9%%yC$HYIU;)@0yKAl-a_V8p&({P29vwe-?}0;8hlz-^(rZ|}9)Wd@aB0yn
zr42T-=NN*C+3ST0v(RecAX{d<Vmrsc^Ts$F^Dr|dqMgi-36A5>tB&kGJ1hSHKKV79
z0;s-m2}l!L(tP`25P$)tR~UeENZ$Q1m87cZ{kTS;euKs`RroEY&EKz#76dIhbLo^y
zvKJJv*zNesbZx$}U=suAV-lJ-y>Ap7tGM)I(uT&2#nlAkec=+Va#1gdgw5^Uy1M)2
zP@T^^^uom`5SUdRdvPE8jFLS-HH}DwgZJYQ=)<7xe`X|vv&rxN;FV$qnd<oC5LoQw
zIq#eT0Zs(hxATZW6t*(AD3}QApE%eXA1`0uu3j{b)340$FQm6%K;K?6H%@>qX*sA1
zd;aiB&^TUqi0LONI>A&oXR6~UJ0pAR`^30jqfK=mOsp*Ao^jY$NnL9i03bnI&QQp)
zK-uy8)-a9$Olgo*Zpb)QgisyN#s<cKz?J_1pPZKnJWQ}^8xV<rvkeAMx*^690U%Qv
z5h}=$zP)C_Gy{h3_xZxwX~ix`rrA>OH&l1Q=Oc0U&4~mSqnlz9WY<y?%~jXe;|Kzf
zmHcE<7eTc1jUsomw@0S6igqnQ*YT%3(IMeIelZygG#cF5-fi%a0+q3b@w-YJHGB5J
zAn4a(_+9H1B8LvJAkgnz7vBNjp^WPg426cr2@AcgnGJ##Tww_pnak;oDiwF*Ab{6J
zKN%EYE!}&uakNMyuP#1xfL{gy7EwXl>yU&~lcIUb0i-1i1OdQ%Ot}Wn?T-eIDWv(p
zfZDd3a&?yLz;*m#iqzs;=Lj1P_+GHL3tN|Q^DtEaZC!2S4FDZbS?Pqd8<+8fEl5r~
z?>Gv%Nq^<UWCM}k@AZr+p{3ozY~AVS1gU89X#W5?#)MFGZPSW29CEVZ1pweDTE;vA
z2d|8q6e2Otu4t&*ZhJ|J2nb-l*$$FK7Hqfo+Z6(1zfy7QhopmK{rz#_ERy`;1snY0
zPzsw<c%fDceBwGmC^gl?_fIDAkPdtF{kgy|jd1b&;t-)2-CySlr7WD2zkD<ZGz{?j
z=J>#L9x$+DD5D&-_cNsNe|X1Wlq7YBL}d=1G1Stl={|qVWTUbqJ|u7oR<x>kz2PoV
zSM48+7zl_Ma85s*hBbmekFk*vK{s`I#S*Zw?=KzGseufdn6W|;Rr8CsS4aHBS!-eZ
z;t1O2>D%`Hup%IN(s}-|$(*Qmo%8#?v4~EaQ&*P(0zsr5z7yLSPa!uu#|mgxzk9-?
z1^}1{x~8Gm*Ayrz$-XD)jRFl*PC5Q@fJ4tezB0Jc6t6r_!||MrPkwMjGg5_@?TSRf
z>>dx_zBJKe67c%@?;6KIyHDNYfe{pNzh*{OBBCk%OysaEg|*i&T%5de78PmmUmW2l
zOo-5S9zD6rconFi;1GkU`>aVAqAmR3q}wQ`Jz(hVs<{gl)qSz#ZQ@ICL==xM-M_yW
zN=`cf-Xt)0^(%<R6HRzK^}qxoQT4zBL!WLkjm0W{`(+5c5Z?w=_G|-;iB0OS(-a;J
zQ&>X8McxQ0!}_=_0BRlN_h2tu_r|UyJVfg?ufXp4!G+Rq5j8dj;k<sZO2J2G-y)OF
zT}@qK-AZQYO8)Vd;5cnZ*A#-Gd^PpT1E$_}uQFmtHA1$-&wuVBIH0lX4Pnm+VJ~^-
zj(p`H3&nMTHd+zlIB7!D0gY;I;#w+Kjd5{=rpliC%{?o%-U_TlDDU+QjWM8AV~7Dp
zoOpk|;^d^%TfRRzt?*;K3eYF6&(i`B-1pW2UbP5*@rPJ!C!PMxMh4nLL(|h5l9~r7
z^@<=G5iHq`qs*qxIP}JD<siUu$fc@s83pE6X$M_$f4w41vE>8UmV9bv$E!$@sEavu
zqQ2PWP=uppAl!Yct)?ni5>rCH&f6>NmVj&`@}7NC`d|aNwj60J`Wd534W#yQ_r$E$
zw3p5mC?Sic=KNul2pa1KhbkxE9wVZ#^#1^vE}V?K$KxMrMBv-#;iXj%)L;WgYX=Yk
zD55>@`TOCNFmC?)Wf5m19y`G&pxMIWrP%LF!-@)Rj_~MKGo9r&_;qmxqb~trahd{r
z{V}mxCXFvyYFEmM^Q>iR)^d6M^}@J-o8-ZQ&_}#fAgd-AqG|&h>+_Xjjo3c<L<8!C
zE-YXmsF$#t)?Q@j4G+Y`H3x^u-{%^FX|fCH)AQ+taNPyxG#mt}dj9}%uo`af_x@uV
zO^`Ue;vh6VC$<DlNvF<LF2NiW09#Iyar^6ma;I>!>pW#?pahT3y|5^EQdgf}*B1iY
zJ->g9nw8m&-+Uy-?V4T;7y&x3FZ`ce9Xeg0`a=Ogphu#9xPffykC*(!0I+K9n&Sx0
zz$#x8&L`01lra~t_YK{k*RXY$8tM=)ie|__BA0`IRdGlnB8ZL$Pe<pxW>rq(w_Tm_
zzf&BAa8wFZL)(Mp$6Tz8=o>r=@%3RAj%0!0J7dB1#wA28pbngSo#es^dRFXidEmp+
z&{AyZ4qOqXDGtxM#<ph9f}y{h-?0?X_I7jh#flv(WRtu=YeC_=`}xUDf;P+Zl#-oI
zdf>=g;+nVH43I=9*8$AK=V&{<Yn%z1DZTC(n*#lqjrcTj{I%3M!m8K=<-}BaZ?E6R
zG}lrcLk}mn0l4&HwLG<hYVqr7js?FQdSt{<SDbm(Lk;l9pgY2H#3;T99pf4(RJa3$
zZ4`9>06qRNL_Ku}UNI`0NDj2?QwZqfptb8)i}`XGiV<{j>m8cT?;`EU>AuUq#ul_H
zma<~QFe&Qv_QpAEAUu2H6dWSrdIJr`E0Olg9kK7N{{XlwD;B3ExJVVy$3^eyfoNzT
zrRV-(Li13r{z=~O$~r3Qbi*vTZl%9Cm}OBFsDI2#=tE9Fb&I5qk#AY4GQyo7vjqYV
zeRn2tFR>Mmz@|D9-1oJ!pX(Ad85>9qU|K*C5IC3X_QD56E>$dCqzWNyiuKdSpK}S2
zj5LABYk!U4m@y#`a}7>EtO(jn@?Mv3F13mr1QZ$_+<2Ikaf%`H9N|<e6sj)Z;q`E0
zjOegekl-_xn*y&#rUg-?hv_)O!R!q_N34QXLtgy%`(Y7_SSHZ=;BeVACkI%A5fNTC
z>lj``2t5A)jt61_=%n+3bvlaFKfF;pJ^K6Mv^RKUxK;!tZ;65gDzvx&nMuAbRp=$B
zj9H+Eo&4Z%O%ujfN}aC@;{k!B+lF`2rtp9U_w~t6)~Uni8(sjrb-;PY>9Emh!RPTY
z)OMl5KKN{cPVa}#F=9&BKAFA&>Y@cXdbbNmRYYm+KY7OHh*Wj!e)xs;q0PK=iNp$H
ztA+(=0NuQ05e?D;^}XuHUf4zXMfbixjF5-(KW;w8-cyVs9DpL_gb`7G>5xN^)9Zhn
zM7Ka{Cp+gxR>Z9a4L!ej#7Mg!@4p{&7`JFroMA!$z_NO$?-*J#0iu`n_QzX-MNNBs
zaW}}21bI$A#ubGPspqUy<2oT$zJ_=_0SFuScYSeIi3+9gd}D<~RN%l<u%T%d9u0Hf
zS*fYot9s>sT#|aZT_d^pYkSQN6*yA#d5^(_Fx3E3^Zx)5hU!3|46?I_qZKC<8X2y>
zQoc8UjSH!D=cf3<2&8gg>3<nvfS$Lz*!INKD8b@B{karJL-%{duZ;$i-<KGY;!!*|
z7wVHrng=r<g(A;D^m_Nm#YcN}`(^=UjN1M&ZFKC~&bRG=8NV$(I560Q0Q0;-0Z1*%
zoT{d}-bzRfRQUYi22_tenV=*Ro-oq#9ZlzuP}gp8V7nS)0avOII8F^Lvb<rn0d_0C
zab1xYg8utr+O*bQQxFpIMD3^7pXU&iqNqUknDZ2XHSJyDq9(JQ9b_^g4YB0Y`)~?r
z3+D6o!a~hR&bZ649l?CR#wBzh-En-Z8^d_xotTGN67Ia>0?>n`>Pdx9Nv-9P8`nQ<
zZ~|^&NI^gwIsWrwz!twxwrSKOV1HQCqC_<S>+6UV8je)e!UPwSSQxIjrl%9$VIdTY
z(l-$UTU~42^Ug-^lv09o<2eHKI)3X8*#Lhh?-<2#5j(rz;{aXDBJtvW>i{GqHqkuw
z<I~>)YXN}(c=p7O_3!9-EAGq%35p#PgVD*DzFHWn;C`RX8zSdUKPLS$2tzJIuGeh1
zslkFM^}G@E`0a$#x3#-^y|9bwBm9r`)+ud(5jr)zG-Gx;_WtfYHbDhGILS5v(K#^)
zh~)BR8lhr3tYsSr%3qY{85TMYkrs)9B96n+>4F3l#8@+pViYMx@UtGf7k<tl!*nL{
z#G|sNt|B@&PA5N1W?F`^W6bKyhSAv+;eI^tIK!5YOpH^tKYw?dGTAS-{{YNnSJ^?G
z@r?p>c)kzcY)0ElkmEWsPT_U+pIov5Xlk*4IE13iQDJxV<H_%m6xOgw$6m(q9fBAk
zdj2uhmE()&j~|>T9w4jfziu$^jY3sTv*P`Iuwu-FFMmB@oJb)IP*Mlm97f88(zrp=
zky5hn5{+&mSnIq%m8OX3z#<CGm=_agK1?AucT^4j{qo4c1@YFq;{-?!2}T}F0nc71
z$9R@4bUSVF+`nwIV!H>Ej&WTfro7`C20G=@*@5U=z0aJsAeDq%)B@Fb_4{DCUMORC
z{9~f!vJIJDy6xy2r0bk$qD5nPrS|6&kahvzn%-f${9)rjhRb{(PH-475NXo`1tO`o
zrSN2NATv$$JNNw=mh6NBdGdWf?h@<LE2048{o@6;!@BKV8sqNc#9>pTLsZu;z45@(
zE4S+S#X$`YEbPW6uHN6KRQG^)mQrp7V!uZfPV<i)Ka8uQAR!a6xL7L35m!e89Ar_T
zgm-|;--`HdWfH75oNstgEA5B@rxbpeNX9p($MuP6Ql)PGxlHDQNL*W>!?WEzelwUs
zzWo0Hc<nE(e)?e6IdCsVFk{*ir|FJs+aFwkQj2fZo8ZC<T4=AP^qHd$dsBQ|T^S-y
z0XWyLDsMr*O=2SXR;!JBYZuWepbb4c#SE`zf{tIA_QEwz#W#2OFhfWesf_Zu6HCvw
zAS!}_!&t<HDmM-d)x3{RHn1qAb^H9~M-JoBn;-x;VKSmuhNpO$ZKyFFx^dD6PG#>6
zG?(M{{`Z0^QV4-(8t2oTKoq9g!_USLI|J%9z5Otx_&N#o$AA@9{{FeDz^@4d_g_ql
zMxnX~f4*=fjpgWa3541d3(NSz>lhC$Ul}z~M2*NkvG~eFNhnsj_nvs3a;l>{(~Ig?
zui|7)Ag5dQE5b|?Ei&@FoiARVnOI`B3cUCp57mW<Vqgm+=hq@`mWd7DC+DnVX9x}8
zbHVk;yHT{+-<PX{mElfTm+_87t0;rcRGl@m{xE|<MuD0bsUjt|1h~N}Rg4o205PiP
zHvkUz#&Ffqw}=#Pl!D(!oTKT7bJKq~hUy5;a#kmD9pV5;g%7?cAzW@d`)lutNg>qn
z#xw#GM|Gi`o|mi~LMBtLJv}fJr~o@2a&Qqljv-<bnRkdb>w$cFVMcX8`{kaIXm=`Q
zT{R__dVP3t%-E#b^-nx;`Nm`tqP1Ky1ql=cI@^n2C;{hwf2;%#0`8|8#-(u(YMf7O
zJCG6(&Dx-s;|N00-fN&h1&22RfGtF>-u`*QAk@Jaae_d1Y@Fc(0FraN{{S%(bUnX+
z%wuVkyXPD)5w)%S;s@&yFTacyV!H>Q-Y|(4$5ETcP$DD)x%y!=i@HOOb?yD+K?tSc
z#ONu*f#30jMO-?1Yc8~yve%?7)4_nYJa0pT+wFoIr3Z^F_rx-y3w{Gwx}Fj%_UFG$
zGIW5%6y<iF59c5#fEz{WF)fEk0qflGWF!Ot7Faua`rsEX%^sJlreFmOiOLXkdf>bq
zG}llEmpkJr>vz*RuTMCNDY3QDKAvzChG`@8^~yA-g0a`G@fuOHq4>um#xt#O`r*#W
zzMt24^Z*nd;f>r{HQnR)&LtKKgHP88Z8f!R#%O3VtLFM*^BR*1iLI+JD6T=qvH<w3
z$XdJyI>3OPkG1{!<6E5}OV&4ad=TeY6u`EQkGJpF3{65OuTI~5cn+wrYsVNAH3x<9
zesJZRK?m9Rz<Lm=f3f~C;Ul5HUa_Cmy(z_?yTBF64)FMV{#<Gh(5Yel0pD3HW|itq
z`tV{UIfJeqeej}nR>OR!N2jh6hBva@HZ}H-Lk55jY0e~g_ImF%h=O%I?sbHb6x}+&
z2~unw&OLaTmI5JEy>W)1$`7U}Q%8@c4WzO%P2hn=1N-Tc1}Idw-{0|wFe^;&zoxPB
zs!gvO;}=jHq1W&6gu#1iO7B|4H7RO1amHAP>%ULl@S(`%Hml&cL56Vv2!6Wv&9y*0
z57VCUAQ70Hc^<99pHU-IPW-p5F#wLmSIe$=V9K-;NN<3|pa6tCTD?!>7Ku@zJo#sE
zcn${>gDOL!NLIvh;}qFwC?!4`))7F&2$W&Qzl;hh+OPPYa?2eFwn)3y1?i$nI8P6p
znG(A3ma|fZH^;s*r0fS**BeUoSNe603D-kt_3??8X$4()!H95=^2VNM*yO`(H0jb{
z6I20sCF5kku8BH8#BUs9x+xMqFw3+h6A+t?TiwV0#|}-u;fX5jNyo1^C~b^RO>u*f
zW)A4|dbQ%<G(bR*bKLs7!kY%7dpEc3k_eW?1~Ug2b#!;bi-U9uHf=WVyi<4;=+^0r
zuofP_j?!;uc*zYC1YP*=c$%itMSJJ%^|;232%`T0Tfavisg^c*!Ais+>kgn1OWwcC
z004)NTp|cs&1?2yWdb_;o8ztJDd<wOUOL2Vl!LI}I0lFSwWHGz%cHI0jRv{j?Zk^W
z!8yYNNbEN!UYR2emT>saI@r+Ee0{NsE5lDbW5IXDyZzQl7oG2S46<{pH-bz!T362a
z#mOwZ7L({<!L(Wq)o*?$TzSO^6`@Ht>5w8VBtBj`^~)GQgsXnf?;UC$!l#}_D)XgF
zvr*T06(9$1SJKhrBs)~l_;;4u7#@dc;5>JN_@a&u{02co&JFot=GPY6q1b-dVd53F
z?x(LuI18zPheLWyg-8=b^!niA9g%nZhFnpERy_=U0(anQ_;6J^<+XitTM27l7%X5b
zt~05ZLV9vx1TiWveln1%Q8zIbr8a5*0J&R9BO|q)C4KGz8srB50L&_eM%4HE<fdv}
zjx*beK>k1GJ@gTgU;uvH)eIca8gG~E@5UGx?SOE9Jz%2VQLkcoUtC3q9yz$$uw+;Q
zgF%4B^;`3h*sZPpm?Nk+2~b@hX3ysh=IcU8!@sW>frX(dIlqVRmG2Ws0rl|j6QM*~
z80pvV-vppck@Wm#imxXCL1%xr1ndsUyNIm~<i{bi;D3DJz?WnvTl(_j3J4J4dcVH7
zgcU&)I9`8DARf8zHWCKA-fyH>HWD3z%wTA6^YJh`ZlJ6^axTMBdb`75AX0HBe=`kI
z%Om;68fv2T^}-B+HYTO#(>8Y0d!g;C=PMU19z{6)o?SZGKyqK@-^MOp=nrLlzq~4i
zg@bNaS;{js6Tq8)b%YX$W7!vjStKA}OKvZmMgvF*`97I@0Ri2-pVl22bs}$mFf%NM
z!=1-&c)+E7g}(=<w~LB4&{6K|0|E^n13%G<V8)@N>U8Sm@=ZC;bM5Y7r-4Snx#^A$
z#O2%B{bta?7^lgOg|A2B+XJRju%};aG#Z2ux_vvsEbjuUU^If+z~JERtHi+&Q2}~`
z(}k>6czV5NKnO9njHyhVdU1#xoD<_$mUzMJYGc$Fhd=HU)6kIj`eEWj1pH-o?c==M
z&>q`d0MMe@h7}>!kh9R^qM(8YTy-&?jS(n)dH0_Fm^~vAD?EDO>7X`t{r%P}eXJA@
zKAAcQe2?*l!IhSC@sWU?^VSfeidT3IgsgkGh(bCOqZW#tNX_3~f6PK!-#~soKKZez
z!*2aDKrYF?>4y~S)`t|LmzBsU#W{M!^x-7Mp7kfv_GD|dRW+#@^~jq#p}+U6h+wB%
z9dnQoRpcHu{bNNnO4PPYB9v6@1Kt+OCp|OQ1ogxEyCJdV_VDkO!Eh&%=6JuCB!#5x
z4^QQXsoguz^?fiwJIe#$?Zp>?E5qsF#8Wi}j~=VPt__j)CE4h6Zyu!$rGEH~XWIeg
z^upc9&i!84JrsfL-yVN^#SEoNfIR;IPfP&VRUuQ5>A<l-A-ml0mm|Av#Op_=5L^ff
zZS;PaScYv3(S#?V96rB%x<N0v=ZsiI)*K2`&~m6$R77&9MMwC?vq9`9r><~{l@s^$
z#;B!dXYccu<C5e_Po?_d5C{W*J~G(O*{>75{CnU4l4uwZsiF7u#VE`xNzaw_ckPB6
zAwqGR)tw(6*isgyS0MoAdJYw6YP-raxVEnvUm1vDpfiKl&ilbbY*y>XesKbCbyc0;
z7y>|(VaJD_@*MCgb&kh%p*(qTjFCwX>jXgPdDfldNe3%%l}(HhiYOO^K6%8P7Nq=Q
zFN0tMWWM~lP2Ac%{{Waq)j*g)u7Q--9pM`5RE{J-n@fN&N%D29SkZ2d_l2<$T|8fm
zKu)00IQ{%$s9vSL1}0iFx0A1o)SJY!_jrbMLN(hkC-1!vZ$S-rtX*SUFiJk{(eH+(
zshm~Y*CCG}(uL^rF(5!~7WTfF22mX^{qc!SSsrK4nVNJGI%@mzbc9LTun#rEtpo9W
zcoXlITwGe=zg7ih_6>Bpo;|QCQKDt<*x-_(PY03Tz8Wn>03UPq#@qyXgVgbbPp&41
z)5Z)c07sGK=Pas77dP<8pg;!w*|+p*$NI!;nmdW(DsV=}^ODn01+@G)sM9Q3d*N{r
zJHA|2L^cjr^@i4tc~Rp5ien9YUU0KwNJnAd_QANT2yJn_zUQU`r$>yS1-Gm)d&GIx
zu%@-8^}r^wylq3}hFLILwHLnnz?33(TfK=SxbY^HDw1gW{_~4`qfZ;}jc@IRWUyD$
zl6U-Jp3#6XQl)pkZ~;3Wk61?Mp&eWhyHlrc^^6EE?J!UaqBq+JP#6U#cm*|(un2p6
z;?Rytbuaab4##JI^AS`+JLd!%xG)rpqX%vvV39u0`;tHfZQ+7iX5lQCSPVhB6HV&^
zm39*Ig@VaVy2c`aZmy3w#^Mx%uAJ4t4nZ@2ZjRgI3h=<FrCX%19En}ge3bgN#M)WN
z+I{ku2`H6M#v&QRX1%X3Ia`DvZC~pi$p{hg1E%JXt#?id>t)_8C{O{W2TL-sBZW?i
z8h(fQ!5kTOa^?Oq%ad{l`hNF<k?8u@3pBW0Ja@^0(zjY|ICD@4Ab_1c`(meILuGvU
z{{Wa24OL@i{A(LZI<5PkUF5;Ev3LG?#uWfidB@a66fFJk6Hp!fhuVmkE>W?R)9<Vc
zV<g{f%2ohCe?#}<3nT%yL&hp7j5_tjY8kg8utHLuVJ?j<p^g$*-r}#u9~%!q<-R5?
zd_Xqt{fov%j0p-pz<TdDB(W<?*E^p70Bi{&Rp#06>xe?Usj9r?UK^fHxb{Aor>ZJk
zLqScxJYZS??|3M#+JAqHNnmEj^OdUOknc8Bcn6#m9}*nkX)-qEnr3KIMmJu@0XA&6
zqq~2+Z@8UZ;DB;DUFM|}S+nnemWXiof9@I$wn4a<ZsHNsA%+Vmiq~K6Xa|lry>sW^
z0UCfh7f1Pwo>Pc&%!WZXsLAK)i(D_Eg1?M1n-mQ^{*y<i(JxVxf+G45L)+5^v7j3T
zPoqKpVhoPdRTtZwP1ko093MPn&LAOQTK3{?w$@EF@VOesE0CV&2JvXd2BiJ_VF7n7
z?r3$RpKO8Wk!w-kEA#Dxlp=zGe((2%IRP3yZ+`y(jBSEZwddr*VhWR`@$LQK)@Tw6
z{>%;}#FT8WiE^~iBT?aXdzeq52eR@Vis9%05x2|ZSybNf6d$;l3Gxzj_<SZZ!>r%c
z_Q$cLX{DSmQT*Tqpe|UIb<ch<WJ{qJQ_i0mH7FjBZ%h<KI}KnpP$^voE&GRzW<c||
zcM(8J>-fsG4Y#R?4ob34Uw_2KkF@J&Tl+uEx-AB9at<p~5u!@HH9i>(wqZdy{%6KP
z?#U5sFQZNUxWc#shTC^Nee+!psf~Xgd%`dPD0eZWcm(GJs?nP?;vjOAAKhSpSluy-
z5xzb00u@pY5~>V>-OOhb&Z{r=&QvK~eRHM*hxc6ILJ~s7Fn|E<Kg>!_smHEuDKMz)
zA~8G1+UrtxzNZ@u2m$i6rl0J}HkSwZ`{M-FAy-rWWoQ<(0wxq&^(R<9I*KnW76U@k
zcJy!_z`UD^9S!^B`pAX8!1T2I`p5ey{)Qfx_l#{I3LCYm_qTZkK-ZvY^6MQGJ(Ewf
zM>tx}DA@B^=3}!DqNQJV^OJjkHu6Kn-T=|O4I7<4j660r)HiROA2<pdbPGLx*chIa
zA?7yxWGU+NXec`G+Ze2jAh8FF`1Q#+pk=g?*lD;mKov{kwmz6FXQ08$I{W4Eku){X
zcD1X{u{cQ%K&owK^lt(op|lPMLfk9+E45WWUE)e`d<kz~o#2Q&1$Lr;xG37}*0BJp
zx&_6ciA&xV(y|xENE3vr;0WFHWeRc50a)j?xpmqod)!nC`%>X9eFiD1tx49C`R50@
zQHUY+V-$sZrPVLa@a>WSSKE4dHHJi>1>UCr0D8q5i4G<4=I@_55GX6E%l%+<S{{Di
zOeA1rcP383YXj2}6?f>@AOaQM`<&z`Jl*Tg7Xgf^h$$Jjc);)!^?v>^1cY1FJWO=~
zG;x7Q92WU~KDZkQO+8|Y4?x35wcafwqUh`Q_`*>;TiiPN!W#jg^D&ey0$vYqPH{&S
zp%h#7{{Xl%Vg)zrykR7)O&RT*7-8-k@w`HT5~Fk;jpq}i;=pD2Vk!RsDZOpEEiWsJ
z>-Ay@$ZE4|dr)yu<1Fv!df)`rP_Rn%i}uDb7*yu=`~Lv>P^IMK8B)!=a!59Zdcq9?
zPM(J@Ca}6Y@lx=5<*{iiM;wVhH-_*9T6r$;G8KXooxhwbF!e$UQV?&NDZ_!Hg}W@D
zzA?0<s%^YaGvB;M3lyLn@9mXwh*s|JTzKaYq!J<N<0Jxtsq~%WLMX2ue}CK!D>ygS
zENar8nBogOo9h_VYzJ80Q@tmQPzh2_=Zq|AH+g>V3TWO|S7sHMrc(I-0C0c?=+2I{
z>xe@do|uUwFHd;N6njH=0+2gWImT4$VX_P7_v09eq1mln-yQ4MB}L%0OWrYRIwaPQ
z?-(cqcrE;4bQRlpb%=Q9n{A`*jlh_zpzz`~#Efa!0&w>Bza1?Lib3SPA;+tiWcuj|
z@|;LKNWnWFZgOB0%jw+pH82=nA8XUSYySYsED8p?&HX&_&W+}PK1imQ$>C?V2|}pu
zb?Ji}yg=t}LA<5*8gXLq04j$;jkspl#}__lzj{hFBn`W7&C51mXy$bvdw|WVG=gia
z1}KagHcCYLO`F8JQLGx9z4wa6<WDbu&M+#TP24P)*3{L?3W>en4sf32xCR99xCMB%
zl$%sSP2uc<7|!rDC?=EU=P)jW_18H8QBu5l{&9#Pq7LS;?4@m(JZwjPy4C@qqPo^J
zf<sB;>5U$fYCLcH!?RW_{qc+uAklr#Ui-j9FA7bs8BBX>y~n@4@V{f7ZGV3_#%|Il
z?|8`5OTiiLz32B{B8K~MuI<z{J5J|f`rww%yga9^-FsrO1Fc@n6X|Mvj(5Q;rQPx8
z@_+dOcrY$V+F{Xhuy{P(WZZN<8XZk?*LJzYN=I4tF|i3%9Zr9G!IDFWOdEY<1<Jez
zD33cTckP-xm&S6p=uf4?&f_Zhb<g|G8Kr<ztGp#tXC)qW&Tpg0@@Ki;aP6;Lvmb3(
z*Ii)%Evi0phDck-uNa$~){SwAY6(ic^??xP<l~G^Yb5imeFq*eJ(wCFY(;|76{YKD
z6qR1|cZ*KMnmGFV;4PLGo{ks;fLuhIYjdBz9Tujk>5Z8hYU_<*LSx8v_~*yY7D9(4
zZ2h<mLBm9!x10vkSU`pc0!F&!z#vM!xnM%eL$?S87G1o4PE7DH2RK)ikpj~84&Q=L
zZDUU=C<cIb9M5ds97zG{$P00WNx`?(#D?N1G!OCT{{Vxse!`&|DR$sIum1o;YHP`s
zOBQXH;)n(jeBwXR!JB`7{w2ZdzC#%Vuy%-p$O7??%RM|<!vM6fCvWwKVD?k9;Gxpu
zp0v`ZJyt)HUikq5_B!9&8iXNVnaQ_bIL&N2CtxRMTHwKPlnR`OIzN4I+c?*JdB6(<
zrI6mesBhO1oRh}Z{x*H^9H?)8<njCCCjRmpW{2klDvF+5OcMqigZsxstRY?86|E-=
zFf2oz6A<1IfjPizs=S9C_mfJ_y8ZtEn1~M8C!8fB6L_T!uMZ|iVBsvDa$+C|5$n!x
zLZEzj^ww#u8}-2ep2}|fe)z(LxJ8^}fQ>!AacGd~q2HWnLjqUq-<(%f8Z&GQ1UdVB
z;{ZGaez*uqw)+0e6$K!Z(Bt>cBpM|hw_HGKC6I-B-V+eDn>Ic#>wz6b=plY(_gqbj
z%?R~`YEo3zx!>C-+K51RXa4{o03;e*6oC*x2pazY<JDFw`(S7@!Tp#lzlF!ig9Mxa
zA4~(s{&Hh75h|s1v`|P(ceCG}d}pDao?p0KHa#z{Ng#zA2Y#!0I3EYWsuVdsJP+ZJ
zI4W{IpAkMVRjmbHhhErPfDvCVM1oc;J%1UiDs5)Z<N6#lAly+OF1hi7l|=E*-G5ka
zq;(KHo;Y&iq0s5``{|MBaC_pTWY8X%ZRN%97T^zF3`G)i&fVYulDA&?M(1pAoDEUo
zm&26{x(IONE{Hnq!6O>lKB>k%(+!dwU!}<^5w9}=5kfxC-%Jt=(5HtGqAN|tj}V;i
zelV;IN<(9L31=}T$JgiSgg}x3ZxF%?3O#8)*nrt0jaSp@kwM^#Prf*6(Do<q;|a7e
z-Rl9u&h6*=$kAPUP5OFa9lZ__r`6xa5>PgT2XE&R-cq$2_Qh%u=%-v};O%m9jBiK(
z0NPyUGedCS_{6nL^z+ih1Xa|$_l&q2s&ZvV*m(-SdN0+=M3kngi{p4Zw=ig6Pl3iJ
z{HPJ@7-TeqNF(3Z=MCWvIyvfBd&giwQ_{raUc6vO8LE}sI{N0~S0d~=#JGT@=;C<5
z4gw>P97Wee!OyrnWT>Pbb8zB_fx&=5d@%8e6pLWpae}SAp4<qDy)E7dbTHfJ2`6T}
zxDJ5r)$Zq<gr?;nj=YBc*#oAw?*NdzK5jPcHhaJTU2df3tKLZ6-3=SAPMyJpILU2B
zx;UJD@K_8*2hv<s1HBOB{rqM5DK0L)FP|4Q^O=D{o_+h?EkLmju~@`U0<+*#6|rz|
z0@L@-E&@8F169SP4TSVxO=VUrs_9pGV;BK!aBbpzfBiZC0L<godU+I_E^MHt#x5pb
ztn{)(EMYpzp9lnc!<(Q5ib_87E(g$g5&B*E!@Msz7Kf<QpPVMv0cZs7zMoto*KBvf
zZv%uNtE*3a;MjZPPPh5aRnnT>pV!wbb|Q%2BOZ17V%X79L#}$nMTTorJX0?mxY~fz
zr=~CvPTnR_>RTQhA$ebX0h)3!QN45KPB4VGN4{8vsvGb3@rVf;a*uLL^O10g1b*82
z!c<UeJ?E|x4k2sl&Lk}C*Cr(si_Swt#~q&dNo<fEoBsf~Ees@^xZ4iDKYRcvR6^>%
zznp^sP~OM8l0b|UZm=$*L=>HKgADge)?6mTK;qb#7ppx7D!HRQ@%hd-q~a$N6%Q3_
zW2ar=0gjq07m7my!zoY#Iner=5B`$na%iWK8##e6o-)Ad?&Ts#$64srY5Z|LLpxT=
z@IWYiYB)y#&fR9gVAHq88<SLQ6(k{Ou9%+39#~sV>fn@fK~-T<Xh36&>P5H;wi+Qj
z&rdZvz4TY{nm4I37ybZFQwrO4CD)u|hSD5AU(OPUCDf0bfMgR*0Z$x9-w%XHgB-8P
z^~4RJ?u4HH8~DUk1=nwu8rNNT{C-@h#2Y7AW8<Mp_^-}Dnlu@Q#4SewBYq2sgPs7$
zGzqYs^NwteqI=>1F<~_3u$wp52HDBehZOidAKpw!bn~l-0odki8z!dE=k{ZSLb}{1
zCK8mb7rpNQ*a4;W$Fy)z-V!>z9b*K|qK)Gf0ClU}FeK#Yk87g?MHSjr<0K<y{{RDk
z(8PsAUOeL+5Yc!~Ogk?5iRpu3c@y!AY1QR9T%Bd2d(tg6%WxE?X-wG5$@7C(Kn^M&
zlblwq7mGOGpxlx5a4Jpip5mBz?~7yW)JM<%08ZwsO-wbjQ8ot8vCg|WXkZ%bBMY?X
zK<WTU;fX(v@G9H5nB~I;6+u9e)Ro!pj7%a3gbLbrYAsxADgOWs{N~R=RGC`<9gXN1
zMmtSH$<a-j+F7jh^GQn{5V-r%&nnSPUnl;65+D#4j8H++<fxkY$yL0gL*_oXfTGMc
z03GOFaR3%5Y0VkGvz!DIIDs!)<-C0}f`~Ur^L^gFIqMmX&=-B|^w)>JaYq{~>iprO
zI7e==;!;NX<N?D*AOf1xWd87h1>IqCz!!RaVi-j28IghGZ>}W<;&;7c6cX~uh)Zds
z$2l){9KXM=3;}O*cvKOnU*Fq=3Ry)sKA!l|MvOZ*3MwfnzE3!aq=Zg=ae_RFtkeKG
zE)3d?qz9{sET>0oCI#S%uk3IHTB8k3@V>v+RX_x$3yB5n@cZJ3Ky8-`sP$HmZ}`M!
z6U<J%GjualE4F{rbBnsL_?OR&Z^Z&!fKmX!DFgygd3vOMJI4{&;h<+}+kJFq;b+LG
zq3Nf({{ZXO9}qj&q6#oBYol#UB)<vhYgyTs*C*&D4<c(f58CVZ6xId&85Mvb+p6ap
zNOgfyD^n3n3y6;E=O(G9wVY4q2ky~G3M*U_zh(Z|UuCREtSNlIxr~X=Rb5ZF{)F6&
z7KsJ>D*R&(h>1py{{YWiBH)`m2ltW<AmEBWKYX<Yb5Imtg!#ZqPNM}HKEqfF(1>uu
zSf*59;m4^p#tC%<dw*Uqk1SSl{{AqKE56ITsBtZM`(gxM8^3%-0DNe1UT>v&e_Te%
zTaXBaK9isG2XhV4klHoFta?%s4!G|i(u;1-@AZP2_^4|D!rdV4*{$M)io8C?u=W+-
zM{;=M0iZHp&FzRMM2U6#{bNN{fMYZY&lo~xr+wof0g5@ohG>H~T^q*>&|Mx*afC!x
zuL<Y#fg&m<eD{kepf&g%V_hz=qsNpja>O$gcOmIGdbq|Ns0s7N7`S(6ZHY7+cQu<A
zmlUTHPR>Bqa2ib%DG_eRq}Le5#UU55ol&937`yHOz#(2WyaY8l1!Fh0XVX1A^zt~w
zgvc=C6R@0}q5Nn3?B=iQl;-|icPr}RW&NJ`{$J_qoU;ABah$K)*9XWA+kZLXzf7DH
zsq%je5?g>Rt_RtgdVZPUIZ4Bmrrg{LI3PDX7@_O{NF9H(FACkRO}@?r*eU}@ZEui$
z=5qK*qaIJE7p%M#1?I0S{e3W^f=~<t$D6Omt}uq|eldmc1Ye=~^ujG`v(t>c?IfIF
z!ZV%|c%?vQqRaD$pcjXF#t0SU_rX!&HotsvY6W3A#3D&8HQp?)9E6T0s*KUj5lgh3
zZ_^0lL_p=)cZ!sP^R9cJy=9|C1KS-+c?<kv1C|HIC<e91oC`Q1b=Dbp)vL=et+K9H
z0+!j@U$-BQ@jyFpc=f~^)uWD2za|PS@*ivC1cmoo*k=~~G~@A49;$)EPVm{e6p*6U
z#pjvES~C@CwFKX$&rA|0o(~K1iDTowF(K70XC!$Lhf=v!A;Q4#v{&he=$UH)$6zb#
z#wF{q<Tf<Ca_cW5#<wK=V6}QlW?~ajiQ^6r;ek`YftwD59EvL<D5NySp#<1g3EWFK
zRMD}Fom!NNO!LK($W=n`4>W@7?T&!h0}yWBvgu7t0*L1y@y;*doPWb3anSf&1O<te
zj`R3n{{WVLqY%X_Nv}Aupko!PyLYTM!5vDvx@__HFhnIXO2VPyW&K>zxBCy7l}a|`
zXg_bxAaaDX&GW5}2vJ*uj4pvmP3o5iq??L!f)QBXx9{_Y0rH-iZLMkqo%qD*AWkWp
z7KI)-@r^VRdwy{Qr=mY>K@O!yj<O;i{{TKQN*r=l;Z&nYS(^za;{a(2R?loB8!L@Y
zAKhZ~_7q9u@s4#jMMIl`pw<Vj6o~J8#~rOXc)<W}&|dJS4GN|HF-%P9B|Lw)3}_7-
z_WkwAOopQA3BV7Ni6(NIR@{!;KJyzqS^x&!S{pjYb5eYOuMp^D?|kB|)XTd(j1NG<
zb)rl=R<8Tw5|?HuQJ}dFJyXj9i|PPlca8G(!lHB-q15Dx-kXZ7aW)7;m16nDt%Z^Y
zdto3Su44ZH;}*vwuT0b{@_&pjvtD%P?7^($(1w5ltDrJ=WVLkKNU-ccYn&>PMoV{6
z5v+V${OMu^@}1ste{2275fogg9XAZoqwlB#6%;6%GtKsY;o`q$aPy||mDqnb_`yeH
zc<?ZB`pj3CO9*=qdO2+1WNX#H#MO?1M@{dEfE!KPh<^Bfqy~nC^wIahX9{X)Ri0b(
zf>TN0*R<>U!#a2X7lBQlsb)Jt(Y>TGXl-R(zouwW_-w?dm05bn6l{AOshvq7?EUW$
z0bbSCI8mn>;lWV6&kP|IkuSawxm^<elME^1IbT;9ol=txf!6rKH{?co;f3_3gvbze
zL5JJ-&OSOdM%j)8uAX@D{NYeJARUG_M_Wm!Su2N;uWTyOQ+}`W?~O=Mj5$7^zH+BP
zF1Yi~5E7sT$$6<yQNuTe$h0rt$DD!iwhR|{UWcwUnNso(VEpe_6h4y1g|4}82wK4v
z3&c~u31%U30%)UCd$9Mzq9!UNfjiNDaq($TiDmBgzPThNo#T|=FS_~45N3e8XpIR6
zXPo1|Z~kKMS6-O}JYOI0Ic?JU%Ae;U{xkL;_&;fZ{{R{L4B-Ab<@+#C>7N8^##~Mc
z*Xzbp;Ty-H{{V~i7*Qy-F3oTEj9PRqiD3=Q6&A(saMk-`i4mu?-g)Hv<!qy^B5C$q
zGz;+LH9dGAr+AXh5n;olPh?nl^yT7WYIUR6_T#V#Q7_-;5W=KOBZ+qPH<ZgoNMMvx
z-%pH5*x%Rs#1L<1pQZ#63$7=o0k{XFkG2{V0<TY|92_SGNZ=j}U{mXhR7oh3@A%^k
zN`kBReQ`rh%PXIZVMufq;RqT4*XI~-s9ze^D<CDVt~FNz+rMligdB#`cyT5H_;1?=
z_5nA=;LEXcXnkS$)&=5$bzK_b^6h%W1A=!k0li*V>4x#|fy{~X4Pe|Fk#?h^m(4IW
zMHP#7<AmeMmcYE@(Y`;-Bev=UCjC4A0GTEh%Sj47irG8OUvPGXRht({I>siFg+)i9
z;OZIV`!mJ%V6s~HVm<a89JuKGxBmbKAMwr%!#rQZKS7*d!#~94jQ*oO4M&X0=y*85
zXvfA6{NQAq+8{hhg-|G67&o7cjyDu?ax3Rvo5uuFXl&U$oZ?($DEoaer$FFFK7WQ+
z>%b`|Tw$z08n{2)R~kGxUl>vjpgZ(&1RfAwOm{#A)$x1zaA8pr198A;PTt(+@T!#W
zAc76oc;bVg48NhJ^MlMeZ4-`njVF5gV?qf$;4LhX0)qblxDztNzCWy5h>5rNc))`5
zYR$iVbpry<{{WmcTSH!4RC|W?)&TY+Zv-C>3e_qVF8cG%7NF1}?0WO8CQk(Ne*SPL
zU+2@ax^no<vSNg(qTSIioC!;m?06T`-`g$el?r+Wj#JYM@L2@!eyTb#_T+nZA!sEn
z+=;wRR3JLh-=3VQSzgvA{{VkE2_3QpI8yRB_TbA@9HJ(jw*7N>BTTyop+fpEoc$(4
z{9|Q*8LHXWoB)Efqj$UeGiq0;GHO3_{#<%0ati@|!FQTPc>MkX-h4cN>D}G>S$f!m
zeN_VN6#yHXanLi*_`?8wxMpV?6MkNtDpQ%IU40L>1!Z&&7k&HXqoaY;r;m(Sam2cJ
z;O5E(nju;{Crp{yX`sF`nkqW-SNGc!0QO=D1J19}^tOo=15*(IqT^v6IU4ht0(w~C
z0T_i%{%{OTc5$i3GNXqh4s(6OYJKoTA3R_^#~8)4-vBA~@sALi!mvEBr0dQu1Or!n
z;~tHT^!j5UqM_G+d`GVEtBFS49c+5U5-@#mr;w%9%^zE8=w1H4m;%MB6r6cX*boVg
zJ<suk93IXAM}^3(;H|>*y$5({_yLJTw|(fT)09P(T|jkq@4#W35e)%nkdAuCfq+6c
z1~#Q(@=w{I+rNLl9mz@u9<D4fAB!VS3CGhVR?37WvrN{HILAaDwFu4~aqWap;e&wi
z!FAG$^~HsQkJEx+L%aT5Kqz>5-gV^MG@<UyDx`SceT;S(02=yYF!EFU<2NYUr|rY*
zT%12{06X672^yRLfuPH=lvQxSgBu1HZ+r2Uni|39i`KGg#D0TchX}85fFiYiTxP>(
zWU2Zoij5%E+6#B+`eXG+Sr{wV?>Ox28aS|qOB@oMaX&eE+hDZ%;L#~vSYW!0?7|UC
z&0k#5xgS$rF@qS@Y&_x6YG+t9L=~8;$nOb<zp0ZEfFR(_0e0L@ji!g6P2dR&O1Ihd
zm`&M(W3uu0`{GMR#pA{LVWr3cFz5-S-#N|$JoSVfjyB_N;Rd<q7~6$$lKg8OtX2Fe
zg+oX!(YA!;apT`C@m^Z-b}X-^{9~k%@lpURjNh&Yk$@T#;E`;&llaw?fF8BR73ezy
zOf|tD;|2Q>N-cw;9RC2!wm@7vL0aP*0=#68WQ=zod|$(?l@+dol;YwEz}oJC!MVO>
z{{Z+%SFAFu#=;lVIogf^l#X~YZsh3h_)N6Glzs*q>YP!(LoQwA5A}k8p9TRscre5a
zFdRKNEVjs`X2kf$NfOlGKH50NToBm<Z5?g=<q8c0WbOObHAtasT{r4JxG&9*%Dfa)
z>|q@$Jhw;Ve~e^ZF#hr+O*OL_3(LSx97Zn`Npd(!I}`VuPDN~5ap>Vtf<t>F>E|3l
zq*&|b;OXiG=)jIbq|FF5J?0Mw7aAaO!;C4nw*k!86vx<bNNSg?ELPMJs{U}UB1reX
zC!AwX3D+BN!QdU>5GuU}RRGlUj7tr6_pA88;97yVx15iWRjF@yZ;NU=+jQ^0?-u}p
z?5PRzy103Zp-3$}9sFW1I;1n?h%4#m4Ek6U+f&`(@#%?C4&4DGo!OBcC%_i^V_dk8
z2gaf(eP^~<%=jCDdZ*jYG-b_F-0ytxju6;g=t<Qs@Y{JqjM>HC{trX{00*O-p@48<
z=)(q2P9BUfdN4S8aCl(B=)(f416AAC)AN$$uuJM3{Ne!A2~s83#u$*Y-wJ(kMk&sD
ziS7NeiVRJ*GuQlO0!3ccgy+BaI)suv;y3IATgy;D9v*O8LR+&Ff*w>fnIau9_r+3G
zHpqFz4MU)J;|S>>J!TW*{oX`b$D9B-V6<L)_09qj=OLlP2Wo|@^$%QdC>=R-^vX8H
zSFhQH9RO_6aFfv3_`rol`{C6BH!)TXiF(6d&3PWanXJarqI0r-GL7aW1@!mAYQP}0
zZtJLZ)W^C+(kUL-kCOlj13-*Zx}R*6QGv`s92E=y05H~|-B#uCQ9hXkiUlU^;9B;<
zvYZ2bHM?zcA;vJ0+0_(hbJIX!B*MrmqVswj7)Akj`t1PG-d!960xlm31Lq;u4~#CD
zCk7cZWWkdF!_kIJnKEQJdN9T1s4-q{H6t1AW6|ZpFbxf^1BhV-MUN(J_#duta5rOP
z`7%xh5|u;pGV~6M<MqOXSl;#TfE5A;m#k4p9-+Kd)AjEKH&i({uii3p8V(NJBpSRp
zuPDmje(*@(bBGJBoY?Vk93NYV;H5dpk0qqS5jo9-(aPVi>4A_1(N*ibG6+hiljjhl
zMuP5RA|9pjjta8zFs~;ivv~EJ2UQ;!#<I7G?}@ZE9R$0+_{U@PL+&tK)oerrv>SD+
zzH_MX0*)h{A9oa02#A5HIc(!o3QBZz06vX<eQ+oN$ZCp}<d*m*HFq?_*spt|*z=40
zkXvV{Kb|=cBT0e-v{~@swwl$1;m6l=3M}f4ys7WcScrr?)MAr_*Qs8yioa&L(s^E9
z3chgdve%4Dpgyg3xZ&X7LsGxaFbH8$q-f)JTFCKC&1iil8W3~{h*<hl4-5)@0evvg
z3!wMj4YOdZ-1>T$Ks2=Ap8o(`M@=GExqu>!b%k&ubN0kiA_RE22nMXxp4p)y0(e|b
zN~!9(-ZG&`tzk&nj?kFF<P*9yejm|~H6)DYVuJumk!Jfi@zz(4fTqwkYt!?IlNYPC
z174N*F+&x>?t}NAT)#lTdNMz{%|auhZwe9?rSmZENTm71EdYZ$@sUs&r;HNOrG)QT
zD32o0n!#1pNor5dH9$>|U4OX{_(^~mEg+HB2&ud^n*$Jp=<5MOn(;9tNkSt0XNH?Z
ze`W-O*2QeZ5)}mDCyWGJk*2UNv>G+5iflA(n!-v8tAE_mkqf4O&AEPN`|!4gD9|rW
zt-l{jmDB9AU>BLT`W|xqwPOTRY3J*Pg~0-qcRqE9U^s_(+TMlu#YBr$z75x>>yvN>
zhWb7tO?@&=Z;BG@!3UpT82FNTj9om~tL#7lhYESGj5&lG%A?R4z5@xF>F#Tn1mmgc
zh)(TWK;1DP0BPP_7Tk0pADkoI=I$hzf)J?y4h`3<0trMnk=gCeBE1RHRoAVz=Mf<e
z!KYh3*ho^viyv>!0W4TPo!?Aph_0snGRuJ!c=q<h1RE3da{V$OMgu#oZxsL*khdy^
z!CzU{I@oM3-X;Lb0%=Do`ft1n!WLAIQwT{U$ACVrvbW3)6d-t>Ju!ztA?$#&>Ay^|
zp;Usj9j{#c?+89op}q6hmnf*-L~mZ3m`OqpWB!Bjl+29nQGXv?BX3BWQ+O(<BCj_m
zP%i~OH}}M<cOXm^v;h$2;3#1TI9rJbCeH*D7|VX;#ztxfoT&8Fz=fKR^?*d*Nt+6L
z;k@<6DuVaMNCM9N@faYOJr7(l92cva=hK{U)&j{m{NS!E4ZPq~eB3W83_{{Y#U=}-
zfS}hdAJJ9zo)0)xR>##hzi7CP_=|I3-zuMcRUrUHu8SRQd>L}bR5fk$Z+>tgRAP<?
z&dhXBoJ8gW-Jdw9mu8AtPi=A2j7<^4EYnW)g$Ol)z$o@z`KBI1K(#h~wSIB*6Ry4=
z1h3y2UT9#VeWX3^7d67X-;ioOE0-Eu((&u#c@)tg8x14(z(uJI0ju-p6d^%dNZ0Yk
zG5tOpv(CR}Xal{R_I|DkR4M`R`?#RIfKJzswg8B_zb^5hi$XnMAmp1TU)Cf{C}VKi
zSAcVe*Gh>I{c+t;B+&K1u!TZ)OYCFqfT&*i^!~7buMdaM+wFlK2cgql!TPyiV2**<
z{cr^W1d6rR=CfDiXo{!u7_2~Vpl{=6+b$muL3xEbW+J2y2Wb7UmJ@C88}!KuG$*PV
z>jZVY04T3F4G5rj)+%ueu065#4W3T$g4;K)Jum@DvwlPOyhX8)(6~U(1L=jeIGS8#
z1D^2KmOWtFp7;X~rVN{6kTfR8(+j39-Ei!{SX+SegAbsUd~bg^ERxhy2nzP`iMl~N
z=M#nwnf~LuB3}VaIY!bd)p(=!!D%5DE8O|Qt6MOL1bhM+Q-!#Dhg<9SM;ImZ$9{-E
zY#ol;AS2MD@s3cSa0%+)Ja?DUj2qPsp!W0akVXS0mN*7=<Hj7a?a{&$z&-K~#f4i3
zl_TdHtEGc&9T9pP^d)YvL*cCi3E<5^b|42lZhCJx0NtZK-;){_07+wAPp=q=sT)IL
z>BD-;+6hSnYpL&urp&}`X}&1991x*hLxO!ixI|mJgNa^3HJqQ_#v{1h;Z;!3I4|+W
z1Oa&rBGQmUkKN!DmxMK&%awFv=)q2b7ss9bb4oc46!2glffr^}@DJ56jJK#AzjOV<
zcurq{^Yeo38aHQ0zRU-Kx+SX{`5YfMIY3XTyO2>xcBj&ue0$*fbd*IKFRwW74dZ*6
z__;zhn)si-*pQ7)SCb$r*bVO$K>^mz{XcrmS+8$6P(@!$=Q;$Zk8B#yqM*9P5Y%k`
z^6__teY)NpkvvQ`k7i9Acr$9aKI_g!kkddq>lA>^q^`1kT!c;Uc;fOo*L%w!VoX95
zy%|C_hYk>k6*m6(!K*0ijoWe5$?1WI)w2UP%ojqht^DEM13DXB^8@O`IY5rl<Bz5>
z?ckK#pI-4P9*Q<Z&}=4YH6p-j=r<X>aur0?S{|(Vu1>7^4XFKr$4?oxatGj9*O;aB
zxS!Hc8o-zeL+0em1>yyn=yr31V&q#r?Zvrn8NSQoBjQdk4)WCtQSiIho-qUxTDE+5
zmApvRc-!?K^BH0wt6-n*9RBgBZgcgY8O;QqwCm>J)!U_S-vVwCOU?~;i;HyneX?Xm
z1ij<3$_y+_iz?qoukVc3Qh*08A&){mU0q-@=rO!{U|n%kBLnZS`)4{mf<Oj!^>X5X
zLUibF=k0`vDu^b*z5&4ks46?B-k9!i4+1{pw=58nD}-D7VuLj)t4ims;R>SBWR&}0
z7=+`{S^MjOP}^gQq&sv|jGzIfMZ3lt!FHyOF@|Y2ucozy6orcQ@#~3Uw6lMvRzv}=
z{No@zdu38;-f?tB`OSi1<+{W;&zwetI7ZD`>xjg)wHFY|yOzHE;}ng=DSO5yusGk0
ztX!2Gs7Nz##4PY3S*(E|%TBYCfk5kiaB6@r2eKytuU8m@XF@33<MDDG3%DOqcK66=
zK!aPFo+sNiq74N|Z}Wsk47wC`#!C+1P3}5VhubvP&jMBSYvgjP#Dk;~a+B$BaRy2d
zdDYjn$44sSH)j@Y9ru(-3Bm{gd`Z8)GE&}xF@0_@OXRac{yxSV*#?a`Q~1C(k=1UV
zm(vH~Nd}AUzuAdEf}7gZ{quyD6`eDM`0E0COC6eY@8j=-5fal+w)gvTCBkn{(t6{7
z0`e>Vwf4$V2tj>3`(@w&(uaKi05~Bvl%79sXdn(-w*uo*6YGa5K)#MYyr#8-kZJtl
zM9BfPqxis(vubR8@Bl$w7>FON{ct1%qG}Or^$dst=+Zns-1x-@r7vIM=kJk2(3gXq
zKRCd)6bdop&hT0UVdb>tH9`l4`(qA9xt{0O3IuY1)W#b}JnrC>x;zQ*a=St<E&l+x
zDNu`f#~7NN@%>_H9ap)GV(Y&c2(MU1?<L3pOp3sGTq<Cy0jx$+1MK+5O-GK0%ldVl
zc)X171lZmB{{XnHP<HX+V-|`+9sdAWA{UbTdBI>6)#F}pu&GJQF0hA#nsvL5fzC>Z
zmv$-LR(6L4!6e#aYKD&V<-Ta6SC9FUee!LwL*VhneQ*I31tQV!1XyuwVW*B>`0Qkf
zBhCl(M<EhdQMhBt(A-ozW2_L9T}w%FktfC^V@harg*1=jHcNy6@c46D@279G>6d~X
zYm_?n#soN45ih1i5xuD-4{Y49+CN_oAp#{1)2@B+dwYXg^}`r|=2Cq(ae@&F06AOj
zg^mhQUOHa5G0~|}i|`nbno@CnKKLkL-Hn|-mjZwWm!JEDf+1LIxM5W&pjQAg;E66h
z`3`D8#v$sd(d9ZZs8=SNIH!*AMQ!c~>w7&tF`1)K9Cvu|eX$;iyguppesi1v2?ojp
z?^xOflXU|3*@zr#5|sd*Ya7}m@xPC@O!*UVYJGEhiZ@+fOd-IH(}}DA!FS=o$yC0W
zEEk@6`sVuT&_ApmE{Dzpao{Cf6mAR<kmV0vaRYC@B{+A4OC;c2PSsr(#`rkI3JbNn
z`r}yd0Fe!Sa%C359q-T1a;;EPi}}cPRF-k}_{S|o)Z;Z?B;3mkqeL6~{N{j_cWN)W
zlv#*S6l0VHR`53v*bRrJL(T_GH9{i(^M%YOkcCC-UVF=TD%vU$?WOv-@QRaNxVm*R
zBs2x2b9fpDhGi7TPR!tPNZxUY766vFcq!@56FQ=uYCPzVqT$8n@WO!b{{VZzOS6;6
zk3^n9=QT(*H81DO<DQ%`uP3%9oQU)9hzu0LLKV^Uz!pmtw2q&x^}v!3BZ7YY@C@Q@
zb@ao{UR8ziA2^Xk+G2fiP=Uf}qkgzGhh^`Xb@79M166i$^f6ExZ5~s@j50=1w;&s}
zBgWqGkZ$T~<A{Ok!E`TS#wDH?q=o^4O&X6lkX8<#Tx|@&r>T%Juy?8*efY+WH59sz
zz3`!6pxtA`&>xHerI&Q74Ep7&8(BF=*Zal{wuB3R0{ml(q3{5Yz6x46P=!w)ILh3G
znpWUdg+&uz7|1jrCjK*EfB@{DPu=E(X;Vi|aEgYG1@-R%e7dc#uNY(`2qCU8S$k{k
zf&d;IGyx4q44B%h<M_l?M%+Dko#Seb>XE;EacBSoT75mRc!O4YVrGV@r+*xH`NB{M
zM>XBSNYMrFw-PD<knxCzA(z({rhKp>slHWv&fCyKhVx#X#s2^;HKLDUjE9(B-f*;n
zgn;4k*u&Pbxu2_8jX;B=1@<g2zB8rGmv3p{=KxY&0@OE%_W8sBGLaEIRa87>3mGw2
z*S6h%ShfX34oAXZd7X*0_Ovi;LwDUl-qao8wquA&3Ic+m$VMg)&kfB!?m2jXK$Huv
zNa08g!f=N0u2fK~q^}+EfEPs-U+8e9NC|chzIw>CkwOS<-?kAVN#68U`f;2Q;S~78
zV6fZ1E+H{R)P^7~=@@%&1w>(8_kZWt82bP>dB#A#Q@=l)Zq=LyPjQ-=MBA9*kS4gp
ze^3EEoGJk;wy%rnj=g1KL5b(QLAs>@ybAr8oG5gH+&B2fqMkJ(-@Y0$AmMq(hD8#2
zPqrmB;KP_@VNT}jF@UCx)#G1`u~*oxu=P@{4!6PPVy2-h;(fJ-tub_O4fIY=Yy?g6
zVIjo4%+uio@AUpLmI{SnJs-VcWzLV+SXLI7jt(%S2&F2TImK@+-tV8^8Nqd`aO0sp
zhY-RsPz3tz{qu^o*f}Szbqm9zMnKx=o#XkCgrfzgc{_IOnjTOgy`C}YcQX@4{r)nh
zq9W@^FB27gtP7!Ezs>+Z7@d=y_{T^?Lus*9;MNknQ?GU@dEO>LMN4RBNwsm1-%je$
z<|jU$_-VYFsUyC)zSvaFS;+|n_P)kC7>4kM*ND|TWcOu7D1vwir-vW%tEDg2(>?MJ
zbyJgy`03*ivK~!ey<fHuCK6i1bLZ`Wq`M){pSEu+6(E2co}=`|IYfbqCsV8U$S9*5
zUPXQ<-vN%04+3%Y#Z9dtJ)Zl+AHZ1;9<Bs*>Z`u)B|8#0$n1g?Wqe{$tr;KJjNDvn
zQ1rr5qE5TQ61hP+^~4o|2$%qL;We5iksWi4Ac8DM3$QiW@&loBU(lccmoGT=)vCe^
zey`gJaf+MA`M+EPX5t?BO^Ag@>xd-*qBq>Z1rFdlxL{>&-u&aJwjg-#1hyqa?7Q~f
zD{vcme))5lxAp6SV?YlYGD40$G8iMf>ncZpi7q~J9haOiRjvh;62oI%e%wq3P+eDh
z!KuA&UIPlZ3f`RNunQXKC)@qTn58&c_k<v99z8OEP+&6%piajaoLa03e3=;eV%;8h
zr;_^O0iL4wr})X(cA$dk<y0}}lGoS(>+p4lurdp!HLrY_K4?%rI)(={#;jYHpkRGd
zHh1>T^f<Zx!9Jf_3#KVUlics$S*1S^fCG7&t`|f|?(z9CWfZ73(0Y`7VC;asnx24=
z@#^DT)C6H~tisR$CAy>bFoeda=mB~5##jq<zFZn4Zbsh!09+9yic%<kF%V5waw>h_
zt~PK|`W$_G#C~FiuxNVM-Gk{3a&P;H4P!%j^u#TS-Q4T@!U9;k=*e4*udWIMh{%@}
zL&3Dx2&TX-m~lp=p8o)xQ>8e&#f(XG_k>Dr;RYZlZE=9o7Dt3@r*GE?27t%E2{yv(
zUp6!fF;c)jm;`8Sz6FLM3C|0BcZw)91h1ZPG%7z#MG!+!JKhL_>tNrEo(9l|1wOjK
zmM0_Q7?KK}cHmrC+i-{%jX1)BUxrY@*ORwe#b{V>qbXg7^qAR2fa?W93#XiNXc6Kg
zch0bLK%hAG^v%UfN0%r<l8|$w_s)8oW5zI!T};(7l<|rOY6x!@stXnueNqAIh?1x}
zdEocNUFbz^4G{2L(SS9MrtS2&ubE-6Hv4nYoKy#trGFgyWo>vW1~b+->4CH{ll1Gn
zQ^LKpc5Rxz*rY}h6$2kS`dn0Ot?aRUiQktVcE07J{s-F|_UWr<wkkW<#z<Z0x*~dT
z`NO6fplwaf7Z8sTL}Z5@p^ZR#oV3~b-<%1fDC)wK=bUq?1==Z-FrACN%GaDGoDU68
z=rNDLscJ9RoAHW?peH~z_xj~q=R*V=Jy{KA+;QowK-qV*Q_enAUFoan`Qz_|0Yq7j
z0aai;X27kwex?8r8+_vw475j_a@RY}3$wp`1y8OrKuI^g4^~f{l`A4NiS;sH65<hw
zJ{dtAft_{n6D=)9{g?%=TBRs|+$jw_jt?B-n?uZpzc?#cmfaT+Vw4{J2N_*p6E?Vf
zgV4hO9C~3xA)DdLo-w!1{csQ=LEXP>J>&+kL=mH3bNAaS2a&bn{{TO1n7YqXd-dZ1
z^$ToyJm!rIMdpSXtkF3!;4D?E7mO$c7WdZhMT<3n3~f2<5X(VQUwjMU(1|v6$VKr_
zb)iMB>83BXByWbL>zV6+Tu@9{Ti_p$zAQIp6jfKK$mb--PGkLGt1QlrnXb9}VD52N
zXz{<CC`yC|Efe?C7bk`6G#NNkelU|k^P~AX_Qb7Kqfj6otM<c7OzQYv6w_R|!{8O2
zJWW>qa5vDn!Yz>F)ws4?;EK92uYVXFR0BZdp6)CnvJMbhPVrB<4;A71{jlRz2tqmU
zxABUDLJ7x59++G1<lUS8SPl;pHO<DsNP;`NxHQv_HO{gm0CUc+AT6R0>mKY4fZ%~0
zr1AT#)ZG>noKyt{m(BkGm^C^@S1b*1r2<ZzYdZ<PaDlG5OlGu*;DEfFg=_2|t~$u?
zQtYp{+Y;Nm?#7I21T3Sx9e27ndj9~p2^D)C{{XpS0f8NE?^y~3-_1X~VcMKNm;?Y!
z)!vw~Pg|5^uYK<jn=P$v!v><D?)r0r!W=q%Fw!48-?mt|8ZXuQ<GR`99CrP2l$r>=
zZx9O*qfI&b_l=V8TgD}v6kZq4+1^NvY9CAnCh+BHAV9cUDx#^k^vE3e4X}mkJmGq`
zBHOF|`tgm>z=4Us2k#iJQJvy-9<1>nOtIn(qhRmX>8vyYLhoHB{I*W;_@hNSDsj92
z05Q#4_NZS4)2_S5l8}c>XHlMcxabOkn+fZ&a9==Fk4Jnv-JIYQvG%EGeqViJ(Eys^
zx1CJ&$*$3^%Uj}{af25shAK1-q4M>x0bLLkYN-v6E)_R*tio@B<HtCsgo3J0{@;An
zkt7b=KYcL(Q05Z_m=4hR35}glXdOd+?--aHPEHJZ3kHK<y2n5f72jE5J<I4f@6G`&
zcTYH(6OA1g5HzC+Jum>|fjYSa622*r72J+<l@V#p!Z>tk#6UVY!(;I40CYll!zqh;
zEuTZ@B4Q(Z-oEZJKu8Y!{l6J~t9?4Z_Ygzc(_G^ai^^kyjzyMUureXqK0D1OBcQwA
z^NgFM>A?szTzVaeab+pf$A5f6iFA5hesUS4S9X%`1$0gB?ezA>5Mq(#Zm?mxfXYo{
z859$lEc#;8P<d111e=cvb1U)q!dFV+^#1^JWL60P-jBaOjCci19TOS>3pO1XHU)zR
zewYPmOTRew807JX=zSR%awBQ57rzgDRIL&e1>N_4*<@hDXT8P5w3S_#m`nf>D%4N~
z>;CTmEwG{4BkRpQy29xJhfQjF*U6iHz*{?0=Y#2xxH%A<J%R6c9b}U!XuB@c-;Cc1
zM2c)*Dbsk#7F%+Pr(_xoZ?-*=ovK5npWwhMQb$nRsw3aFX`pa2-3h?&HRA(75kscc
z#OKWeTOuD<>3}}mI9acpv_cYI{eOIBn->?hT+}nWMhWq^?U3M!_rM5>4XwKG;~*We
z1jCS#uweRN)DeZRra&!y7sfHr#Sk7`U_ufUY`o<~6Ls|CCstr+*Ul+QQ7;&Rs(?Dk
zhpV%%Y(uuueJ~Mid2TMMUT(oYbos(K0(9Ub@;C(sY-dvn#I!&z%YJYLKv}0*86CZM
zgr>EvmU-hU6ss0&eBqlK5bvM9TmnP8hiu#ob|988E2vM^kyWI}nEwE{XaoeVl)~Q>
zfNQ?|cZxz2r|Y~i;ZfX`3}I<3P**&BT{Iqi<N~ON1AO9Xs{(NysGvb9dAU458~Dfw
z2;jyj9ABB1IwU=74S@iT^~c^nR~9sQG-@5q@}FEOFeQjOr$1a2iK3FLDb6=c12$=U
zzl>6dDv;~bzP-#J=yGY;KRfm35IF$x*$*4Rz+c#&K8l}A0H0fInm?>Ex2siAz&&~6
zqa2m(h|x!SS1Mpyw24Ylv6Wf$X@kcI^TB317ld}m2RgdO@zw#NIW+n4kiei5sjNI%
zFz7eW=j(u$paOU28Bm~2p}B&hRGu2-$TplDyqEXR6=z^0#l;W_$USd1A$do>GK$^A
zyf&!*_{J5`4lR6Tk_~sMk^vkcz2ktVN<D85!e+p5LvD>Bys^k?hXyG}5I>wsY?HU!
z3MjO|`hy*jFgbhDIsJ)_v_arGesF_BioCz34>mbNewZL7d1!HiVnGw0TzK)KuKcbd
zEqI#0-WJjn+I1(4u@W1~1@k;Hs}!U=dgBNJB?r(=cg89ks3CQD=i3M-paXV&`o}O$
zkZaSA7_nzr<M&u3DA1&9&(G%rgZKc3N_dg!Uax#&UM&>ooCgk;cM^Va2yjg%jMQRE
z{{RLA1Zru=!h1v;!I;<u(1SgD^vN!)7><wL@o#oQ!^gq+#0E_92L3ns!D%6EYN0%Q
zaf$U|KDK)PF-uFQl|A_9844DJ;2&+~(O}9FJ(lA3g3%-FCa;<8cWy|FN5G;l4Y*4!
zEP{9n%iDZoy#OjJNm?(VjYd`w0f&=-eK0_XgI12O4g7e;%?$^h(~s8yiCi67E)JJM
zoxSYec_E4_1Af20FikeK*Dd|A-#pcY2aMKh3h8`(ut>4wo#D|!YmevYibGLX+X-|-
z$nSGE2)u`s#S9HAzpho$8{b!qLKKK^r{4t$S6eynEDCwV0&pXTu@F4ZrZc_gS!Z53
z>kWcbz?vNp#$8^J(BS=(38Pqs*OzB|=N5~=Hs5al09+geXrm#p{g|Rj53=w1&6=ZK
z7u)&o3vU<~ewju9SZE&JdBzE}f^0uro{G(blaPcfi1o-rw`Y9o78@r8o>9RfN-s9q
z{^Fw&*ho|U<qi-!gx_CJrYfc>Z3A5E9o-izhi<W9T|~6~m<(tIyw6;?(I_}5n1JB)
zz$Orp>6^exZ(caW44f^+OaN8gCHh?96##tD*I_e%1C^E2Ys__&K}x~M6;6G<@#U|t
ztpVJ{O)5cM&u`t~v}`8$bb5Yp;8h4lk{WCO05Io@paXYY4`IiVuRbsVDZLbOZ$^#C
zj<olTk0jc0kTM&{`OoRRP@3{KQ~)4odDM4|kWmqBv)lgV1>(9Jb;J9-MG|)eU2z=$
z0JtdnlHJaxGGSEc9Di7hf!cAa{NfBE_YVhXe;21IJ&1T)fZLseTo|i!m(R`$QeJq5
z_mFi?e||8a>}#$sI7J&_1~A7i_sE*SWpwU5xL{s3dc!rKfz}X`pNv2}tsfrvt^h;4
zD3rdp5Jm6pg9BR7_lOD-?70Z2os0NU_`~20-Rz$Eb>ym%?|12pQBc{cYm65xG@73r
z-m%cD-lOmGeekJNO@Q!y{<xX3p+NKa$U0~`-#D0bb01uFinbi?d}QSGo8g~qy|OrC
zw;I9d1Z%r}U)tm15TreQe%|;7Q7ZBuAAA`ifg25P2|S1(Z{HD6SUEJeD-x~i&B;!n
zi<U-MH(>d~5gaJ}aL1Q2-C+#2o#4hGk*yo{xGa+i*(UITXruzahIo6!*bTQ}pX>C^
z=A_ZETK@ULmZXk<Z=So#H4PPjLT_F@F~o!{swIcFUNJNdYQZ7DIdcSIqSU${^)tXr
z!see{Fn@ytbjAHc=bo`T<XD6bs;u;#VX*#?MF5-A-n?%MKp=_leYRkbuK);tm~lZ4
zL2mtw8KX?y^xw8NwC(524|mfTOxFU{y#(ucco-=Z9cy?afdO;Y6k3}`xW<dYQ`X{O
zXy9_2!30d&UU8U%)XoolRz{X@*BMQYXB=_qfY9lu+XlK3sPTxBOrpBSS0`7#uu>Ok
zE+OeCAI@Bd2E8U3tKr@l4`jrwQ&8Rm6p{5o_szPy71%$FmPn9mZ+NH>VSP_Nd49NL
zP}L><=MjsGv{U!^$pmqHpQjh|7Vv@?;pd+i$<iB0bWP~vH8C_JYsMRK_OI@}@&~pc
zaQE@Y*9s^L6I|Xq(Tqf>N4vb#T-Z6ur$957kcJNZ4w&t~T!J79`Th37Q*Ii@Kfioa
zG?iW9ygwHbFtO98_j$Mj>((Sf6rM9!Xpaysa8xwtI(;WOWg~ckYjaov%Ty|NzPm7F
zqL;84d|SUdxY5cO6cnCzxJ(aW1Jc-7-V|AHV3qH$?8e&BdXOJsj{M>02@zq3h?M-X
zydVSAJh!(I^7t-G(3V7gLk`89U|?Tt3UkS&=(u$elENZ|QTX)4Rb&NyI@cz?_!0}r
z6=ghoar2%tnke|e2?&IO`v<R_M(ccI(~qB?GhP&+bLqw{HVD@iXi#bfd-%e%X|O+^
z-f+k}q;J9e<G3!G14rwX1m~4!Ub#S1Xs&SNLuHp&2IoQF@4iHVP4$F1P>ackNvTxn
z-Y1mTZ?;GoQ7@6^WgDS8{9~a*WWkh4MaMu~S>q66r^X10l4xRno^h7Y3O>KCJ{VVD
zAI>NiA|(&MzDgulfw8mWtUs`#Hf}X=Y6=sqMJF}YPJNF4yUh_Kt{cR^)&o;UgAwoN
z`N0UC61wMH_xl`iI)zYPwdU*N4>T&N!R7m9d{A_n%`<{pKaR0fMlksH-?!f$y%nO<
z{QfW|!~`bY&zu^H3WldR#>7N*3?t1(UOu^g3>}@rSb`y`Zb}qtZQQ};vZ!`R+GD!V
zn;Kmm@!K&=V%PwHb?$oL5opC;M1$##iWz&QeYtwMq`)<`FX1QC35{~oNGFkBu1A(d
zD5qvGMqVAzN?$g|wrV}WVON3AQR;eP;%^Ouhn9A5xUCH^CKd4<y{>I$pM0jrBJi76
z1%)|in>OlvbH&I_j^Vhvb=zI}#5M*ktLNGNGV~xXTo_{j-cO9G0AZp%VM9J!uQ;}~
zn)Utkz=EehrWlG{BlR!<)gZjD^^hk?7uVAWtdMNq8o?_Ob+h-*0EKBauOGh{K+$(N
zDc%T83q73a1y^$=+20?$<bh8KiC9CAjIc#7VTdC1u=?Q^s7Z{3cE=6x0Wn8j*$BcZ
z2+f}k7T8v%bLrcOHAR&k96!6muV;tSdi>z&8c;d!77Ux)xvl>II2&xuDXa-kf=BoJ
z$yPK{nQWpft~#;()>|GhMAPH&;EACI7>#|nH{_gT4m&<ZJP4q43Uc`em^#8>I|Hfu
zpIil6JZe5YH=A)p>8`bU%93Ce+qe3~X;5xHKYo}|)S*2XG&VKo4x~l_ZyewdZm-+>
z!U=16E(0*7EKWS@M_A*5r@>tNC+Uh<3MEBeJkESRxvQFx8Zhuw<GY8oC>%9D^BNj#
z>4yi1`(jd+RTW>C9+;N{L<~Cz#vcOR7MrhojG9g=0}JT9e^}C4?+E>U4mk%yqB;$<
z^NtmOgd)`J{xEX>KuxG#cMHWj?5WN%Ke%5~U@fPr63U;P1RVeX_qd=$+VjX`rFt{x
zSV|~O<4V>STFV>)_}&$0LOeY2mdyZ7@w_7;W4D|DQ&5MmAB<%!JnHWj$S^<d5Zo50
zOLdwNvC!1PRDzMdF(T`^4Qm>pFw@r}0cF;^pT;u4eH#7NGOD|a=l15BN*vrGh4TF1
ziY;xv`D<rX^uW+v_yETX0c4Hb_VmEt0v+45@qq*=6i9k~KKXEj%k7BNrh-e27`j4h
z_xsMdSO9!}a^zN%U$6O&5C)K<a%<$lMN-!xHGj-dPKOVF->wouU54rZ05gzjJecbn
Ys8~`b@#7v;0k7T9IPl8SgX+)!*&8Ibr~m)}

literal 0
HcmV?d00001

diff --git a/samples/data/mask.png b/samples/data/mask.png
new file mode 100644
index 0000000000000000000000000000000000000000..0666232d49b6b9a512665900b39818acee325b65
GIT binary patch
literal 3943
zcmV-t518<YP)<h;3K|Lk000e1NJLTq004jh004jp1ONa4X*a1r00002VoOIv0RM-N
z%)bBt010qNS#tmY3ljhU3ljkVnw%H_000McNliru+z1;87aRf6WXb>l4){q#K~#9!
z<(*BBG|5rNf0>o_);-T^AFb_`Ss8(3FcKGz=7O|Z$rpbD`wLi{_z)a9apMypapeLR
zvl4sc!U0aUWXTe?*T%3sGhOdh$sr@NvR-<gnb}?AF1pk+J<~ls9sm4CWMpJygu?Rw
z`et7CXX;6~sqkHe#}oET!CbxYUCmS9eip<DGcjT185LgRo=qq?k&}^=l42;xNKwE*
z!-Ze-h6h^4D}bmV($j!;pTK@u04P9<a5~&Uqe)3fFr*j~QW7$9?l_T=$}<TXLxLtj
zq0v}c{)dWt8b0Hf+|w~(85n^xU*%mCBQ-zezgB{2^a>n>#tM}7i)M}=S1n&PN<>LX
zNXbda$w<jDWHQzULq>)X2sA00ghbxKUGC!!)ZEiBVCfi`ctrsW^tAkpf8#%Bv1km2
zf-{DZiGhIu%gD$SE||VZ0F>bC?^=X6GS(RxIT;0MpnOh2juHS237K#}3jhd%5M{Dh
z`S>Fvg9z6%Fi>&NfAZf9C?*;8k(QQ$mXTgwH!w2F`#gsSj{=|;wAX>k38@HMl2Ve9
zlT(WL4iZB~ju8kFQGr=`X3H^QQS$Le22q58PC#hs<lfNI(NWXV%kO%6I)R{HFu{Ia
z8=!W^-cj37kW!FS$k!7YIc3BNrwFMifI(rxJ1Nm+f+o9hb5_8?Fw!&9G0@S7xD72e
zH6=9-867zdEry<s1kJz@4CvE9FmI=RJoc%Ku@}Ciq@d(1f)^sZAtQ~9U?T71Z3$s6
zQ38N7U?USFHd2G0o|b_|7*KGcrlg^wp(UrGp(UZ0&pDtNnb6_gpAEoTe1pc2l9G~9
zP>S&9AbcTQ$OQnWB#9WoL`<iW(Vx8n!t`YW2gb-6^a4XmK~EboASdTSM=ffW(vZ-i
z(ewg=5`$=;2Y?rUw)eBi%PA>1%edcBQgRYdy!fdYKR@!x!uYAkYnTC;uSpD<Glmm=
zummkV4JR(N)HGD&RHRfGG8zmGnhq^r2bmM>qdYLz#rLD{b-AG6L`lIN1$Ue%!&OA$
zr{WXDGrtOxA82e4fBtZO4xfT0pF|BD6LLB_3K|L;YA%6~ITsp|a5-Dk0b?+-*IBRv
zCl){9uQ>CEoE8GCFBXqjJYq50YS%$lD>hv7^VCLhu1{g}G-&xbKjyoSxJZllSc?Cb
ze7Kki)LS6f17Hio^8irT=ymbm#@u-v0{B<{OnzhE20%Re-;O3h_2{(}{rc#yzbcsw
zuR&mg(k!85$An%7!2+KM3%0K`d-J}=M8uE9*Bjz*#lAlJYQZ$#N`JK&{cEpP5fJO&
zTN&eTqQx&*tBu(gKjB-Ch`*)&R`b<c=^qQEc1Bf2<8Pz&q1qL8&(_Zne2qj_3G3IH
zwbSdTz0*S2PdM|v8wzv$koWm!0M=qKGRLeTFf;zM$LH4W^LYOwe{o}Ro{xZ!`O^r3
z1n2_<@!~qHsGZxBia$S-{*#*%-`zgs$C1JrS(5A3bgE<V6^4|Ilp-4c^UcQlK8>tF
ziV4o$#t5n)z6)a%l#$W<-pvB=5lwiUiz*C}9~v1!Z~S~f<Gh)IZvB=n)*CATv_$tZ
z5x*1$aPt7n_Pz*V63e(%0WC`yC;sU`4Db+S3?bs6mjEb<^rRugad#L1W(TPZ^q=Uq
z3@{SqbNYYc)u9-mu;h$#O=}})8PO8S%Q;Du=V?pT+kv1A;%`&{1}2&QPL!N^#ret#
z{B|KI=ouw?Jgps}i4HBXJ_kX0uxU_9%qk;WZJmf8(34P*iwP__U5&!r?ljoZlhKpx
ztp+C&nFCIoIB|CnKVZI{Wcy@Xg+a~iE9XLf2f|T|pyxnN!9dF(yVM(3IF%W|$9j)r
z1RVq?i7F4*7%=bjxX`yc7zCy4E4G+D1}F?CP9lCuejx34x005YmTm=rUQQE*?D>>%
zxM^_Sw`321ENpPO7d{?|y}g?s+E}&-08Pf7i0@yd4y6WV*uLLY0c7LbC%a@(H0`#j
zKSzWLxu^R!Ad^k+f|8W<Amcx8kma)2y9z)OA_6WGfP=WgGXpZ&!oM;LlwrBxHn)#9
z4Q4^PuNpx|51-a_3>)CLM<+?zas#|G4HlBvn1}R+W7q(<J7_^hKMfhH3jh@F^ByUM
z2%iK7+`bXC0f02@dYLr<DSyN(NrdsKtw#!oC=j_)(y-Er0Qi7Erj)eCG$b?~g8`lg
zMzR}oUi0pf0gB(_JxPF^^ZAZ(Qz1_z33<zRd?gxS4T{&2*_}pNeaBb~dGS?9^E9OV
zZt!#k029*PO<;gJhzh76Xd*+L^L4(mv;p(jCnN`RfQSHaJ79{}d?P4<b~H&aGWsf{
zvLC|+%mBC)0H*}+#0;?foL=&QlVAXkGYzU>1zgs%vv&&>_$igVz~}R;1FC>e*LE4q
z3kH0~uVe+F54IaXWCXRCd9|44wX>BvtpC&jM}MBweJYq<PlM?u6ZCj0K$Huq*J}Z@
zNnKTkiwf%K7{F#Vs1ND^tcP^V@F-7^gStS=(2f=^*rf_2!DrqcXgLZ1I&zTb3;_d*
zcPL)0?H}@ezRF%^_`XN>(k1QwGWW0fr)PrT^3TUu1vDS<`o&7%#Q(p#eg9dZF_#;n
zur#p>7(U<>jusqC0qiO`=F?;5L^-X<`MPA&KSsK1x4-3AFQoCy6(IiaPmKu7M=dNi
z${{w8Qpjl<`+|@9F(2O$5B$$fYy*4@cv@)77h%4B`)=+?<`hY&0XcR)pWNKep2x5U
z)3t1I55YC(;g*G-<HG(t>dujwaZ!H4MrXE;5goF`2efxoz|&$1Jg{(}&?wQdY<DU>
zVnBRqe~Q#Vhvma@EWjuSvd5L2p<scH2%`?QLE_mx2m}iiuu&rU5VjMK7a#Hx_Ke$U
z&|mJ%eQW`4KR>|YW%SAND;2O(vSgB{?aGHfmJ8HN$)*JWlZZdp@tC~;br=O0Wtd0$
zH2|(k#~AAH*s!N|lpR8X0=SG}v$O%eT#YYI6)%d_0fhG^unRKkH3K@S$JEO_X%80~
zb29pj@o75^cJ$&9E!PFdNS3tp0RmT;X8}M*BTo)e%gY|b3z&^vFM{{<)LfPT^ny$m
z)L?{zZHtV7{w){kEdx9Ry&Nq%%9$bW)pSybtE1%-#{?Zce5o?02IDh}65av~9X$;l
z4GouQJ*Z)#r=g~%r4@77AIJl6cA$|#?&$U$p(dcy(9??zIMU6RV|2ABR{*p$GX5<M
zjZ703zN8Ykzj+Q=YAWG*vp7Dly_3*UQB#R291h2ThL$Q6<6P7cvSJ?#Y6_~IV<opg
z{?Jg19ccFgBJRmLGKNQ64p-D%sA*}|HAJiwEUl?1$v9EJwBq(}B?lY}8mhp74FIlk
z>xF^~8TWF^{wONoZkJ#L`)DN)F`y!&mJ`Gk*{a;o?OTD0idtO0xQd9B&~PP5AIP~-
z@~}Era|B1|L9D>AX9EUWQfg8zWW2%1e({mEMH(@HtptA;08BI(3^^A$RdW;u%&y!$
z6_u<9`mG<3$to!cXR9~_2W-TER@OSptuf<QO0^W!@^zYz#(+we!Y%zGEN)z|pd+ED
zr6!}2v^jME4%jdYG_n{P7W+a1fD4Q(IXvUeRrohJ$^q&JDaJqSRg-huh6*F+22xBA
z-X4Sj9gU3sxX6eL86qQ!294oikxpz6VFj$jIVKT*UgjJBBDVl;Lv6`pD8QWeGB@F{
zdzGY|5`YhdCkL?svkLUGEpAs3NIgNf6*A(p>MsWhg;<FSPrD6Rbi812J47utRaAW9
zhc_$w--XehyU(nXLCs9{X^}-e)BnAj1z?O+U|K-1Vt{ACBpqG`$)<KM{ev4whTXgU
z4c`gk+o$#y@aO9?enz}E_#=MJM>j?OaV7pYHs)^!0QU=btmqffK`Mybt$kjfY_Btf
zx*EyZ{?2ZG+TP<w7R!ZiU1_tSxBtd%k^hoq?*_YATG&UUFZPkZ1iK?)SJ&QiEWWc6
zbSr**{rlA}BW7QWk)QB8_*(obGKw}bk?Up!HcGBp6Kq<USNj6)Y?HU_BOHjofBLj0
zD@#iLGk+2uuN7e{#+uM-1=Ttc-(DgAHAUDHeG9`L3*hhh^Q)|VWGEE>!0+?EoG09?
zF8BQ9(z-W{dg91mw*uD{z<vD}KII2IxzEs+2PsM#8WmD*@ywZ5+;QT}9cL-Sm~xUT
zJsFuC#9Ve4)Yr_;!n(xG_a<q(HquFpJ=bchrl#V7H#EGV;tiK*-=~+qx#s%3)dnmP
z?=ke^CH5hms;A*hO~ortoM<R%Daa`3q#lWhAka~(tuaz0yYNC-b(tEa)t2iy*UIko
zg_;W&DlXj1_a-Di4DxqR6aNV<H!VFz-fm!`XQY>&e>E)~54=k)<ydp+P>_cPoO4xz
z1frwsPOYZREZ{RbM`@-wO72P%0Jx{2;z8~;7unkFL*d+M(SY-D6@Ahk*dXO-tc;6m
zfY?i*xulaOc&;E<Deczgc8ezBfR2<wEzH&wT|;QLHwfQa*}JT`(DJ~AijD`#YiMQc
zrv>%x%li#(c`yX=dr^XpQUItpaS;ZbD5OxkZ@8C6^@(&i!o2nzT=!9*xIc!hu1=O`
zzTM)5Mz*;c9%NIm6RyPKKQ*`E8M~k^m)x5srv@brB^hVwMOVo7YA)saeeE7C1=`$w
zJ)w>f`F-GGlpvPR>TtW)9TyrZQ9mE(ZX>OKD)pZ<3$WujIx_p7l9_ZJEy$!CZz(+q
zQ?Ul=qU)%USs)4Skz0qlB>ge6`a?+C^VX)J4nOrFvvCY#Kc9Jf&iTg|G;H=J&9~Le
zD5N<@A&plH0l+t3HFCLyRuUy)`l)X=eZ!*Vsv~$Enyt?LM>|oGF&u-k&m8*~1K`Kd
zZ=_@xQu3%sg%a)0IPsb@?@$U85E%S?V50+DqfC1vBX78;iu%s_sc;aXJxoZDb6UAY
zUorqO2#HL1$SgcXPOd_e^A7KF=Cml(rdP9o4LpFJiqE7mw_T0wdNH(itDi57`aE&(
z83c``TkW~T|Kxd#X!{!s$g9u$f98@D<4x`%{4Xd$)Ly7b1%m(p002ovPDHLkV1k$B
BE>i#i

literal 0
HcmV?d00001

diff --git a/samples/data/tmpl.png b/samples/data/tmpl.png
new file mode 100644
index 0000000000000000000000000000000000000000..999ac704ccfd5df9c339d79b6ff73ab3df88d1a5
GIT binary patch
literal 5911
zcmW-l2{e@78^_<-F!l)%vWz7nQ4x_XLUyub-^socCSxQ^vQ5Z7m8k4n7-1y4qQ$<<
z$iB-u_W%6;=Y8*e&V9~3_defy-*dj7^Tr$KX)w@o(gFa$prxs1M2YDC8VwbtZ&$SM
zqXa4kZ4EWx{J#g+T#Th;;J%uV0sw%H<-Z03vU52oN$Nl?U3Kb78ZH_H_0$F}j*`0k
z_@Q~=Js&SGXYWAD5CHD^JKG03JMo1)4s_$w(9$(<l5nO5fQ#W;YAO#y$Jed3UpQ$5
zpZ!(bG0aN8Hg?xM`X&2Ko@g1!1uCCs9cUH*if2N%c9_1TvX4LZ*y|O$&Ur!yH4<RH
zuR7fghF46|T`QeJ+owq9S6vG+Q}kXy#jvwoS?$C6-K_ak^Qrtbmv7_YmqT*PP9Dn@
zq#lRIwepfnj)QgW6G09?<9-H3EHRQ8(2gkFzscMAT^<Kn`{7?9jy@^ypQ%x|8PJlF
z(otWPIhh(xrkRzQ2!JvzpWou><<xLRzk_*BA+!)$4t);urLv>4gW>RkDv7b(U4pd;
z2wwC8w=Rh0L>a^$`ePeUh+@MFASLgG=@AtDpqfjKao*Is(}X+*R7Ma%2~-J42F-=i
zlj=8~>J^Gy)&lHkNCzODNog_Q6519-_GUtWk4|;JN$p-9Dg5G%RFDSIgfcW3D&?xg
zFX!SXL_s-s_*k5W5O?#LM^!QCAN(j6lA*xoqhC>?&qSsI8k?*k9F3dEitF52EwHUk
z;OzHQy_%Zg`!`M35{!S@PtCG@6yU8K9xf9^gNd%oQD8+Q)4Y@*y3s0)zs3hvE9Kvz
zaZ6#P;3y>U3arv7q&<8k7BId_qQzf(u&o4~4cv8sDj*-t(^UIcAlM&%SWVw;JVBMK
zGnDH9)K9U0sCl$5zHqfW4$SB>4pG`m;pc~G#sjJT0^{3b>*T)-{Ak0-r|PB_zfP?X
z_VYZmJ82t>diJJ;+;UUbm$G%Lh1F#_5pRp3O0vL79GZo0Gu32g9TYX;V*n4Pu1Y)a
zMkROe<RxzRY>^!q4dfW@Zp00lLM=7`SKk;QOa5r%OJFO1NUN3&I|4CxE~aHT+*T~L
z;HDOrBzUN6$}1)|OgNb0uaBkO$_FbV!DI}<0L6P5OZY7yl>lTUC0Ipy`NCiz+Y31V
ziw<<~j2~XzJF4JDUi>O7f#!LOK7f<yR0ztHx`QMm@*MA605=p9C`-P&T9^Q=-B)FU
zKEF!3)w_+7!@N|W&#R+-$l#~)rkfAVKDSv1?)J&Xdf1s=l)v$8<8=G{Vgj@?70`c2
z^^>jbKz+XjxGqGB?H$+G1W_f3aGsdMP~(5wFRK(h?v^6ckmTLj68#L!S>Uof%*2+!
zRxdp#W|-!bT(GNQ{rjfoz*d1`;wvojPpO5ON%T?z$)j&Xu~FmK6<opK6J^pty4Y3}
zZ^?<Wu`MwaAAvio`-d*C!I}mkq(S81kC^taC?EJq7^9|~YZAT1WpzY`+L{6E^LCT}
ziisz*%1n!IeGRq-C`BAhg`A(AogTM~_nmzMjYA175P6>Vq^`>LYSd!~s0vynAw@9$
z5<QiREO#~*%kq2kAXP9vewM?YgGV6=@ti+RP^Fc4d-mVxWMcp|CD2nrey{H7WEaKF
zau~Am)8Bh$A19Jvm#D%F?c{Kr4<PSwDu(Mm2%FX`{%w13d}=%06tY=So~Ga~NRZgF
zjriBm8os;qvroPnwIHhHD*|HVZ>IwNmA#)`v@kdKjXw6?O>RcrijjM1g7*ny)Gbm}
z_8iD0$D17VYi_r0V0*sh=I3Y6__`gPD*n2q&Z>T&5aQ~XSjDro9yKe<CF*g&Ne3kJ
zDxIx-zfFvYw+)~{U?AJSEi=xSe!gp!MUtD4dnIXZtDL_ud^CS$WuQs$_CctM@8IGC
zx_0K%r@U9LTxnZg{&*?5tIb#TR&*QiI6GT5X+(oneRY5aLrD3TRiq;BQRWlqD^M9U
zzy-Y45mq`S&n7DwI?^sYkW#*<qy#WewMIq`4xSUK*A#vI;IM{TE?*KI$um!w22(fB
z>u_qw`i8if^V7{qPhbB#MHXdK7fBA>$SQvVQg)DX>XVD!<}#5<0$-okuJSwVHTQS?
zHr=XzlyRzwkJ~<JAxYxt$-r0fzNh}`TTH1Z_fDD{OLr^gC1`Xwo_ij8peK31u$=PI
zc(ST{u;1W(*&nLV+0&y%aw5`JQADdSn}>M)7ToJyc^UqFxUvy~PlW>gc9IRNnETuh
zFiYs&@{I+CY;<$@!A2$%j{--_cCVNcFJtV|xZeaj<qpT{iBFHCdz`ifnA*IUp}%xL
z$ENYQbMzfJ?}z51`tY{l>vfT5TkTBepQD$L#<j0E@7@H+q-Z8Z-sG;x4FQm-@*6E*
zO=dR*t$g$Oxq7Fwjj1NvU=q<PeD>vSo2Mt_f7kDh{*%=#js1%Hr>hKl{;$wT0Ewpm
z>-p$BnwsfObL-6hWWc~_(VI+W7C1Ql(iE*fB#aPBSnOd>E1II+wn6O;6@;WrdWO%$
zus`i=hPr=zkh3bWPX$LNBYg*puQ<`8+K6_NOU#WkohqMK_`k__-jc!B439H_E3GQm
zibuKeUMuw>>kBz=N()kO8jftacT4w!APiBwSTGGaS{SzE>&67noyi!@|IHkVwV;jQ
z@gBm;=-mO=mPiXG9q}wz_(>%+7=vV2^wT$aNf9od8t7mwUq?qFTfUcPFhaQyNmw{(
z`U4uc8Io=E+1i5|e#EoYETJrh?!PhPO+=%48*koO^^t}<J;>HE60MSpc9TcUWs~E9
zoB;A`=;;qwE$!~L3w8mf=^Qt+X>dCLOk$Yy{wnY)AHDn#ch{8ZHd~^_z$O0~0=v#S
zKlDR9FnNldE$oBT8pGqD5KoC4A1fU<ihU$SgjabT0v0xKbYRPm;5Ek3DcJQwsaO$Z
zWhlo74&@}hvk5Ms8r)qv=bs5y<AH6JP0lP&S<?^x4JFKc^SIzVmL>NJK=hz&MU}^+
zrjLZ7<_H8Qob&?Jz|}U@Cd@vV`o{VyL3=e+cfL&p$8$dN;#P>>m$mV-MOf`mnVAJ4
zbRkBcm3*X{w^z@6f-hGw-%AH`vqFFzZG6yJ#y7{)%W!YGql#5d6O%XM+9DkpJ8e_S
zexNU}ELr)A=z<9K&mblyDoI1`&gnBnX8b?}h#9<<eU4wH`uxts#CZk(S>2FtAVB;g
z9W&V~nPfDj+^mzMU9G9t2x*3uiEF=dj-U6B7w?nI`Hud+#(Mx|`~h`e9*lY2D?#O^
z(YG^6OWrx0%+Jdk8Wn%rL6vxqh7gnO;A!@kUVhm}zpuK#e=#7(IV(@Ec)tgG#UNt<
z8-8#V4cdYL18L{3r1>Qa?Qu_;nim~kDQk?4dAbz;7Crd8n^5k?3vvJ=6LzwcQqv+O
zI>vQr9&6?36~F%O@|x1?6}wcKo5y4_M|Jsm8Y7otf&5}M4ATkjc*0U0IG*ca(J3~j
zh{{=P>~M*<@S_g#wV7=&1kt0Fg0}}^E~A1D@WW~QlzBG`23E!U*4*n<4Ot1nhhI1r
zv!;jv9Hxpe$t0opG$vaH#Xt$%WnD0Vat?1r3{2A7f>ZVQ+j#Beu}vyynu+;wfn;zX
zy8Z>w8Ff8c#Y)sVQ5LpEtp^)qg^)`X3(IXsEQU=>*m|qgtVa}t`prrnv7Rvnad)X>
zuSM}bDQ%$xEQfomZ_!5BHFZLoqWrL6bD*{Jkkz65kSh$+TFy0^cAw-C!O^bV5&Tku
zWF}kj$U}jaR8m8%v-D<&X^nu@G?x3n11nH%5l8!vZHvZ(N4$$KF%?D8&gmN8&c9?|
z^(dkda(1i*wJq!HRLkF>>KwIQuJKs+R)HXbF9599Ha>kDgGCW}FOYkW9oa#Fgh9~p
zX&R~^b;HD$7lfCNBt{yMWx9WHnT1)SZSF@b1&djG=+{?@KCU;p$~el+z&-2GPwqs^
zbbg^HvC9%O>a>RYg+?RA&QIBFTtO$%b^EqSxzuh(yJe}$*O-*PJ!v5qO##v8O~X(B
z)n2q6k><URc<JhEnY*dwgra4Z1JhSFmejA$wKh|}!Bs;jvB>P#cF_igp8dZ3Tz^3b
zeSTQt(%oEFJ|Ck5pL=IHPmj|r^B22!Cd*C5nk>pHEy{58<}40ltdMR&gOcgm9qMgU
z5q*Bg0top_&0=4DheOr$LZ^kt)HuOf%}WrS|IaeKFfy8^dg?)TQJzP`dzpfPET9FK
za=WP%)!AQzdD7%>?H>4U`ip0-VEAI7L9Ht2HJ~f&zz(_v=X6#|a@-BOOEKv0p$Z!*
z4ev9Yv*urZSiukPMP2G^cK&;n$phzzxuwex<<mLqciVU=Bb)9B_c?LT3xi6b8#HR8
zPa_k0UE(Pm@t=+NIQ`=H)Bdgt(SSU^IMI|B&Zaz)Uhyi?v-YPOD`mpp-Q7PF?C46o
z|4t9&{ZU%&)D-r#x+f^+@<+QTj#@sI`7PzBg)CQ!y)g=7wMa4#{wr!Yh1O;DIvbqR
z8XeR6Ei`@y9<$m&`jIDJZGH85`<rEaRGBAdJLVP2mH7kHlZZV2Tpi&yw_9n~ex(X(
zX`n4xLHf@I6brOFHUmK!Cu%DFqJC9$Y{2O6-^LX95!9mSLv&TG`?{IA;ehMDic|)h
z0iZgttp@fY@MnK1ZnnV~z26i0KJk(F1d7%m^c5OQI~<iPoAiIMYB=tjm}jg#378_4
zArX@?EMQi(XBToTQz+K2X6TeA_y~(T-n0aLVFUu0lG+qu-94_adWD_=s#Rg^kk_|#
z<!Q!;I5e!Kzjzw~8&nH_yhu7}FoPx)ToChU5X5qXOGH6Xq#MV@#@BTiD%F~5tqkW|
z*$XQoZVG5!z!rVfN-I1*o)9=UkjS+Tqq1vx0mL}ylv}AlaVrPMso1B#VNqr4Ft=q}
zxviRetjbt4B>uz5YDcrpfS{IxAm7^Aj;T;I$8O@rz)Thkhv>37p8a|}RLsg#!$STr
zbDAKv01aMmaO#F@ORc;YP##l5XHUPHiFk^u)Rf^*0I5-Uit%LFLWyRJEtOB{7NT}n
z(l!1C0}WP|f}3yP7{I;#YkA$fDol<1s61x_5=PREeFli4kqJl+$5J^BVz8iFI<0vu
z5mVXU{o&^;Y7$g+LxPX}FS;@i%glh*1C5V;J7Rb0ztFdCf@bRg{B6*mdEgjAxA>p(
zx*ta>83R?y@8|}zl%F~lIXS`X=Btj9;Op*m500GXI>m@D>cDf*BQU$XKEtG=;lcT5
zBbYfnLk(;i)eIGh2Li4f2WX&0We(nIEK*s_gB!lzRlzwuVKPF;KrCdqR}97Zb|6hl
z70rGG3@aKNUdL3gxd+bJs1ibF2@ob5F_->K-^`<}y@9x8?5F(1jh?SY!{K1zwH4u#
zgbe>+UvkrMxw+j(jMBgJaJT^$tmbt3WwU>H&^D9K&4ihTy&|5ABo$Tmu(B>#j7qao
zE=;g);R*HCz^jA3d?{?`n~mX_&wFxWgf2<Y(l4VRvp+WLZ9w17cVy8z>ouOIcW7QM
z+wn^n#b(tU!DRZnv-DsVS&+(4o7Y*wu5wo@<Q6~I{wO=Gpz0m4OALZfEYi9-o!-dm
z@@MX*Er;{nM8hJcmH1pT?GvTcFy(a`Ai=Dz4BIzvj&dqgHRo+=1^Wu#G?IKZ(EH3P
zR?1P|O;q;3l5ayAx-I&w<+Ojk^}B&2#9Byc!tlYz1Z}aNqmt(%o_GKWY63AMp7G;_
zb9LyKEyAtP=B-qbeMeF2?lV@G)P|`SSWSMQhz^PcAqJDa#^-V@{IRaOuD^r83G~v~
z(Dx~UaCWQo2R7$JX11td75|#`cgc!jM1jp=$QKi`Ofba0j7=+Z96f#VuW_~BWBGg`
z$0vWCuW@XD*wV*@LBg)@Xgx{UOYBoIdqBJtTzq(_=n-5P+mp<z7%`e2-uxzbbGkY5
zWTR>G>=&d(9}z6G`R$DA!W=8Nw{@rj&-vK#X6yM-rtQ<iN9-(C`r(_+2h)-JW3I&G
zIjsI!7&Up1|5vN&4r)qv@xm|{6Y6NW_?!*e78W2H49oZ644aaBH|d>eG~4p&>ewNt
z^P{&Sw5;m<>6KL$Whsk?VaZmaFNH@^4thK?`-G~+u~*0ai4@nD&-tF>e95#V{re7g
zzL(j-o?Cgf)Uy`i?j-+WQP<pS2)T!{tuaW!z0yWF-+9dEP@9ZVO*b=-WNK_9nVH1k
z%TXjt>Mv!VdWVC&atQ3W%)a+*Tk{r_0>rh&G)zoPgU9zzl!6ERG$SWvc7Jk}RM>zb
zHdBk1M>0X@d|+nZi;E@E=l5#r<Dnb+Hj(_)xpl*;=ntQIZz!u9j=mKsg;Yr-ye<FK
zyB$P9FcXupw<09>`H6Y|#}pw6Z$D(+ikPYv=TH9abEh-vE^gv3>foM|<Km7_>*>Ca
z1msLbf-uXtwKdazy>s@TeqEq@h+Ubq!{cF#dl^0@pKdVbGW4+bvC|2Aq#sleGZzoX
zx6Mk@MH9Z>F_BbF>TrmnyoV7<6r!Pk;ZVk^nXZ$yiz7pL{6wtg*~P~E$g{)9qodV|
z^9APC$j2}{p~~U$5!=Yq_xSSjAjN`6%aNdYWAI&rIl;-a`4Pe6j3j9?I87wBX32u}
z>!ao%Ka7<bjc^8&>*?Bq(kq+x$xf-V1Q@sH&~b}KcG3C%5BbfA2)3>3Wq_hIp(D8F
zhNGqn;=r#ZAID_3l!QRD9FPa6cz&%?ZQOz2^K)4|y*+!5XIAobqKo<PRMp-~jw656
z7Yegj-AL)C5#qjz4Z#ceA=iznn^xirgC$|Kdvci%_Sf9aY?k<WT@q7Q_+xtcRxBvq
zfYOZY>;@_N4(v@OZy??{Jqe3@VgHyHzAN)lHIDYoYbhJKjpwgf5Nqjgttspmtth`}
zx7hHyUa9Nw<Zesrw_y8NBh>UyLgZDj2$Hq)wI-@D?TnAdtb0kLw>13{8ZI#`kbo@Q
zh+7U;Q%gZS(8Z*n8952JN6WLFxsHaco?ziWTD@Y{kzD>depL>xML$&OZC>}Kk8?zX
z>ue>f0!UR+?arYBvrkg6{O|^3$pAb;SoLo{yBRYAfkdkP!GTqxEhXi_W%wCVc6f}&
zJjY3*azRs<tJ6Q#i1MyVBU9VWln9Sx$%~PH>Yi@UrAu%)ZPCqB`KN{P6TX5J&1fb0
zLU67kbX#i|caz47RN%ov57FOO+**i64mGbYYTwpA%$+1)9@;<uJ|Eg>lrFsGC%%=e
zM=5OH;ae|@WR=(w3+Mtn*KYSw_^1^w`vO_C5yO!a!0vnypm<!3ifD3ALEUY8`*opK
z%sM{W%xqB&9pjG*TiO5AHo$ALojgn@TUm^YzM1O#*&+2n@ljX-D|+B=0yUZYBOzr9
zd;s2kcJVNP0mR7GzammV_xM$Bo^vm3X>nKeCELe@=_==Ung>DX$aSb2pXvuq!j#6p
z&B=7ul<)-LBl2Nk61r6BVOv23yTNT}L@DqdoIjrvJ#N(qvHqNHO`|_qnzIes&qm{`
z<WNaaKFQD-E!7r<y};QyP1YzXS>je$+|Ja%@dE^|8y8O1g4%N*B1$ZsU6RPKkwQG6
zo%RoM!AWP0?g10hrxk1`TtWm!<g-s6)I-$!UV(%t>lH$QfsyDg6my5)DcEVzoPh*b
z0ikP{dNzfCjdDqjTCo_<H3oEmM@<-495czmJ3OXOSMGMhwEES3+V-zz3~zlHV<+;5
mb=B-1m2b&6Op|0P$&u66#*c7dWwHPMPiv{`sg<kRVg3ggSSVEh

literal 0
HcmV?d00001


From 698c367d8d17f88e03415aa95107332fa03a199e Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Fri, 16 Jan 2015 11:00:56 +0300
Subject: [PATCH 47/55] fix ocv_add_module CMake macro:

it tries to remove 'opencv_' prefix from 'name' variable, but
it creates new variable with the same name as module
---
 cmake/OpenCVModule.cmake | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake
index c5325e20f1..c7a3f46023 100644
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -113,7 +113,6 @@ endmacro()
 macro(ocv_add_module _name)
   ocv_debug_message("ocv_add_module(" ${_name} ${ARGN} ")")
   string(TOLOWER "${_name}" name)
-  string(REGEX REPLACE "^opencv_" "" ${name} "${name}")
   set(the_module opencv_${name})
 
   # the first pass - collect modules info, the second pass - create targets

From 10cc7c78dd85b014e3cdcb08eac165c2cfe049a1 Mon Sep 17 00:00:00 2001
From: Jay Bosamiya <jaybosamiya@gmail.com>
Date: Sat, 17 Jan 2015 22:53:30 +0530
Subject: [PATCH 48/55] Fix typo

---
 samples/cpp/grabcut.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/cpp/grabcut.cpp b/samples/cpp/grabcut.cpp
index 110e0ff770..7ab28f66b0 100644
--- a/samples/cpp/grabcut.cpp
+++ b/samples/cpp/grabcut.cpp
@@ -22,10 +22,10 @@ static void help()
         "\tleft mouse button - set rectangle\n"
         "\n"
         "\tCTRL+left mouse button - set GC_BGD pixels\n"
-        "\tSHIFT+left mouse button - set CG_FGD pixels\n"
+        "\tSHIFT+left mouse button - set GC_FGD pixels\n"
         "\n"
         "\tCTRL+right mouse button - set GC_PR_BGD pixels\n"
-        "\tSHIFT+right mouse button - set CG_PR_FGD pixels\n" << endl;
+        "\tSHIFT+right mouse button - set GC_PR_FGD pixels\n" << endl;
 }
 
 const Scalar RED = Scalar(0,0,255);

From 9a1a9d9aff19af3b1cbbbc9e4fc033a949ad66ff Mon Sep 17 00:00:00 2001
From: Marvin Smith <marvin_smith1@me.com>
Date: Sat, 17 Jan 2015 16:59:56 -0800
Subject: [PATCH 49/55] Updating CMake to get GDAL working again on OS X.

---
 modules/imgcodecs/CMakeLists.txt     | 1 +
 modules/imgcodecs/src/grfmt_gdal.cpp | 1 +
 2 files changed, 2 insertions(+)

diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt
index 3d0110ef48..893a2c1c91 100644
--- a/modules/imgcodecs/CMakeLists.txt
+++ b/modules/imgcodecs/CMakeLists.txt
@@ -51,6 +51,7 @@ if(HAVE_OPENEXR)
 endif()
 
 if(HAVE_GDAL)
+  add_definitions(-DHAVE_GDAL)
   include_directories(SYSTEM ${GDAL_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${GDAL_LIBRARY})
 endif()
diff --git a/modules/imgcodecs/src/grfmt_gdal.cpp b/modules/imgcodecs/src/grfmt_gdal.cpp
index f172f6f9aa..b0caa7bd09 100644
--- a/modules/imgcodecs/src/grfmt_gdal.cpp
+++ b/modules/imgcodecs/src/grfmt_gdal.cpp
@@ -38,6 +38,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
+#include "precomp.hpp"
 #include "grfmt_gdal.hpp"
 
 #ifdef HAVE_GDAL

From 91fbe00caa7d035594fc1a45b4d26715f0d2c19b Mon Sep 17 00:00:00 2001
From: Marvin Smith <marvin_smith1@me.com>
Date: Sat, 17 Jan 2015 19:28:58 -0800
Subject: [PATCH 50/55] Updated gdal tutorial to include comments.

---
 .../raster-gdal/raster_io_gdal.markdown       | 16 +++----
 .../imgcodecs/include/opencv2/imgcodecs.hpp   |  2 +
 .../HighGUI/GDAL_IO/gdal-image.cpp            | 46 +++++++++----------
 3 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/doc/tutorials/highgui/raster-gdal/raster_io_gdal.markdown b/doc/tutorials/highgui/raster-gdal/raster_io_gdal.markdown
index a60c754551..feb2421170 100644
--- a/doc/tutorials/highgui/raster-gdal/raster_io_gdal.markdown
+++ b/doc/tutorials/highgui/raster-gdal/raster_io_gdal.markdown
@@ -3,7 +3,7 @@ Reading Geospatial Raster files with GDAL {#tutorial_raster_io_gdal}
 
 Geospatial raster data is a heavily used product in Geographic Information Systems and
 Photogrammetry. Raster data typically can represent imagery and Digital Elevation Models (DEM). The
-standard library for loading GIS imagery is the Geographic Data Abstraction Library (GDAL). In this
+standard library for loading GIS imagery is the Geographic Data Abstraction Library [(GDAL)](http://www.gdal.org). In this
 example, we will show techniques for loading GIS raster formats using native OpenCV functions. In
 addition, we will show some an example of how OpenCV can use this data for novel and interesting
 purposes.
@@ -13,8 +13,8 @@ Goals
 
 The primary objectives for this tutorial:
 
--   How to use OpenCV imread to load satellite imagery.
--   How to use OpenCV imread to load SRTM Digital Elevation Models
+-   How to use OpenCV [imread](@ref imread) to load satellite imagery.
+-   How to use OpenCV [imread](@ref imread) to load SRTM Digital Elevation Models
 -   Given the corner coordinates of both the image and DEM, correllate the elevation data to the
     image to find elevations for each pixel.
 -   Show a basic, easy-to-implement example of a terrain heat map.
@@ -54,9 +54,9 @@ signed shorts.
 Notes
 -----
 
-### Lat/Lon (Geodetic) Coordinates should normally be avoided
+### Lat/Lon (Geographic) Coordinates should normally be avoided
 
-The Geodetic Coordinate System is a spherical coordinate system, meaning that using them with
+The Geographic Coordinate System is a spherical coordinate system, meaning that using them with
 Cartesian mathematics is technically incorrect. This demo uses them to increase the readability and
 is accurate enough to make the point. A better coordinate system would be Universal Transverse
 Mercator.
@@ -94,8 +94,8 @@ Below is the output of the program. Use the first image as the input. For the DE
 the SRTM file located at the USGS here.
 [<http://dds.cr.usgs.gov/srtm/version2_1/SRTM1/Region_04/N37W123.hgt.zip>](http://dds.cr.usgs.gov/srtm/version2_1/SRTM1/Region_04/N37W123.hgt.zip)
 
-![](images/gdal_output.jpg)
+![Input Image](images/gdal_output.jpg)
 
-![](images/gdal_heat-map.jpg)
+![Heat Map](images/gdal_heat-map.jpg)
 
-![](images/gdal_flood-zone.jpg)
+![Heat Map Overlay](images/gdal_flood-zone.jpg)
diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
index a22d3dca76..b0c942172c 100644
--- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp
+++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp
@@ -90,6 +90,8 @@ enum { IMWRITE_PNG_STRATEGY_DEFAULT      = 0,
 
 /** @brief Loads an image from a file.
 
+@anchor imread
+
 @param filename Name of file to be loaded.
 @param flags Flags specifying the color type of a loaded image:
 -   CV_LOAD_IMAGE_ANYDEPTH - If set, return 16-bit/32-bit image when the input has the
diff --git a/samples/cpp/tutorial_code/HighGUI/GDAL_IO/gdal-image.cpp b/samples/cpp/tutorial_code/HighGUI/GDAL_IO/gdal-image.cpp
index 48ef254406..6e7c950a26 100644
--- a/samples/cpp/tutorial_code/HighGUI/GDAL_IO/gdal-image.cpp
+++ b/samples/cpp/tutorial_code/HighGUI/GDAL_IO/gdal-image.cpp
@@ -1,13 +1,13 @@
-/**
+/*
  * gdal_image.cpp -- Load GIS data into OpenCV Containers using the Geospatial Data Abstraction Library
 */
 
-/// OpenCV Headers
+// OpenCV Headers
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
 
-/// C++ Standard Libraries
+// C++ Standard Libraries
 #include <cmath>
 #include <iostream>
 #include <stdexcept>
@@ -15,22 +15,22 @@
 
 using namespace std;
 
-/// define the corner points
-///    Note that GDAL can natively determine this
+// define the corner points
+//    Note that GDAL library can natively determine this
 cv::Point2d tl( -122.441017, 37.815664 );
 cv::Point2d tr( -122.370919, 37.815311 );
 cv::Point2d bl( -122.441533, 37.747167 );
 cv::Point2d br( -122.3715,   37.746814 );
 
-/// determine dem corners
+// determine dem corners
 cv::Point2d dem_bl( -122.0, 38);
 cv::Point2d dem_tr( -123.0, 37);
 
-/// range of the heat map colors
+// range of the heat map colors
 std::vector<std::pair<cv::Vec3b,double> > color_range;
 
 
-/// List of all function prototypes
+// List of all function prototypes
 cv::Point2d lerp( const cv::Point2d&, const cv::Point2d&, const double& );
 
 cv::Vec3b get_dem_color( const double& );
@@ -43,7 +43,7 @@ void add_color( cv::Vec3b& pix, const uchar& b, const uchar& g, const uchar& r )
 
 
-/**
+/*
  * Linear Interpolation
  * p1 - Point 1
  * p2 - Point 2
@@ -54,7 +54,7 @@ cv::Point2d lerp( cv::Point2d const& p1, cv::Point2d const& p2, const double& t
                         ((1-t)*p1.y) + (t*p2.y));
 }
 
-/**
+/*
  * Interpolate Colors
 */
 template <typename DATATYPE, int N>
@@ -69,7 +69,7 @@ cv::Vec<DATATYPE,N> lerp( cv::Vec<DATATYPE,N> const& minColor,
     return output;
 }
 
-/**
+/*
  * Compute the dem color
 */
 cv::Vec3b get_dem_color( const double& elevation ){
@@ -103,7 +103,7 @@ cv::Vec3b get_dem_color( const double& elevation ){
     return lerp( color_range[idx].first, color_range[idx+1].first, t);
 }
 
-/**
+/*
  * Given a pixel coordinate and the size of the input image, compute the pixel location
  * on the DEM image.
 */
@@ -122,7 +122,7 @@ cv::Point2d world2dem( cv::Point2d const& coordinate, const cv::Size& dem_size
     return output;
 }
 
-/**
+/*
  * Convert a pixel coordinate to world coordinates
 */
 cv::Point2d pixel2world( const int& x, const int& y, const cv::Size& size ){
@@ -139,7 +139,7 @@ cv::Point2d pixel2world( const int& x, const int& y, const cv::Size& size ){
     return lerp( leftSide, rightSide, rx );
 }
 
-/**
+/*
  * Add color to a specific pixel color value
 */
 void add_color( cv::Vec3b& pix, const uchar& b, const uchar& g, const uchar& r ){
@@ -150,12 +150,12 @@ void add_color( cv::Vec3b& pix, const uchar& b, const uchar& g, const uchar& r )
 }
 
 
-/**
+/*
  * Main Function
 */
 int main( int argc, char* argv[] ){
 
-    /**
+    /*
      * Check input arguments
     */
     if( argc < 3 ){
@@ -163,22 +163,22 @@ int main( int argc, char* argv[] ){
         return 1;
     }
 
-    /// load the image (note that we don't have the projection information.  You will
-    /// need to load that yourself or use the full GDAL driver.  The values are pre-defined
-    /// at the top of this file
+    // load the image (note that we don't have the projection information.  You will
+    // need to load that yourself or use the full GDAL driver.  The values are pre-defined
+    // at the top of this file
     cv::Mat image = cv::imread(argv[1], cv::IMREAD_LOAD_GDAL | cv::IMREAD_COLOR );
 
-    /// load the dem model
+    // load the dem model
     cv::Mat dem = cv::imread(argv[2], cv::IMREAD_LOAD_GDAL | cv::IMREAD_ANYDEPTH );
 
-    /// create our output products
+    // create our output products
     cv::Mat output_dem(   image.size(), CV_8UC3 );
     cv::Mat output_dem_flood(   image.size(), CV_8UC3 );
 
-    /// for sanity sake, make sure GDAL Loads it as a signed short
+    // for sanity sake, make sure GDAL Loads it as a signed short
     if( dem.type() != CV_16SC1 ){ throw std::runtime_error("DEM image type must be CV_16SC1"); }
 
-    /// define the color range to create our output DEM heat map
+    // define the color range to create our output DEM heat map
     //  Pair format ( Color, elevation );  Push from low to high
     //  Note:  This would be perfect for a configuration file, but is here for a working demo.
     color_range.push_back( std::pair<cv::Vec3b,double>(cv::Vec3b( 188, 154,  46),   -1));

From 9a0e038a48d33b2fa729428ff5a8184b0023cae0 Mon Sep 17 00:00:00 2001
From: Marvin Smith <marvin_smith1@me.com>
Date: Sat, 17 Jan 2015 20:37:51 -0800
Subject: [PATCH 51/55] Fixed compile warning from redefined macro.

---
 modules/imgcodecs/CMakeLists.txt     |  1 -
 modules/imgcodecs/src/grfmt_gdal.cpp | 13 +++++++++++--
 modules/imgcodecs/src/grfmt_gdal.hpp | 14 ++++++++++----
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt
index 893a2c1c91..3d0110ef48 100644
--- a/modules/imgcodecs/CMakeLists.txt
+++ b/modules/imgcodecs/CMakeLists.txt
@@ -51,7 +51,6 @@ if(HAVE_OPENEXR)
 endif()
 
 if(HAVE_GDAL)
-  add_definitions(-DHAVE_GDAL)
   include_directories(SYSTEM ${GDAL_INCLUDE_DIR})
   list(APPEND GRFMT_LIBS ${GDAL_LIBRARY})
 endif()
diff --git a/modules/imgcodecs/src/grfmt_gdal.cpp b/modules/imgcodecs/src/grfmt_gdal.cpp
index b0caa7bd09..0311630950 100644
--- a/modules/imgcodecs/src/grfmt_gdal.cpp
+++ b/modules/imgcodecs/src/grfmt_gdal.cpp
@@ -39,10 +39,16 @@
 //
 //M*/
 #include "precomp.hpp"
-#include "grfmt_gdal.hpp"
+
+// GDAL Macros
+#include "cvconfig.h"
 
 #ifdef HAVE_GDAL
 
+// Our Header
+#include "grfmt_gdal.hpp"
+
+
 /// C++ Standard Libraries
 #include <iostream>
 #include <stdexcept>
@@ -196,7 +202,10 @@ GdalDecoder::~GdalDecoder(){
 /**
  * Convert data range
 */
-double range_cast( const GDALDataType& gdalType, const int& cvDepth, const double& value ){
+double range_cast( const GDALDataType& gdalType,
+                   const int& cvDepth,
+                   const double& value )
+{
 
     // uint8 -> uint8
     if( gdalType == GDT_Byte && cvDepth == CV_8U ){
diff --git a/modules/imgcodecs/src/grfmt_gdal.hpp b/modules/imgcodecs/src/grfmt_gdal.hpp
index b2cd224467..73d39c9470 100644
--- a/modules/imgcodecs/src/grfmt_gdal.hpp
+++ b/modules/imgcodecs/src/grfmt_gdal.hpp
@@ -42,16 +42,15 @@
 #ifndef __GRFMT_GDAL_HPP__
 #define __GRFMT_GDAL_HPP__
 
+/// OpenCV FMT Base Type
+#include "grfmt_base.hpp"
+
 /// Macro to make sure we specified GDAL in CMake
 #ifdef HAVE_GDAL
 
 /// C++ Libraries
 #include <iostream>
 
-/// OpenCV Libraries
-#include "grfmt_base.hpp"
-#include "precomp.hpp"
-
 /// Geospatial Data Abstraction Library
 #include <gdal/cpl_conv.h>
 #include <gdal/gdal_priv.h>
@@ -61,6 +60,13 @@
 /// Start of CV Namespace
 namespace cv {
 
+/**
+ * Convert GDAL Pixel Range to OpenCV Pixel Range
+*/
+double range_cast( const GDALDataType& gdalType,
+                   const int& cvDepth,
+                   const double& value );
+
 /**
  * Convert GDAL Palette Interpretation to OpenCV Pixel Type
 */

From cc97c57fd495c25c7872d25a7acef7c100e5cfed Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <alexander.smorkalov@itseez.com>
Date: Fri, 16 Jan 2015 13:22:51 +0300
Subject: [PATCH 52/55] Accuracy fix for blenders in stitching module.

---
 modules/stitching/src/blenders.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index caae058681..015ceb025f 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -476,9 +476,11 @@ static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _ma
 
 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
 {
-    Mat src = _src.getMat();
-    Mat weight = _weight.getMat();
+    Mat src;
+    Mat weight;
 #ifdef HAVE_TEGRA_OPTIMIZATION
+    src = _src.getMat();
+    weight = _weight.getMat();
     if(tegra::normalizeUsingWeightMap(weight, src))
         return;
 #endif
@@ -488,9 +490,12 @@ void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
             !ocl_normalizeUsingWeightMap(_weight, _src) )
 #endif
     {
+        src = _src.getMat();
+        weight = _weight.getMat();
+
         CV_Assert(src.type() == CV_16SC3);
 
-        if(weight.type() == CV_32FC1)
+        if (weight.type() == CV_32FC1)
         {
             for (int y = 0; y < src.rows; ++y)
             {

From 5978ef1df5a1333877b2c916af44f878d4dbee71 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 19 Jan 2015 10:43:49 +0300
Subject: [PATCH 53/55] Fix arm linux toolchain file

Use find_program() to set CMAKE_C_COMPILER and CMAKE_CXX_COMPILER.

Originally the variables were set to the compiler name, without an absolute path.
CMake 3.0 tries to find them in CMAKE_FIND_ROOT_PATH (since
CMAKE_FIND_ROOT_PATH_MODE_PROGRAM was set to ONLY) and fails.
---
 platforms/linux/arm-gnueabi.toolchain.cmake | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/platforms/linux/arm-gnueabi.toolchain.cmake b/platforms/linux/arm-gnueabi.toolchain.cmake
index 2c5b7406d8..448dfa6b1c 100644
--- a/platforms/linux/arm-gnueabi.toolchain.cmake
+++ b/platforms/linux/arm-gnueabi.toolchain.cmake
@@ -5,13 +5,12 @@ set(CMAKE_SYSTEM_PROCESSOR arm)
 set(GCC_COMPILER_VERSION "4.6" CACHE STRING "GCC Compiler version")
 
 set(FLOAT_ABI_SUFFIX "")
-
 if (NOT SOFTFP)
   set(FLOAT_ABI_SUFFIX "hf")
 endif()
 
-set(CMAKE_C_COMPILER    arm-linux-gnueabi${FLOAT_ABI_SUFFIX}-gcc-${GCC_COMPILER_VERSION})
-set(CMAKE_CXX_COMPILER  arm-linux-gnueabi${FLOAT_ABI_SUFFIX}-g++-${GCC_COMPILER_VERSION})
+find_program(CMAKE_C_COMPILER NAMES arm-linux-gnueabi${FLOAT_ABI_SUFFIX}-gcc-${GCC_COMPILER_VERSION})
+find_program(CMAKE_CXX_COMPILER NAMES arm-linux-gnueabi${FLOAT_ABI_SUFFIX}-g++-${GCC_COMPILER_VERSION})
 set(ARM_LINUX_SYSROOT /usr/arm-linux-gnueabi${FLOAT_ABI_SUFFIX} CACHE PATH "ARM cross compilation system root")
 
 set(CMAKE_CXX_FLAGS           ""                    CACHE STRING "c++ flags")

From 3f8cefea0e0c64668cbbfebfd14ddac0d42f6f4b Mon Sep 17 00:00:00 2001
From: Ben Hagen <c.ben.hagen@gmail.com>
Date: Mon, 19 Jan 2015 21:51:19 +0100
Subject: [PATCH 54/55] store user-provided data in PlaneTracker class

---
 samples/python2/plane_tracker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/python2/plane_tracker.py b/samples/python2/plane_tracker.py
index 5ae0933bc0..94c8f94809 100755
--- a/samples/python2/plane_tracker.py
+++ b/samples/python2/plane_tracker.py
@@ -77,7 +77,7 @@ class PlaneTracker:
                 descs.append(desc)
         descs = np.uint8(descs)
         self.matcher.add([descs])
-        target = PlanarTarget(image = image, rect=rect, keypoints = points, descrs=descs, data=None)
+        target = PlanarTarget(image = image, rect=rect, keypoints = points, descrs=descs, data=data)
         self.targets.append(target)
 
     def clear(self):

From 5422cbcecb854f0b7c181ac50d1d7b9e5939d9a3 Mon Sep 17 00:00:00 2001
From: Ben Hagen <c.ben.hagen@gmail.com>
Date: Mon, 19 Jan 2015 21:57:23 +0100
Subject: [PATCH 55/55] make PlaneTracker class more thread-safe

---
 samples/python2/plane_tracker.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/samples/python2/plane_tracker.py b/samples/python2/plane_tracker.py
index 5ae0933bc0..20dd2ca4f4 100755
--- a/samples/python2/plane_tracker.py
+++ b/samples/python2/plane_tracker.py
@@ -87,10 +87,10 @@ class PlaneTracker:
 
     def track(self, frame):
         '''Returns a list of detected TrackedTarget objects'''
-        self.frame_points, self.frame_descrs = self.detect_features(frame)
-        if len(self.frame_points) < MIN_MATCH_COUNT:
+        frame_points, frame_descrs = self.detect_features(frame)
+        if len(frame_points) < MIN_MATCH_COUNT:
             return []
-        matches = self.matcher.knnMatch(self.frame_descrs, k = 2)
+        matches = self.matcher.knnMatch(frame_descrs, k = 2)
         matches = [m[0] for m in matches if len(m) == 2 and m[0].distance < m[1].distance * 0.75]
         if len(matches) < MIN_MATCH_COUNT:
             return []
@@ -103,7 +103,7 @@ class PlaneTracker:
                 continue
             target = self.targets[imgIdx]
             p0 = [target.keypoints[m.trainIdx].pt for m in matches]
-            p1 = [self.frame_points[m.queryIdx].pt for m in matches]
+            p1 = [frame_points[m.queryIdx].pt for m in matches]
             p0, p1 = np.float32((p0, p1))
             H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 3.0)
             status = status.ravel() != 0