Normalize line endings and whitespace

2012-10-17 03:18:30 +04:00
parent 69020da607
commit 04384a71e4
1516 changed files with 258846 additions and 258162 deletions
@@ -1,103 +1,103 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// NVIDIA CUDA implementation of Brox et al Optical Flow algorithm
-//
-// Algorithm is explained in the original paper:
-//      T. Brox, A. Bruhn, N. Papenberg, J. Weickert:
-//      High accuracy optical flow estimation based on a theory for warping.
-//      ECCV 2004.
-//
-// Implementation by Mikhail Smirnov
-// email: msmirnov@nvidia.com, devsupport@nvidia.com
-//
-// Credits for help with the code to:
-// Alexey Mendelenko, Anton Obukhov, and Alexander Kharlamov.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef _ncv_optical_flow_h_
-#define _ncv_optical_flow_h_
-
-#include "NCV.hpp"
-
-/// \brief Model and solver parameters
-struct NCVBroxOpticalFlowDescriptor
-{
-    /// flow smoothness
-    Ncv32f alpha;
-    /// gradient constancy importance
-    Ncv32f gamma;
-    /// pyramid scale factor
-    Ncv32f scale_factor;
-    /// number of lagged non-linearity iterations (inner loop)
-    Ncv32u number_of_inner_iterations;
-    /// number of warping iterations (number of pyramid levels)
-    Ncv32u number_of_outer_iterations;
-    /// number of linear system solver iterations
-    Ncv32u number_of_solver_iterations;
-};
-
-/////////////////////////////////////////////////////////////////////////////////////////
-/// \brief Compute optical flow
-///
-/// Based on method by Brox et al [2004]
-/// \param [in]  desc              model and solver parameters
-/// \param [in]  gpu_mem_allocator GPU memory allocator
-/// \param [in]  frame0            source frame
-/// \param [in]  frame1            frame to track
-/// \param [out] u                 flow horizontal component (along \b x axis)
-/// \param [out] v                 flow vertical component (along \b y axis)
-/// \return                        computation status
-/////////////////////////////////////////////////////////////////////////////////////////
-
-NCV_EXPORTS
-NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
-                             INCVMemAllocator &gpu_mem_allocator,
-                             const NCVMatrix<Ncv32f> &frame0,
-                             const NCVMatrix<Ncv32f> &frame1,
-                             NCVMatrix<Ncv32f> &u,
-                             NCVMatrix<Ncv32f> &v,
-                             cudaStream_t stream);
-
-#endif
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// NVIDIA CUDA implementation of Brox et al Optical Flow algorithm
+//
+// Algorithm is explained in the original paper:
+//      T. Brox, A. Bruhn, N. Papenberg, J. Weickert:
+//      High accuracy optical flow estimation based on a theory for warping.
+//      ECCV 2004.
+//
+// Implementation by Mikhail Smirnov
+// email: msmirnov@nvidia.com, devsupport@nvidia.com
+//
+// Credits for help with the code to:
+// Alexey Mendelenko, Anton Obukhov, and Alexander Kharlamov.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _ncv_optical_flow_h_
+#define _ncv_optical_flow_h_
+
+#include "NCV.hpp"
+
+/// \brief Model and solver parameters
+struct NCVBroxOpticalFlowDescriptor
+{
+    Ncv32f alpha;                       ///< flow smoothness
+    Ncv32f gamma;                       ///< gradient constancy importance
+    Ncv32f scale_factor;                ///< pyramid scale factor
+    Ncv32u number_of_inner_iterations;  ///< number of lagged non-linearity iterations (inner loop)
+    Ncv32u number_of_outer_iterations;  ///< number of warping iterations (number of pyramid levels)
+    Ncv32u number_of_solver_iterations; ///< number of linear system solver iterations
+};
+
+/////////////////////////////////////////////////////////////////////////////////////////
+/// \brief Compute optical flow
+///
+/// Based on method by Brox et al [2004]
+/// \param [in]  desc              model and solver parameters
+/// \param [in]  gpu_mem_allocator GPU memory allocator
+/// \param [in]  frame0            source frame
+/// \param [in]  frame1            frame to track
+/// \param [out] u                 flow horizontal component (along \b x axis)
+/// \param [out] v                 flow vertical component (along \b y axis)
+/// \param [in]  stream            CUDA stream handle (presumably the stream the work is issued on - confirm against the implementation)
+/// \return                        computation status
+/////////////////////////////////////////////////////////////////////////////////////////
+
+NCV_EXPORTS
+NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
+                             INCVMemAllocator &gpu_mem_allocator,
+                             const NCVMatrix<Ncv32f> &frame0,
+                             const NCVMatrix<Ncv32f> &frame1,
+                             NCVMatrix<Ncv32f> &u,
+                             NCVMatrix<Ncv32f> &v,
+                             cudaStream_t stream);
+
+#endif
@@ -1,461 +1,461 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
-// 
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
-//
-// The algorithm and code are explained in the upcoming GPU Computing Gems
-// chapter in detail:
-//
-//   Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
-//   PDF URL placeholder
-//   email: aobukhov@nvidia.com, devsupport@nvidia.com
-//
-// Credits for help with the code to:
-// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#ifndef _ncvhaarobjectdetection_hpp_
-#define _ncvhaarobjectdetection_hpp_
-
-#include <string>
-#include "NCV.hpp"
-
-
-//==============================================================================
-//
-// Guaranteed size cross-platform classifier structures
-//
-//==============================================================================
-#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__  > 4
-typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a;
-#else
-typedef Ncv32f Ncv32f_a;
-#endif
-
-struct HaarFeature64
-{
-    uint2 _ui2;
-
-#define HaarFeature64_CreateCheck_MaxRectField                  0xFF
-
-    __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
-    {
-        ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
-        ((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
-        ((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
-        ((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
-        ((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setWeight(Ncv32f weight)
-    {
-        ((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
-        return NCV_SUCCESS;
-    }
-
-    __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
-    {
-        NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
-        *rectX = tmpRect.x;
-        *rectY = tmpRect.y;
-        *rectWidth = tmpRect.width;
-        *rectHeight = tmpRect.height;
-    }
-
-    __device__ __host__ Ncv32f getWeight(void)
-    {
-        return *(Ncv32f_a*)(&this->_ui2.y);
-    }
-};
-
-
-struct HaarFeatureDescriptor32
-{
-private:
-
-#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted        0x80000000
-#define HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf  0x40000000
-#define HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf 0x20000000
-#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures      0x1F
-#define HaarFeatureDescriptor32_NumFeatures_Shift               24
-#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset    0x00FFFFFF
-
-    Ncv32u desc;
-
-public:
-
-    __host__ NCVStatus create(NcvBool bTilted, NcvBool bLeftLeaf, NcvBool bRightLeaf,
-                              Ncv32u numFeatures, Ncv32u offsetFeatures)
-    {
-        if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
-        {
-            return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
-        }
-        if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
-        {
-            return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
-        }
-        this->desc = 0;
-        this->desc |= (bTilted ? HaarFeatureDescriptor32_Interpret_MaskFlagTilted : 0);
-        this->desc |= (bLeftLeaf ? HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf : 0);
-        this->desc |= (bRightLeaf ? HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf : 0);
-        this->desc |= (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift);
-        this->desc |= offsetFeatures;
-        return NCV_SUCCESS;
-    }
-
-    __device__ __host__ NcvBool isTilted(void)
-    {
-        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted) != 0;
-    }
-
-    __device__ __host__ NcvBool isLeftNodeLeaf(void)
-    {
-        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf) != 0;
-    }
-
-    __device__ __host__ NcvBool isRightNodeLeaf(void)
-    {
-        return (this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf) != 0;
-    }
-
-    __device__ __host__ Ncv32u getNumFeatures(void)
-    {
-        return (this->desc >> HaarFeatureDescriptor32_NumFeatures_Shift) & HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures;
-    }
-
-    __device__ __host__ Ncv32u getFeaturesOffset(void)
-    {
-        return this->desc & HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset;
-    }
-};
-
-struct HaarClassifierNodeDescriptor32
-{
-    uint1 _ui1;
-
-    __host__ NCVStatus create(Ncv32f leafValue)
-    {
-        *(Ncv32f_a *)&this->_ui1 = leafValue;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
-    {
-        this->_ui1.x = offsetHaarClassifierNode;
-        return NCV_SUCCESS;
-    }
-
-    __host__ Ncv32f getLeafValueHost(void)
-    {
-        return *(Ncv32f_a *)&this->_ui1.x;
-    }
-
-#ifdef __CUDACC__
-    __device__ Ncv32f getLeafValue(void)
-    {
-        return __int_as_float(this->_ui1.x);
-    }
-#endif
-
-    __device__ __host__ Ncv32u getNextNodeOffset(void)
-    {
-        return this->_ui1.x;
-    }
-};
-
-#if defined __GNUC__ && __GNUC__ > 2 && __GNUC_MINOR__  > 4
-typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
-#else
-typedef Ncv32u Ncv32u_a;
-#endif
-
-struct HaarClassifierNode128
-{
-    uint4 _ui4;
-
-    __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
-    {
-        this->_ui4.x = *(Ncv32u *)&f;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setThreshold(Ncv32f t)
-    {
-        this->_ui4.y = *(Ncv32u_a *)&t;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
-    {
-        this->_ui4.z = *(Ncv32u_a *)&nl;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
-    {
-        this->_ui4.w = *(Ncv32u_a *)&nr;
-        return NCV_SUCCESS;
-    }
-
-    __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
-    {
-        return *(HaarFeatureDescriptor32 *)&this->_ui4.x;
-    }
-
-    __host__ __device__ Ncv32f getThreshold(void)
-    {
-        return *(Ncv32f_a*)&this->_ui4.y;
-    }
-
-    __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
-    {
-        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z;
-    }
-
-    __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
-    {
-        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w;
-    }
-};
-
-
-struct HaarStage64
-{
-#define HaarStage64_Interpret_MaskRootNodes         0x0000FFFF
-#define HaarStage64_Interpret_MaskRootNodeOffset    0xFFFF0000
-#define HaarStage64_Interpret_ShiftRootNodeOffset   16
-
-    uint2 _ui2;
-
-    __host__ NCVStatus setStageThreshold(Ncv32f t)
-    {
-        this->_ui2.x = *(Ncv32u_a *)&t;
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
-    {
-        if (val > (HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset))
-        {
-            return NCV_HAAR_XML_LOADING_EXCEPTION;
-        }
-        this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
-        return NCV_SUCCESS;
-    }
-
-    __host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
-    {
-        if (val > HaarStage64_Interpret_MaskRootNodes)
-        {
-            return NCV_HAAR_XML_LOADING_EXCEPTION;
-        }
-        this->_ui2.y = val | (this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset);
-        return NCV_SUCCESS;
-    }
-
-    __host__ __device__ Ncv32f getStageThreshold(void)
-    {
-        return *(Ncv32f_a*)&this->_ui2.x;
-    }
-
-    __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
-    {
-        return (this->_ui2.y >> HaarStage64_Interpret_ShiftRootNodeOffset);
-    }
-
-    __host__ __device__ Ncv32u getNumClassifierRootNodes(void)
-    {
-        return (this->_ui2.y & HaarStage64_Interpret_MaskRootNodes);
-    }
-};
-
-
-NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
-NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
-NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
-NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
-NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
-
-
-//==============================================================================
-//
-// Classifier cascade descriptor
-//
-//==============================================================================
-
-
-struct HaarClassifierCascadeDescriptor
-{
-    Ncv32u NumStages;
-    Ncv32u NumClassifierRootNodes;
-    Ncv32u NumClassifierTotalNodes;
-    Ncv32u NumFeatures;
-    NcvSize32u ClassifierSize;
-    NcvBool bNeedsTiltedII;
-    NcvBool bHasStumpsOnly;
-};
-
-
-//==============================================================================
-//
-// Functional interface
-//
-//==============================================================================
-
-
-enum
-{
-    NCVPipeObjDet_Default               = 0x000,
-    NCVPipeObjDet_UseFairImageScaling   = 0x001,
-    NCVPipeObjDet_FindLargestObject     = 0x002,
-    NCVPipeObjDet_VisualizeInPlace      = 0x004,
-};
-
-
-NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
-                                                        NcvSize32u srcRoi,
-                                                        NCVVector<NcvRect32u> &d_dstRects,
-                                                        Ncv32u &dstNumRects,
-            
-                                                        HaarClassifierCascadeDescriptor &haar,
-                                                        NCVVector<HaarStage64> &h_HaarStages,
-                                                        NCVVector<HaarStage64> &d_HaarStages,
-                                                        NCVVector<HaarClassifierNode128> &d_HaarNodes,
-                                                        NCVVector<HaarFeature64> &d_HaarFeatures,
-            
-                                                        NcvSize32u minObjSize,
-                                                        Ncv32u minNeighbors,      //default 4
-                                                        Ncv32f scaleStep,         //default 1.2f
-                                                        Ncv32u pixelStep,         //default 1
-                                                        Ncv32u flags,             //default NCVPipeObjDet_Default
-            
-                                                        INCVMemAllocator &gpuAllocator,
-                                                        INCVMemAllocator &cpuAllocator,
-                                                        cudaDeviceProp &devProp,
-                                                        cudaStream_t cuStream);
-
-
-#define OBJDET_MASK_ELEMENT_INVALID_32U     0xFFFFFFFF
-#define HAAR_STDDEV_BORDER                  1
-
-
-NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
-                                                           NCVMatrix<Ncv32f> &d_weights,
-                                                           NCVMatrixAlloc<Ncv32u> &d_pixelMask,
-                                                           Ncv32u &numDetections,
-                                                           HaarClassifierCascadeDescriptor &haar,
-                                                           NCVVector<HaarStage64> &h_HaarStages,
-                                                           NCVVector<HaarStage64> &d_HaarStages,
-                                                           NCVVector<HaarClassifierNode128> &d_HaarNodes,
-                                                           NCVVector<HaarFeature64> &d_HaarFeatures,
-                                                           NcvBool bMaskElements,
-                                                           NcvSize32u anchorsRoi,
-                                                           Ncv32u pixelStep,
-                                                           Ncv32f scaleArea,
-                                                           INCVMemAllocator &gpuAllocator,
-                                                           INCVMemAllocator &cpuAllocator,
-                                                           cudaDeviceProp &devProp,
-                                                           cudaStream_t cuStream);
-
-
-NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
-                                                         NCVMatrix<Ncv32f> &h_weights,
-                                                         NCVMatrixAlloc<Ncv32u> &h_pixelMask,
-                                                         Ncv32u &numDetections,
-                                                         HaarClassifierCascadeDescriptor &haar,
-                                                         NCVVector<HaarStage64> &h_HaarStages,
-                                                         NCVVector<HaarClassifierNode128> &h_HaarNodes,
-                                                         NCVVector<HaarFeature64> &h_HaarFeatures,
-                                                         NcvBool bMaskElements,
-                                                         NcvSize32u anchorsRoi,
-                                                         Ncv32u pixelStep,
-                                                         Ncv32f scaleArea);
-
-
-#define RECT_SIMILARITY_PROPORTION      0.2f
-
-
-NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
-                                                     Ncv32u numPixelMaskDetections,
-                                                     NCVVector<NcvRect32u> &hypotheses,
-                                                     Ncv32u &totalDetections,
-                                                     Ncv32u totalMaxDetections,
-                                                     Ncv32u rectWidth,
-                                                     Ncv32u rectHeight,
-                                                     Ncv32f curScale,
-                                                     cudaStream_t cuStream);
-
-
-NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
-                                                   Ncv32u numPixelMaskDetections,
-                                                   NCVVector<NcvRect32u> &hypotheses,
-                                                   Ncv32u &totalDetections,
-                                                   Ncv32u totalMaxDetections,
-                                                   Ncv32u rectWidth,
-                                                   Ncv32u rectHeight,
-                                                   Ncv32f curScale);
-
-
-NCV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
-                                               Ncv32u &numNodes, Ncv32u &numFeatures);
-
-
-NCV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
-                                               HaarClassifierCascadeDescriptor &haar,
-                                               NCVVector<HaarStage64> &h_HaarStages,
-                                               NCVVector<HaarClassifierNode128> &h_HaarNodes,
-                                               NCVVector<HaarFeature64> &h_HaarFeatures);
-
-
-NCV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
-                                             HaarClassifierCascadeDescriptor haar,
-                                             NCVVector<HaarStage64> &h_HaarStages,
-                                             NCVVector<HaarClassifierNode128> &h_HaarNodes,
-                                             NCVVector<HaarFeature64> &h_HaarFeatures);
-
-
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// NVIDIA CUDA implementation of Viola-Jones Object Detection Framework
+//
+// The algorithm and code are explained in the upcoming GPU Computing Gems
+// chapter in detail:
+//
+//   Anton Obukhov, "Haar Classifiers for Object Detection with CUDA"
+//   PDF URL placeholder
+//   email: aobukhov@nvidia.com, devsupport@nvidia.com
+//
+// Credits for help with the code to:
+// Alexey Mendelenko, Cyril Crassin, and Mikhail Smirnov.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _ncvhaarobjectdetection_hpp_
+#define _ncvhaarobjectdetection_hpp_
+
+#include <string>
+#include "NCV.hpp"
+
+
+//==============================================================================
+//
+// Guaranteed size cross-platform classifier structures
+//
+//==============================================================================
+// Ncv32f_a is Ncv32f carrying the GCC __may_alias__ attribute where the compiler
+// provides it, so that 32-bit integer storage can be accessed as a float through
+// a cast without falling foul of type-based alias analysis.
+// The version test compares major and minor together (may_alias is documented for
+// GCC 3.3 and later). Testing __GNUC_MINOR__ on its own would drop the attribute
+// for the x.0 - x.4 releases of every later major version.
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+typedef Ncv32f __attribute__((__may_alias__)) Ncv32f_a;
+#else
+typedef Ncv32f Ncv32f_a;
+#endif
+
+/// \brief Haar feature stored in a single uint2: rectangle in _ui2.x, weight in _ui2.y
+struct HaarFeature64
+{
+    /// _ui2.x is accessed through an NcvRect8u view (x, y, width, height);
+    /// _ui2.y holds the bits of the Ncv32f weight
+    uint2 _ui2;
+
+#define HaarFeature64_CreateCheck_MaxRectField                  0xFF
+
+    /// \brief Store the feature rectangle
+    /// \param [in] rectX       rectangle x, must not exceed HaarFeature64_CreateCheck_MaxRectField
+    /// \param [in] rectY       rectangle y, must not exceed HaarFeature64_CreateCheck_MaxRectField
+    /// \param [in] rectWidth   rectangle width, must not exceed HaarFeature64_CreateCheck_MaxRectField
+    /// \param [in] rectHeight  rectangle height, must not exceed HaarFeature64_CreateCheck_MaxRectField
+    /// \return NCV_SUCCESS once the rectangle is stored; out-of-range fields are reported
+    ///         through ncvAssertReturn with NCV_HAAR_TOO_LARGE_FEATURES
+    ///         (ncvAssertReturn is declared outside this header - presumably it returns
+    ///         the given status when the condition is false)
+    /// The last two (unnamed) parameters are unused.
+    __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
+    {
+        // All four fields are narrowed to Ncv8u below, so every one of them has to be
+        // range-checked; otherwise an oversized x or y would be silently truncated.
+        ncvAssertReturn(rectX <= HaarFeature64_CreateCheck_MaxRectField && rectY <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
+        ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
+        NcvRect8u *packedRect = (NcvRect8u*)&(this->_ui2.x);
+        packedRect->x = (Ncv8u)rectX;
+        packedRect->y = (Ncv8u)rectY;
+        packedRect->width = (Ncv8u)rectWidth;
+        packedRect->height = (Ncv8u)rectHeight;
+        return NCV_SUCCESS;
+    }
+
+    /// \brief Store the feature weight as raw float bits in _ui2.y
+    /// \param [in] weight  value to store
+    /// \return NCV_SUCCESS (always)
+    __host__ NCVStatus setWeight(Ncv32f weight)
+    {
+        ((Ncv32f_a*)&(this->_ui2.y))[0] = weight;
+        return NCV_SUCCESS;
+    }
+
+    /// \brief Read back the rectangle stored by setRect
+    /// \param [out] rectX       receives the rectangle x
+    /// \param [out] rectY       receives the rectangle y
+    /// \param [out] rectWidth   receives the rectangle width
+    /// \param [out] rectHeight  receives the rectangle height
+    __device__ __host__ void getRect(Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
+    {
+        NcvRect8u tmpRect = *(NcvRect8u*)(&this->_ui2.x);
+        *rectX = tmpRect.x;
+        *rectY = tmpRect.y;
+        *rectWidth = tmpRect.width;
+        *rectHeight = tmpRect.height;
+    }
+
+    /// \brief Read back the weight stored by setWeight
+    /// \return the content of _ui2.y reinterpreted as Ncv32f
+    __device__ __host__ Ncv32f getWeight(void)
+    {
+        return *(Ncv32f_a*)(&this->_ui2.y);
+    }
+};
+
+
+// Packed 32-bit description of the features used by one classifier node.
+// Layout of `desc`, as given by the masks and the shift defined below:
+//   bit  31     : tilted flag
+//   bit  30     : "left node is a leaf" flag
+//   bit  29     : "right node is a leaf" flag
+//   bits 24..28 : number of features
+//   bits  0..23 : features offset
+struct HaarFeatureDescriptor32
+{
+private:
+
+#define HaarFeatureDescriptor32_Interpret_MaskFlagTilted        0x80000000
+#define HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf  0x40000000
+#define HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf 0x20000000
+#define HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures      0x1F
+#define HaarFeatureDescriptor32_NumFeatures_Shift               24
+#define HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset    0x00FFFFFF
+
+    Ncv32u desc;
+
+public:
+
+    // Validates the two numeric fields and packs all five values into `desc`.
+    // Returns NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER when numFeatures exceeds
+    // 0x1F and NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE when offsetFeatures
+    // exceeds 0x00FFFFFF (`desc` is not modified in either case); returns
+    // NCV_SUCCESS otherwise.
+    __host__ NCVStatus create(NcvBool bTilted, NcvBool bLeftLeaf, NcvBool bRightLeaf,
+                              Ncv32u numFeatures, Ncv32u offsetFeatures)
+    {
+        if (numFeatures > HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures)
+        {
+            return NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER;
+        }
+        if (offsetFeatures > HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset)
+        {
+            return NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE;
+        }
+
+        // Assemble the word locally, then publish it with a single store.
+        Ncv32u packed = (numFeatures << HaarFeatureDescriptor32_NumFeatures_Shift) | offsetFeatures;
+        if (bTilted)
+        {
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagTilted;
+        }
+        if (bLeftLeaf)
+        {
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf;
+        }
+        if (bRightLeaf)
+        {
+            packed |= HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf;
+        }
+        this->desc = packed;
+        return NCV_SUCCESS;
+    }
+
+    // True when the tilted flag (bit 31) is set.
+    __device__ __host__ NcvBool isTilted(void)
+    {
+        const Ncv32u flag = this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagTilted;
+        return flag != 0;
+    }
+
+    // True when the left-node-leaf flag (bit 30) is set.
+    __device__ __host__ NcvBool isLeftNodeLeaf(void)
+    {
+        const Ncv32u flag = this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagLeftNodeLeaf;
+        return flag != 0;
+    }
+
+    // True when the right-node-leaf flag (bit 29) is set.
+    __device__ __host__ NcvBool isRightNodeLeaf(void)
+    {
+        const Ncv32u flag = this->desc & HaarFeatureDescriptor32_Interpret_MaskFlagRightNodeLeaf;
+        return flag != 0;
+    }
+
+    // Returns the 5-bit feature count stored in bits 24..28.
+    __device__ __host__ Ncv32u getNumFeatures(void)
+    {
+        const Ncv32u shifted = this->desc >> HaarFeatureDescriptor32_NumFeatures_Shift;
+        return shifted & HaarFeatureDescriptor32_CreateCheck_MaxNumFeatures;
+    }
+
+    // Returns the 24-bit features offset stored in bits 0..23.
+    __device__ __host__ Ncv32u getFeaturesOffset(void)
+    {
+        return HaarFeatureDescriptor32_CreateCheck_MaxFeatureOffset & this->desc;
+    }
+};
+
+// 32-bit descriptor of a child of a classifier node. The single word holds
+// either the bit pattern of an Ncv32f leaf value or an Ncv32u node offset,
+// depending on which create() overload filled it. The descriptor itself does
+// not record which of the two it holds.
+struct HaarClassifierNodeDescriptor32
+{
+    uint1 _ui1;
+
+    // Stores the bit pattern of `leafValue`. Always returns NCV_SUCCESS.
+    __host__ NCVStatus create(Ncv32f leafValue)
+    {
+        // Written through the may_alias float typedef (type punning).
+        *(Ncv32f_a *)&this->_ui1 = leafValue;
+        return NCV_SUCCESS;
+    }
+
+    // Stores a node offset. Always returns NCV_SUCCESS.
+    __host__ NCVStatus create(Ncv32u offsetHaarClassifierNode)
+    {
+        this->_ui1.x = offsetHaarClassifierNode;
+        return NCV_SUCCESS;
+    }
+
+    // Host-side read of the stored word reinterpreted as Ncv32f.
+    __host__ Ncv32f getLeafValueHost(void)
+    {
+        return *(Ncv32f_a *)&this->_ui1.x;
+    }
+
+#ifdef __CUDACC__
+    // Device-side read of the stored word reinterpreted as Ncv32f via the
+    // __int_as_float intrinsic; only compiled when __CUDACC__ is defined.
+    __device__ Ncv32f getLeafValue(void)
+    {
+        return __int_as_float(this->_ui1.x);
+    }
+#endif
+
+    // Returns the stored word as an unsigned node offset.
+    __device__ __host__ Ncv32u getNextNodeOffset(void)
+    {
+        return this->_ui1.x;
+    }
+};
+
+// Ncv32u_a: an Ncv32u that is allowed to alias objects of other types. It is
+// used below to copy the raw bits of floats and descriptors into packed words.
+// The version test compares major and minor together (the attribute is
+// available since GCC 3.3). Testing __GNUC_MINOR__ > 4 on its own would reject
+// every x.0 - x.4 release of later major versions (e.g. GCC 5.1).
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+typedef Ncv32u __attribute__((__may_alias__)) Ncv32u_a;
+#else
+typedef Ncv32u Ncv32u_a;
+#endif
+
+// 128-bit packed classifier node, one 32-bit word per field:
+//   _ui4.x - raw bits of a HaarFeatureDescriptor32
+//   _ui4.y - bit pattern of the Ncv32f node threshold
+//   _ui4.z - raw bits of the left HaarClassifierNodeDescriptor32
+//   _ui4.w - raw bits of the right HaarClassifierNodeDescriptor32
+// All setters return NCV_SUCCESS unconditionally.
+struct HaarClassifierNode128
+{
+    uint4 _ui4;
+
+    // Copies the raw bits of the feature descriptor into word x.
+    // The read goes through the may_alias typedef Ncv32u_a, like every other
+    // setter of this struct, so the reinterpretation stays valid under
+    // strict-aliasing optimizations.
+    __host__ NCVStatus setFeatureDesc(HaarFeatureDescriptor32 f)
+    {
+        this->_ui4.x = *(Ncv32u_a *)&f;
+        return NCV_SUCCESS;
+    }
+
+    // Copies the bit pattern of the threshold into word y.
+    __host__ NCVStatus setThreshold(Ncv32f t)
+    {
+        this->_ui4.y = *(Ncv32u_a *)&t;
+        return NCV_SUCCESS;
+    }
+
+    // Copies the raw bits of the left child descriptor into word z.
+    __host__ NCVStatus setLeftNodeDesc(HaarClassifierNodeDescriptor32 nl)
+    {
+        this->_ui4.z = *(Ncv32u_a *)&nl;
+        return NCV_SUCCESS;
+    }
+
+    // Copies the raw bits of the right child descriptor into word w.
+    __host__ NCVStatus setRightNodeDesc(HaarClassifierNodeDescriptor32 nr)
+    {
+        this->_ui4.w = *(Ncv32u_a *)&nr;
+        return NCV_SUCCESS;
+    }
+
+    // Returns word x reinterpreted as a HaarFeatureDescriptor32.
+    __host__ __device__ HaarFeatureDescriptor32 getFeatureDesc(void)
+    {
+        return *(HaarFeatureDescriptor32 *)&this->_ui4.x;
+    }
+
+    // Returns word y reinterpreted as the Ncv32f threshold.
+    __host__ __device__ Ncv32f getThreshold(void)
+    {
+        return *(Ncv32f_a*)&this->_ui4.y;
+    }
+
+    // Returns word z reinterpreted as the left child descriptor.
+    __host__ __device__ HaarClassifierNodeDescriptor32 getLeftNodeDesc(void)
+    {
+        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.z;
+    }
+
+    // Returns word w reinterpreted as the right child descriptor.
+    __host__ __device__ HaarClassifierNodeDescriptor32 getRightNodeDesc(void)
+    {
+        return *(HaarClassifierNodeDescriptor32 *)&this->_ui4.w;
+    }
+};
+
+
+// 64-bit packed cascade stage:
+//   _ui2.x              - bit pattern of the Ncv32f stage threshold
+//   _ui2.y bits  0..15  - number of classifier root nodes
+//   _ui2.y bits 16..31  - offset of the first classifier root node
+struct HaarStage64
+{
+#define HaarStage64_Interpret_MaskRootNodes         0x0000FFFF
+#define HaarStage64_Interpret_MaskRootNodeOffset    0xFFFF0000
+#define HaarStage64_Interpret_ShiftRootNodeOffset   16
+
+    uint2 _ui2;
+
+    // Stores the bit pattern of the stage threshold. Always returns NCV_SUCCESS.
+    __host__ NCVStatus setStageThreshold(Ncv32f t)
+    {
+        this->_ui2.x = *(Ncv32u_a *)&t;
+        return NCV_SUCCESS;
+    }
+
+    // Writes the root node offset into the upper 16 bits of _ui2.y, keeping the
+    // lower 16 bits. Returns NCV_HAAR_XML_LOADING_EXCEPTION (leaving the word
+    // untouched) when `val` does not fit into 16 bits, NCV_SUCCESS otherwise.
+    __host__ NCVStatus setStartClassifierRootNodeOffset(Ncv32u val)
+    {
+        const Ncv32u maxOffset = HaarStage64_Interpret_MaskRootNodeOffset >> HaarStage64_Interpret_ShiftRootNodeOffset;
+        if (val > maxOffset)
+        {
+            return NCV_HAAR_XML_LOADING_EXCEPTION;
+        }
+        const Ncv32u keptCount = this->_ui2.y & HaarStage64_Interpret_MaskRootNodes;
+        this->_ui2.y = (val << HaarStage64_Interpret_ShiftRootNodeOffset) | keptCount;
+        return NCV_SUCCESS;
+    }
+
+    // Writes the root node count into the lower 16 bits of _ui2.y, keeping the
+    // upper 16 bits. Returns NCV_HAAR_XML_LOADING_EXCEPTION (leaving the word
+    // untouched) when `val` does not fit into 16 bits, NCV_SUCCESS otherwise.
+    __host__ NCVStatus setNumClassifierRootNodes(Ncv32u val)
+    {
+        if (val > HaarStage64_Interpret_MaskRootNodes)
+        {
+            return NCV_HAAR_XML_LOADING_EXCEPTION;
+        }
+        const Ncv32u keptOffset = this->_ui2.y & HaarStage64_Interpret_MaskRootNodeOffset;
+        this->_ui2.y = val | keptOffset;
+        return NCV_SUCCESS;
+    }
+
+    // Returns the stage threshold stored by setStageThreshold().
+    __host__ __device__ Ncv32f getStageThreshold(void)
+    {
+        return *(Ncv32f_a*)&this->_ui2.x;
+    }
+
+    // Returns the upper 16 bits of _ui2.y (root node offset).
+    __host__ __device__ Ncv32u getStartClassifierRootNodeOffset(void)
+    {
+        const Ncv32u word = this->_ui2.y;
+        return word >> HaarStage64_Interpret_ShiftRootNodeOffset;
+    }
+
+    // Returns the lower 16 bits of _ui2.y (root node count).
+    __host__ __device__ Ncv32u getNumClassifierRootNodes(void)
+    {
+        const Ncv32u word = this->_ui2.y;
+        return word & HaarStage64_Interpret_MaskRootNodes;
+    }
+};
+
+
+// Compile-time checks that every packed structure has exactly the byte size
+// its name promises (the accessors above reinterpret raw 32-bit words, so any
+// padding would break them).
+NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
+NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
+NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
+NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
+NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
+
+
+//==============================================================================
+//
+// Classifier cascade descriptor
+//
+//==============================================================================
+
+
+// Plain aggregate with cascade-wide counters and flags. It is passed to the
+// detection, loading and storing entry points declared below.
+// NOTE(review): the field meanings are inferred from their names only -
+// confirm against the loader implementation.
+struct HaarClassifierCascadeDescriptor
+{
+    Ncv32u NumStages;               // presumably the number of HaarStage64 entries
+    Ncv32u NumClassifierRootNodes;  // presumably the number of root HaarClassifierNode128 entries
+    Ncv32u NumClassifierTotalNodes; // presumably the number of all HaarClassifierNode128 entries
+    Ncv32u NumFeatures;             // presumably the number of HaarFeature64 entries
+    NcvSize32u ClassifierSize;      // presumably the classifier window size
+    NcvBool bNeedsTiltedII;         // presumably set when a tilted integral image is required
+    NcvBool bHasStumpsOnly;         // presumably set when every classifier is a single-node stump
+};
+
+
+//==============================================================================
+//
+// Functional interface
+//
+//==============================================================================
+
+
+// Option flags for the detection pipeline. The non-zero values are distinct
+// bits, so they can be OR-combined; NCVPipeObjDet_Default is named as the
+// default of the `flags` argument of ncvDetectObjectsMultiScale_device below.
+// NOTE(review): the effect of each flag is implemented elsewhere - the names
+// are the only description available in this header.
+enum
+{
+    NCVPipeObjDet_Default               = 0x000,
+    NCVPipeObjDet_UseFairImageScaling   = 0x001,
+    NCVPipeObjDet_FindLargestObject     = 0x002,
+    NCVPipeObjDet_VisualizeInPlace      = 0x004,
+};
+
+
+// Multi-scale object detection entry point (declaration only; the
+// implementation is not part of this header). Returns an NCVStatus.
+// NOTE(review): the parameter roles are inferred from names and from the inline
+// "default" remarks - confirm against the implementation.
+//   d_srcImg, srcRoi        - presumably the 8-bit source image and the region to scan
+//   d_dstRects, dstNumRects - presumably the output rectangles and their count
+//   haar, *_HaarStages, d_HaarNodes, d_HaarFeatures - cascade descriptor and data;
+//                             the stages are taken both as h_ and d_ vectors
+//   gpuAllocator, cpuAllocator, devProp, cuStream - allocators, device properties
+//                             and the CUDA stream handed to the implementation
+NCV_EXPORTS NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
+                                                        NcvSize32u srcRoi,
+                                                        NCVVector<NcvRect32u> &d_dstRects,
+                                                        Ncv32u &dstNumRects,
+
+                                                        HaarClassifierCascadeDescriptor &haar,
+                                                        NCVVector<HaarStage64> &h_HaarStages,
+                                                        NCVVector<HaarStage64> &d_HaarStages,
+                                                        NCVVector<HaarClassifierNode128> &d_HaarNodes,
+                                                        NCVVector<HaarFeature64> &d_HaarFeatures,
+
+                                                        NcvSize32u minObjSize,
+                                                        Ncv32u minNeighbors,      //default 4
+                                                        Ncv32f scaleStep,         //default 1.2f
+                                                        Ncv32u pixelStep,         //default 1
+                                                        Ncv32u flags,             //default NCVPipeObjDet_Default
+
+                                                        INCVMemAllocator &gpuAllocator,
+                                                        INCVMemAllocator &cpuAllocator,
+                                                        cudaDeviceProp &devProp,
+                                                        cudaStream_t cuStream);
+
+
+#define OBJDET_MASK_ELEMENT_INVALID_32U     0xFFFFFFFF
+#define HAAR_STDDEV_BORDER                  1
+
+
+// Device variant of the cascade application step (declaration only). Takes an
+// integral image, a weights matrix and a pixel mask, plus the cascade data as
+// host and device vectors, and reports a detection count via `numDetections`.
+// NOTE(review): the exact meaning of d_weights, bMaskElements, anchorsRoi and
+// scaleArea is not visible here - confirm against the implementation.
+NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
+                                                           NCVMatrix<Ncv32f> &d_weights,
+                                                           NCVMatrixAlloc<Ncv32u> &d_pixelMask,
+                                                           Ncv32u &numDetections,
+                                                           HaarClassifierCascadeDescriptor &haar,
+                                                           NCVVector<HaarStage64> &h_HaarStages,
+                                                           NCVVector<HaarStage64> &d_HaarStages,
+                                                           NCVVector<HaarClassifierNode128> &d_HaarNodes,
+                                                           NCVVector<HaarFeature64> &d_HaarFeatures,
+                                                           NcvBool bMaskElements,
+                                                           NcvSize32u anchorsRoi,
+                                                           Ncv32u pixelStep,
+                                                           Ncv32f scaleArea,
+                                                           INCVMemAllocator &gpuAllocator,
+                                                           INCVMemAllocator &cpuAllocator,
+                                                           cudaDeviceProp &devProp,
+                                                           cudaStream_t cuStream);
+
+
+// Host variant of the cascade application step (declaration only). Same
+// parameter list as the _device variant minus the device stage vector, the
+// allocators, the device properties and the stream; all data arguments carry
+// the h_ prefix.
+// NOTE(review): presumably a CPU reference implementation - confirm.
+NCV_EXPORTS NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
+                                                         NCVMatrix<Ncv32f> &h_weights,
+                                                         NCVMatrixAlloc<Ncv32u> &h_pixelMask,
+                                                         Ncv32u &numDetections,
+                                                         HaarClassifierCascadeDescriptor &haar,
+                                                         NCVVector<HaarStage64> &h_HaarStages,
+                                                         NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                                         NCVVector<HaarFeature64> &h_HaarFeatures,
+                                                         NcvBool bMaskElements,
+                                                         NcvSize32u anchorsRoi,
+                                                         Ncv32u pixelStep,
+                                                         Ncv32f scaleArea);
+
+
+#define RECT_SIMILARITY_PROPORTION      0.2f
+
+
+// Device variant of the "grow detections" step (declaration only).
+// `totalDetections` is an in/out reference and `totalMaxDetections` a by-value
+// limit.
+// NOTE(review): presumably appends one rectWidth x rectHeight rectangle, scaled
+// by curScale, to `hypotheses` per pixel-mask detection - confirm against the
+// implementation.
+NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
+                                                     Ncv32u numPixelMaskDetections,
+                                                     NCVVector<NcvRect32u> &hypotheses,
+                                                     Ncv32u &totalDetections,
+                                                     Ncv32u totalMaxDetections,
+                                                     Ncv32u rectWidth,
+                                                     Ncv32u rectHeight,
+                                                     Ncv32f curScale,
+                                                     cudaStream_t cuStream);
+
+
+// Host variant of the "grow detections" step (declaration only). Same
+// parameter list as the _device variant without the CUDA stream.
+// NOTE(review): presumably a CPU reference implementation - confirm.
+NCV_EXPORTS NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
+                                                   Ncv32u numPixelMaskDetections,
+                                                   NCVVector<NcvRect32u> &hypotheses,
+                                                   Ncv32u &totalDetections,
+                                                   Ncv32u totalMaxDetections,
+                                                   Ncv32u rectWidth,
+                                                   Ncv32u rectHeight,
+                                                   Ncv32f curScale);
+
+
+// Reports the stage, node and feature counts of the cascade stored in
+// `filename` through the three output references (declaration only).
+// NOTE(review): presumably meant to size the vectors passed to
+// ncvHaarLoadFromFile_host - confirm.
+NCV_EXPORTS NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
+                                               Ncv32u &numNodes, Ncv32u &numFeatures);
+
+
+// Loads a cascade from `filename` into the descriptor and the three host
+// vectors (declaration only).
+// NOTE(review): the supported file formats and whether the vectors must be
+// pre-allocated are not visible here - confirm against the implementation.
+NCV_EXPORTS NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
+                                               HaarClassifierCascadeDescriptor &haar,
+                                               NCVVector<HaarStage64> &h_HaarStages,
+                                               NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                               NCVVector<HaarFeature64> &h_HaarFeatures);
+
+
+// Stores the cascade (descriptor plus the three host vectors) to `filename`
+// (declaration only). Unlike the other entry points, `haar` is taken by value
+// here.
+// NOTE(review): the name suggests an NVIDIA binary ("NVBIN") format - confirm.
+NCV_EXPORTS NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
+                                             HaarClassifierCascadeDescriptor haar,
+                                             NCVVector<HaarStage64> &h_HaarStages,
+                                             NCVVector<HaarClassifierNode128> &h_HaarNodes,
+                                             NCVVector<HaarFeature64> &h_HaarFeatures);
+
+
+
 #endif // _ncvhaarobjectdetection_hpp_
@@ -1,154 +1,154 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef _ncv_alg_hpp_
-#define _ncv_alg_hpp_
-
-#include "NCV.hpp"
-
-
-template <class T>
-static void swap(T &p1, T &p2)
-{
-    T tmp = p1;
-    p1 = p2;
-    p2 = tmp;
-}
-
-
-template<typename T>
-static T divUp(T a, T b)
-{
-    return (a + b - 1) / b;
-}
-
-
-template<typename T>
-struct functorAddValues
-{
-    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
-    {
-        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
-        *dst = *src;
-    }
-    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
-    {
-        in1out += in2;
-    }
-};
-
-
-template<typename T>
-struct functorMinValues
-{
-    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
-    {
-        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
-        *dst = *src;
-    }
-    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
-    {
-        in1out = in1out > in2 ? in2 : in1out;
-    }
-};
-
-
-template<typename T>
-struct functorMaxValues
-{
-    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
-    {
-        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
-        *dst = *src;
-    }
-    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
-    {
-        in1out = in1out > in2 ? in1out : in2;
-    }
-};
-
-
-template<typename Tdata, class Tfunc, Ncv32u nThreads>
-static __device__ Tdata subReduce(Tdata threadElem)
-{
-    Tfunc functor;
-
-    __shared__ Tdata _reduceArr[nThreads];
-    volatile Tdata *reduceArr = _reduceArr;
-    functor.assign(reduceArr + threadIdx.x, &threadElem);
-    __syncthreads();
-
-    if (nThreads >= 256 && threadIdx.x < 128)
-    {
-        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
-    }
-    __syncthreads();
-
-    if (nThreads >= 128 && threadIdx.x < 64)
-    {
-        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
-    }
-    __syncthreads();
-
-    if (threadIdx.x < 32)
-    {
-        if (nThreads >= 64)
-        {
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]);
-        }
-        if (nThreads >= 32 && threadIdx.x < 16)
-        {
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 16]);
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 8]);
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 4]);
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 2]);
-            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 1]);
-        }
-    }
-
-    __syncthreads();
-    Tdata reduceRes;
-    functor.assign(&reduceRes, reduceArr);
-    return reduceRes;
-}
-
-
-#endif //_ncv_alg_hpp_
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _ncv_alg_hpp_
+#define _ncv_alg_hpp_
+
+#include "NCV.hpp"
+
+
+// Exchanges the values of p1 and p2 through one temporary copy.
+// T must be copy-constructible and copy-assignable.
+template <class T>
+static void swap(T &p1, T &p2)
+{
+    T saved = p2;
+    p2 = p1;
+    p1 = saved;
+}
+
+
+// Returns (a + b - 1) / b. For integer a >= 0 and b > 0 this is a / b rounded
+// up, e.g. the number of blocks of size b needed to cover a elements.
+// b == 0 divides by zero.
+// NOTE(review): a + b - 1 can overflow T when a is close to the maximum of T.
+template<typename T>
+static T divUp(T a, T b)
+{
+    return (a + b - 1) / b;
+}
+
+
+// Reduction policy for subReduce(): combines two elements by addition.
+template<typename T>
+struct functorAddValues
+{
+    // Copies *src to *dst; both sides are accessed as volatile objects.
+    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
+    {
+        // Works only for integral types. If this line fails to compile, specify how to copy your object as a set of integral fields.
+        *dst = *src;
+    }
+    // Accumulates in2 into in1out (in1out += in2).
+    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
+    {
+        in1out += in2;
+    }
+};
+
+
+// Reduction policy for subReduce(): keeps the smaller of two elements.
+template<typename T>
+struct functorMinValues
+{
+    // Copies *src to *dst; both sides are accessed as volatile objects.
+    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
+    {
+        // Works only for integral types. If this line fails to compile, specify how to copy your object as a set of integral fields.
+        *dst = *src;
+    }
+    // Stores min(in1out, in2) into in1out. The store is performed even when
+    // the value does not change.
+    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
+    {
+        in1out = in1out > in2 ? in2 : in1out;
+    }
+};
+
+
+// Reduction policy for subReduce(): keeps the larger of two elements.
+template<typename T>
+struct functorMaxValues
+{
+    // Copies *src to *dst; both sides are accessed as volatile objects.
+    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
+    {
+        // Works only for integral types. If this line fails to compile, specify how to copy your object as a set of integral fields.
+        *dst = *src;
+    }
+    // Stores max(in1out, in2) into in1out. The store is performed even when
+    // the value does not change.
+    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
+    {
+        in1out = in1out > in2 ? in1out : in2;
+    }
+};
+
+
+// Block-wide reduction in shared memory.
+// Each thread contributes `threadElem`; the elements are combined with
+// Tfunc::reduce and every thread returns the value left in slot 0.
+//
+//   Tdata    - element type (see the note in Tfunc::assign)
+//   Tfunc    - policy providing assign() and reduce(), e.g. functorAddValues
+//   nThreads - size of the shared array; indexing uses threadIdx.x only
+//
+// Preconditions visible in the code:
+//   * every thread of the block must call this function (it contains
+//     __syncthreads() barriers);
+//   * threadIdx.x must be < nThreads (slot index into _reduceArr);
+//   * halving steps exist for 256, 128, 64 and 32 elements only: with
+//     nThreads < 32 no reduction step runs and thread 0's own element is
+//     returned, and there is no step for more than 256 elements.
+// NOTE(review): presumably blockDim.x == nThreads is expected - confirm at the
+// call sites.
+template<typename Tdata, class Tfunc, Ncv32u nThreads>
+static __device__ Tdata subReduce(Tdata threadElem)
+{
+    Tfunc functor;
+
+    __shared__ Tdata _reduceArr[nThreads];
+    // All further accesses go through a volatile pointer so that they are not
+    // cached in registers between the reduction steps.
+    volatile Tdata *reduceArr = _reduceArr;
+    functor.assign(reduceArr + threadIdx.x, &threadElem);
+    __syncthreads();
+
+    // 256 -> 128 elements
+    if (nThreads >= 256 && threadIdx.x < 128)
+    {
+        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
+    }
+    __syncthreads();
+
+    // 128 -> 64 elements
+    if (nThreads >= 128 && threadIdx.x < 64)
+    {
+        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
+    }
+    __syncthreads();
+
+    // Remaining steps are executed by the first 32 threads without any barrier
+    // between them.
+    // NOTE(review): this relies on the threads of one warp running in lockstep.
+    // On GPUs with independent thread scheduling (Volta and newer) that is not
+    // guaranteed; a __syncwarp() between the steps may be required - confirm.
+    if (threadIdx.x < 32)
+    {
+        // 64 -> 32 elements
+        if (nThreads >= 64)
+        {
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]);
+        }
+        // 32 -> 1 element; only slot 0 is meaningful afterwards
+        if (nThreads >= 32 && threadIdx.x < 16)
+        {
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 16]);
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 8]);
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 4]);
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 2]);
+            functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 1]);
+        }
+    }
+
+    // Make the result in slot 0 visible to all threads, then broadcast it.
+    // NOTE(review): there is no barrier after this read, so a second call that
+    // reuses the same shared array could overwrite slot 0 before slower threads
+    // have read it - confirm that callers synchronize between calls.
+    __syncthreads();
+    Tdata reduceRes;
+    functor.assign(&reduceRes, reduceArr);
+    return reduceRes;
+}
+
+
+#endif //_ncv_alg_hpp_
@@ -1,99 +1,99 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-// this file does not contain any used code.
-
-#ifndef _ncv_color_conversion_hpp_
-#define _ncv_color_conversion_hpp_
-
-#include "NCVPixelOperations.hpp"
-
-#if 0
-enum NCVColorSpace
-{
-    NCVColorSpaceGray,
-    NCVColorSpaceRGBA,
-};
-
-template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout> struct __pixColorConv {
-static void _pixColorConv(const Tin &pixIn, Tout &pixOut);
-};
-
-template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceRGBA, NCVColorSpaceGray, Tin, Tout> {
-static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
-{
-    Ncv32f luma = 0.299f * pixIn.x + 0.587f * pixIn.y + 0.114f * pixIn.z;
-    _TDemoteClampNN(luma, pixOut.x);
-}};
-
-template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceGray, NCVColorSpaceRGBA, Tin, Tout> {
-static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
-{
-    _TDemoteClampNN(pixIn.x, pixOut.x);
-    _TDemoteClampNN(pixIn.x, pixOut.y);
-    _TDemoteClampNN(pixIn.x, pixOut.z);
-    pixOut.w = 0;
-}};
-
-template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
-static NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
-                             const NCVMatrix<Tout> &h_imgOut)
-{
-    ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
-    ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() &&
-                    (h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
-    NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone);
-    NCV_SKIP_COND_BEGIN
-
-    for (Ncv32u i=0; i<h_imgIn.height(); i++)
-    {
-        for (Ncv32u j=0; j<h_imgIn.width(); j++)
-        {
-            __pixColorConv<CSin, CSout, Tin, Tout>::_pixColorConv(h_imgIn.at(j,i), h_imgOut.at(j,i));
-        }
-    }
-
-    NCV_SKIP_COND_END
-    return NCV_SUCCESS;
-}
-#endif
-
-#endif //_ncv_color_conversion_hpp_
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+// This file contains no code that is currently in use: everything after the #include below is disabled by "#if 0".
+
+#ifndef _ncv_color_conversion_hpp_
+#define _ncv_color_conversion_hpp_
+
+#include "NCVPixelOperations.hpp"
+
+#if 0
+// Color spaces selectable as template arguments of __pixColorConv and
+// _ncvColorConv_host below (this code is disabled by the enclosing "#if 0").
+enum NCVColorSpace
+{
+    NCVColorSpaceGray,
+    NCVColorSpaceRGBA,
+};
+
+// Primary template of the per-pixel converter: _pixColorConv is declared only,
+// so a CSin/CSout pair is usable only if a partial specialization defines it
+// (RGBA -> Gray and Gray -> RGBA are specialized below).
+template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout> struct __pixColorConv {
+static void _pixColorConv(const Tin &pixIn, Tout &pixOut);
+};
+
+// RGBA -> Gray: computes the weighted sum 0.299*x + 0.587*y + 0.114*z of the
+// input channels in Ncv32f and hands it to _TDemoteClampNN to store it in
+// pixOut.x. The input w channel is not read.
+// NOTE(review): _TDemoteClampNN is defined elsewhere (presumably in
+// NCVPixelOperations.hpp); the name suggests a clamped narrowing conversion -
+// confirm.
+template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceRGBA, NCVColorSpaceGray, Tin, Tout> {
+static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
+{
+    Ncv32f luma = 0.299f * pixIn.x + 0.587f * pixIn.y + 0.114f * pixIn.z;
+    _TDemoteClampNN(luma, pixOut.x);
+}};
+
+// Gray -> RGBA: passes the single input channel pixIn.x to _TDemoteClampNN for
+// each of the x, y and z output channels and sets the w channel to 0.
+// NOTE(review): _TDemoteClampNN is defined elsewhere; its exact conversion
+// semantics are not visible here - confirm.
+template<typename Tin, typename Tout> struct __pixColorConv<NCVColorSpaceGray, NCVColorSpaceRGBA, Tin, Tout> {
+static void _pixColorConv(const Tin &pixIn, Tout &pixOut)
+{
+    _TDemoteClampNN(pixIn.x, pixOut.x);
+    _TDemoteClampNN(pixIn.x, pixOut.y);
+    _TDemoteClampNN(pixIn.x, pixOut.z);
+    pixOut.w = 0;
+}};
+
+// Host-side color conversion of a whole matrix: applies
+// __pixColorConv<CSin, CSout, Tin, Tout>::_pixColorConv to every pixel.
+// Checks, via ncvAssertReturn:
+//   * both matrices have the same size          (NCV_DIMENSIONS_INVALID)
+//   * both have the same memory type, which is either NCVMemoryTypeHostPinned
+//     or NCVMemoryTypeNone                      (NCV_MEM_RESIDENCE_ERROR)
+// Returns NCV_SUCCESS at the end of the function.
+// NOTE(review): h_imgOut is a const reference although its pixels are passed
+// as the non-const output argument; whether this compiles depends on the
+// signature of NCVMatrix::at(), which is not visible here (the code is
+// disabled by "#if 0") - confirm before re-enabling.
+template<NCVColorSpace CSin, NCVColorSpace CSout, typename Tin, typename Tout>
+static NCVStatus _ncvColorConv_host(const NCVMatrix<Tin> &h_imgIn,
+                             const NCVMatrix<Tout> &h_imgOut)
+{
+    ncvAssertReturn(h_imgIn.size() == h_imgOut.size(), NCV_DIMENSIONS_INVALID);
+    ncvAssertReturn(h_imgIn.memType() == h_imgOut.memType() &&
+                    (h_imgIn.memType() == NCVMemoryTypeHostPinned || h_imgIn.memType() == NCVMemoryTypeNone), NCV_MEM_RESIDENCE_ERROR);
+    // presumably the loop between the SKIP_COND markers is skipped when the
+    // input has memory type None - see the macro definitions to confirm
+    NCV_SET_SKIP_COND(h_imgIn.memType() == NCVMemoryTypeNone);
+    NCV_SKIP_COND_BEGIN
+
+    // i walks over height(), j over width(); at() is called as at(j, i)
+    for (Ncv32u i=0; i<h_imgIn.height(); i++)
+    {
+        for (Ncv32u j=0; j<h_imgIn.width(); j++)
+        {
+            __pixColorConv<CSin, CSout, Tin, Tout>::_pixColorConv(h_imgIn.at(j,i), h_imgOut.at(j,i));
+        }
+    }
+
+    NCV_SKIP_COND_END
+    return NCV_SUCCESS;
+}
+#endif
+
+#endif //_ncv_color_conversion_hpp_
@@ -1,350 +1,350 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef _ncv_pixel_operations_hpp_
-#define _ncv_pixel_operations_hpp_
-
-#include <limits.h>
-#include <float.h>
-#include "NCV.hpp"
-
-template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
-template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
-template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
-template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
-template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  CHAR_MAX;}
-template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
-template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
-template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
-template<> static inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return   DBL_MAX;}
-
-template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
-template<> static inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
-template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
-template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
-template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return CHAR_MIN;}
-template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
-template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
-template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
-template<> static inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
-
-template<typename Tvec> struct TConvVec2Base;
-template<> struct TConvVec2Base<uchar1>  {typedef Ncv8u TBase;};
-template<> struct TConvVec2Base<uchar3>  {typedef Ncv8u TBase;};
-template<> struct TConvVec2Base<uchar4>  {typedef Ncv8u TBase;};
-template<> struct TConvVec2Base<ushort1> {typedef Ncv16u TBase;};
-template<> struct TConvVec2Base<ushort3> {typedef Ncv16u TBase;};
-template<> struct TConvVec2Base<ushort4> {typedef Ncv16u TBase;};
-template<> struct TConvVec2Base<uint1>   {typedef Ncv32u TBase;};
-template<> struct TConvVec2Base<uint3>   {typedef Ncv32u TBase;};
-template<> struct TConvVec2Base<uint4>   {typedef Ncv32u TBase;};
-template<> struct TConvVec2Base<float1>  {typedef Ncv32f TBase;};
-template<> struct TConvVec2Base<float3>  {typedef Ncv32f TBase;};
-template<> struct TConvVec2Base<float4>  {typedef Ncv32f TBase;};
-template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
-template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
-template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};
-
-#define NC(T)       (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
-
-template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
-template<> struct TConvBase2Vec<Ncv8u, 1>  {typedef uchar1 TVec;};
-template<> struct TConvBase2Vec<Ncv8u, 3>  {typedef uchar3 TVec;};
-template<> struct TConvBase2Vec<Ncv8u, 4>  {typedef uchar4 TVec;};
-template<> struct TConvBase2Vec<Ncv16u, 1> {typedef ushort1 TVec;};
-template<> struct TConvBase2Vec<Ncv16u, 3> {typedef ushort3 TVec;};
-template<> struct TConvBase2Vec<Ncv16u, 4> {typedef ushort4 TVec;};
-template<> struct TConvBase2Vec<Ncv32u, 1> {typedef uint1 TVec;};
-template<> struct TConvBase2Vec<Ncv32u, 3> {typedef uint3 TVec;};
-template<> struct TConvBase2Vec<Ncv32u, 4> {typedef uint4 TVec;};
-template<> struct TConvBase2Vec<Ncv32f, 1> {typedef float1 TVec;};
-template<> struct TConvBase2Vec<Ncv32f, 3> {typedef float3 TVec;};
-template<> struct TConvBase2Vec<Ncv32f, 4> {typedef float4 TVec;};
-template<> struct TConvBase2Vec<Ncv64f, 1> {typedef double1 TVec;};
-template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
-template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
-
-//TODO: consider using CUDA intrinsics to avoid branching
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
-
-//TODO: consider using CUDA intrinsics to avoid branching
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
-template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
-
-template<typename Tout> inline Tout _pixMakeZero();
-template<> static inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
-template<> static inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
-template<> static inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
-template<> static inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
-template<> static inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
-template<> static inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
-template<> static inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
-template<> static inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
-template<> static inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
-template<> static inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
-template<> static inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
-template<> static inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
-template<> static inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
-template<> static inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
-template<> static inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
-
-static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
-static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
-static inline __host__ __device__ uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
-static inline __host__ __device__ ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
-static inline __host__ __device__ ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
-static inline __host__ __device__ ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
-static inline __host__ __device__ uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
-static inline __host__ __device__ uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
-static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
-static inline __host__ __device__ float1 _pixMake(Ncv32f x) {return make_float1(x);}
-static inline __host__ __device__ float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
-static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
-static inline __host__ __device__ double1 _pixMake(Ncv64f x) {return make_double1(x);}
-static inline __host__ __device__ double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
-static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
-
-
-template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix);};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
-static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampZ(pix.x, out.x);
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
-static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampZ(pix.x, out.x);
-    _TDemoteClampZ(pix.y, out.y);
-    _TDemoteClampZ(pix.z, out.z);
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
-static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampZ(pix.x, out.x);
-    _TDemoteClampZ(pix.y, out.y);
-    _TDemoteClampZ(pix.z, out.z);
-    _TDemoteClampZ(pix.w, out.w);
-    return out;
-}};
-
-template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix)
-{
-    return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
-}
-
-
-template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix);};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
-static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampNN(pix.x, out.x);
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
-static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampNN(pix.x, out.x);
-    _TDemoteClampNN(pix.y, out.y);
-    _TDemoteClampNN(pix.z, out.z);
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
-static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
-{
-    Tout out;
-    _TDemoteClampNN(pix.x, out.x);
-    _TDemoteClampNN(pix.y, out.y);
-    _TDemoteClampNN(pix.z, out.z);
-    _TDemoteClampNN(pix.w, out.w);
-    return out;
-}};
-
-template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix)
-{
-    return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
-}
-
-
-template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w);};
-
-template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
-static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
-{
-    Tout out;
-    typedef typename TConvVec2Base<Tout>::TBase TBout;
-    out.x = (TBout)(pix.x * w);
-    return out;
-}};
-
-template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
-static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
-{
-    Tout out;
-    typedef typename TConvVec2Base<Tout>::TBase TBout;
-    out.x = (TBout)(pix.x * w);
-    out.y = (TBout)(pix.y * w);
-    out.z = (TBout)(pix.z * w);
-    return out;
-}};
-
-template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
-static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
-{
-    Tout out;
-    typedef typename TConvVec2Base<Tout>::TBase TBout;
-    out.x = (TBout)(pix.x * w);
-    out.y = (TBout)(pix.y * w);
-    out.z = (TBout)(pix.z * w);
-    out.w = (TBout)(pix.w * w);
-    return out;
-}};
-
-template<typename Tin, typename Tout, typename Tw> static __host__ __device__ Tout _pixScale(Tin &pix, Tw w)
-{
-    return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
-}
-
-
-template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
-
-template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
-static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
-{
-    Tout out;
-    out.x = pix1.x + pix2.x;
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
-static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
-{
-    Tout out;
-    out.x = pix1.x + pix2.x;
-    out.y = pix1.y + pix2.y;
-    out.z = pix1.z + pix2.z;
-    return out;
-}};
-
-template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
-static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
-{
-    Tout out;
-    out.x = pix1.x + pix2.x;
-    out.y = pix1.y + pix2.y;
-    out.z = pix1.z + pix2.z;
-    out.w = pix1.w + pix2.w;
-    return out;
-}};
-
-template<typename Tin, typename Tout> static __host__ __device__ Tout _pixAdd(Tout &pix1, Tin &pix2)
-{
-    return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
-}
-
-
-template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
-
-template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
-static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
-{
-    return Tout(SQR(pix1.x - pix2.x));
-}};
-
-template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
-static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
-{
-    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
-}};
-
-template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
-static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
-{
-    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
-}};
-
-template<typename Tin, typename Tout> static __host__ __device__ Tout _pixDist(Tin &pix1, Tin &pix2)
-{
-    return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
-}
-
-
-template <typename T> struct TAccPixWeighted;
-template<> struct TAccPixWeighted<uchar1> {typedef double1 type;};
-template<> struct TAccPixWeighted<uchar3> {typedef double3 type;};
-template<> struct TAccPixWeighted<uchar4> {typedef double4 type;};
-template<> struct TAccPixWeighted<ushort1> {typedef double1 type;};
-template<> struct TAccPixWeighted<ushort3> {typedef double3 type;};
-template<> struct TAccPixWeighted<ushort4> {typedef double4 type;};
-template<> struct TAccPixWeighted<float1> {typedef double1 type;};
-template<> struct TAccPixWeighted<float3> {typedef double3 type;};
-template<> struct TAccPixWeighted<float4> {typedef double4 type;};
-
-template<typename Tfrom> struct TAccPixDist {};
-template<> struct TAccPixDist<uchar1> {typedef Ncv32u type;};
-template<> struct TAccPixDist<uchar3> {typedef Ncv32u type;};
-template<> struct TAccPixDist<uchar4> {typedef Ncv32u type;};
-template<> struct TAccPixDist<ushort1> {typedef Ncv32u type;};
-template<> struct TAccPixDist<ushort3> {typedef Ncv32u type;};
-template<> struct TAccPixDist<ushort4> {typedef Ncv32u type;};
-template<> struct TAccPixDist<float1> {typedef Ncv32f type;};
-template<> struct TAccPixDist<float3> {typedef Ncv32f type;};
-template<> struct TAccPixDist<float4> {typedef Ncv32f type;};
-
-#endif //_ncv_pixel_operations_hpp_
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _ncv_pixel_operations_hpp_
+#define _ncv_pixel_operations_hpp_
+
+#include <limits.h>
+#include <float.h>
+#include "NCV.hpp"
+
+template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
+template<> static inline __host__ __device__ Ncv8u  _pixMaxVal<Ncv8u>()  {return UCHAR_MAX;}
+template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
+template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return  UINT_MAX;}
+template<> static inline __host__ __device__ Ncv8s  _pixMaxVal<Ncv8s>()  {return  CHAR_MAX;}
+template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return  SHRT_MAX;}
+template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return   INT_MAX;}
+template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return   FLT_MAX;}
+template<> static inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return   DBL_MAX;}
+
+template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
+template<> static inline __host__ __device__ Ncv8u  _pixMinVal<Ncv8u>()  {return 0;}
+template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
+template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
+template<> static inline __host__ __device__ Ncv8s  _pixMinVal<Ncv8s>()  {return CHAR_MIN;}
+template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
+template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
+template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
+template<> static inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
+
+template<typename Tvec> struct TConvVec2Base;
+template<> struct TConvVec2Base<uchar1>  {typedef Ncv8u TBase;};
+template<> struct TConvVec2Base<uchar3>  {typedef Ncv8u TBase;};
+template<> struct TConvVec2Base<uchar4>  {typedef Ncv8u TBase;};
+template<> struct TConvVec2Base<ushort1> {typedef Ncv16u TBase;};
+template<> struct TConvVec2Base<ushort3> {typedef Ncv16u TBase;};
+template<> struct TConvVec2Base<ushort4> {typedef Ncv16u TBase;};
+template<> struct TConvVec2Base<uint1>   {typedef Ncv32u TBase;};
+template<> struct TConvVec2Base<uint3>   {typedef Ncv32u TBase;};
+template<> struct TConvVec2Base<uint4>   {typedef Ncv32u TBase;};
+template<> struct TConvVec2Base<float1>  {typedef Ncv32f TBase;};
+template<> struct TConvVec2Base<float3>  {typedef Ncv32f TBase;};
+template<> struct TConvVec2Base<float4>  {typedef Ncv32f TBase;};
+template<> struct TConvVec2Base<double1> {typedef Ncv64f TBase;};
+template<> struct TConvVec2Base<double3> {typedef Ncv64f TBase;};
+template<> struct TConvVec2Base<double4> {typedef Ncv64f TBase;};
+
+#define NC(T)       (sizeof(T) / sizeof(TConvVec2Base<T>::TBase))
+
+template<typename TBase, Ncv32u NC> struct TConvBase2Vec;
+template<> struct TConvBase2Vec<Ncv8u, 1>  {typedef uchar1 TVec;};
+template<> struct TConvBase2Vec<Ncv8u, 3>  {typedef uchar3 TVec;};
+template<> struct TConvBase2Vec<Ncv8u, 4>  {typedef uchar4 TVec;};
+template<> struct TConvBase2Vec<Ncv16u, 1> {typedef ushort1 TVec;};
+template<> struct TConvBase2Vec<Ncv16u, 3> {typedef ushort3 TVec;};
+template<> struct TConvBase2Vec<Ncv16u, 4> {typedef ushort4 TVec;};
+template<> struct TConvBase2Vec<Ncv32u, 1> {typedef uint1 TVec;};
+template<> struct TConvBase2Vec<Ncv32u, 3> {typedef uint3 TVec;};
+template<> struct TConvBase2Vec<Ncv32u, 4> {typedef uint4 TVec;};
+template<> struct TConvBase2Vec<Ncv32f, 1> {typedef float1 TVec;};
+template<> struct TConvBase2Vec<Ncv32f, 3> {typedef float3 TVec;};
+template<> struct TConvBase2Vec<Ncv32f, 4> {typedef float4 TVec;};
+template<> struct TConvBase2Vec<Ncv64f, 1> {typedef double1 TVec;};
+template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
+template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
+
+//TODO: consider using CUDA intrinsics to avoid branching
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
+
+//TODO: consider using CUDA intrinsics to avoid branching
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
+template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
+
+template<typename Tout> inline Tout _pixMakeZero();
+template<> static inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
+template<> static inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
+template<> static inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
+template<> static inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
+template<> static inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
+template<> static inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
+template<> static inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
+template<> static inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
+template<> static inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
+template<> static inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
+template<> static inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
+template<> static inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
+template<> static inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
+template<> static inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
+template<> static inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
+
+static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
+static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
+static inline __host__ __device__ uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
+static inline __host__ __device__ ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
+static inline __host__ __device__ ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
+static inline __host__ __device__ ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
+static inline __host__ __device__ uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
+static inline __host__ __device__ uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
+static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
+static inline __host__ __device__ float1 _pixMake(Ncv32f x) {return make_float1(x);}
+static inline __host__ __device__ float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
+static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
+static inline __host__ __device__ double1 _pixMake(Ncv64f x) {return make_double1(x);}
+static inline __host__ __device__ double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
+static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
+
+
+template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix);};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
+static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampZ(pix.x, out.x);
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
+static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampZ(pix.x, out.x);
+    _TDemoteClampZ(pix.y, out.y);
+    _TDemoteClampZ(pix.z, out.z);
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
+static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampZ(pix.x, out.x);
+    _TDemoteClampZ(pix.y, out.y);
+    _TDemoteClampZ(pix.z, out.z);
+    _TDemoteClampZ(pix.w, out.w);
+    return out;
+}};
+
+template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix)
+{
+    return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
+}
+
+
+template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix);};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
+static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampNN(pix.x, out.x);
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
+static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampNN(pix.x, out.x);
+    _TDemoteClampNN(pix.y, out.y);
+    _TDemoteClampNN(pix.z, out.z);
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
+static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
+{
+    Tout out;
+    _TDemoteClampNN(pix.x, out.x);
+    _TDemoteClampNN(pix.y, out.y);
+    _TDemoteClampNN(pix.z, out.z);
+    _TDemoteClampNN(pix.w, out.w);
+    return out;
+}};
+
+template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix)
+{
+    return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
+}
+
+
+template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w);};
+
+template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
+static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+{
+    Tout out;
+    typedef typename TConvVec2Base<Tout>::TBase TBout;
+    out.x = (TBout)(pix.x * w);
+    return out;
+}};
+
+template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
+static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+{
+    Tout out;
+    typedef typename TConvVec2Base<Tout>::TBase TBout;
+    out.x = (TBout)(pix.x * w);
+    out.y = (TBout)(pix.y * w);
+    out.z = (TBout)(pix.z * w);
+    return out;
+}};
+
+template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
+static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
+{
+    Tout out;
+    typedef typename TConvVec2Base<Tout>::TBase TBout;
+    out.x = (TBout)(pix.x * w);
+    out.y = (TBout)(pix.y * w);
+    out.z = (TBout)(pix.z * w);
+    out.w = (TBout)(pix.w * w);
+    return out;
+}};
+
+template<typename Tin, typename Tout, typename Tw> static __host__ __device__ Tout _pixScale(Tin &pix, Tw w)
+{
+    return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
+}
+
+
+template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
+
+template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
+static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+{
+    Tout out;
+    out.x = pix1.x + pix2.x;
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
+static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+{
+    Tout out;
+    out.x = pix1.x + pix2.x;
+    out.y = pix1.y + pix2.y;
+    out.z = pix1.z + pix2.z;
+    return out;
+}};
+
+template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
+static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
+{
+    Tout out;
+    out.x = pix1.x + pix2.x;
+    out.y = pix1.y + pix2.y;
+    out.z = pix1.z + pix2.z;
+    out.w = pix1.w + pix2.w;
+    return out;
+}};
+
+template<typename Tin, typename Tout> static __host__ __device__ Tout _pixAdd(Tout &pix1, Tin &pix2)
+{
+    return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
+}
+
+
+template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
+
+template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
+static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+{
+    return Tout(SQR(pix1.x - pix2.x));
+}};
+
+template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
+static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+{
+    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
+}};
+
+template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
+static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
+{
+    return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
+}};
+
+template<typename Tin, typename Tout> static __host__ __device__ Tout _pixDist(Tin &pix1, Tin &pix2)
+{
+    return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
+}
+
+
+template <typename T> struct TAccPixWeighted;
+template<> struct TAccPixWeighted<uchar1> {typedef double1 type;};
+template<> struct TAccPixWeighted<uchar3> {typedef double3 type;};
+template<> struct TAccPixWeighted<uchar4> {typedef double4 type;};
+template<> struct TAccPixWeighted<ushort1> {typedef double1 type;};
+template<> struct TAccPixWeighted<ushort3> {typedef double3 type;};
+template<> struct TAccPixWeighted<ushort4> {typedef double4 type;};
+template<> struct TAccPixWeighted<float1> {typedef double1 type;};
+template<> struct TAccPixWeighted<float3> {typedef double3 type;};
+template<> struct TAccPixWeighted<float4> {typedef double4 type;};
+
+template<typename Tfrom> struct TAccPixDist {};
+template<> struct TAccPixDist<uchar1> {typedef Ncv32u type;};
+template<> struct TAccPixDist<uchar3> {typedef Ncv32u type;};
+template<> struct TAccPixDist<uchar4> {typedef Ncv32u type;};
+template<> struct TAccPixDist<ushort1> {typedef Ncv32u type;};
+template<> struct TAccPixDist<ushort3> {typedef Ncv32u type;};
+template<> struct TAccPixDist<ushort4> {typedef Ncv32u type;};
+template<> struct TAccPixDist<float1> {typedef Ncv32f type;};
+template<> struct TAccPixDist<float3> {typedef Ncv32f type;};
+template<> struct TAccPixDist<float4> {typedef Ncv32f type;};
+
+#endif //_ncv_pixel_operations_hpp_
@@ -1,99 +1,99 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-
-#ifndef _ncvpyramid_hpp_
-#define _ncvpyramid_hpp_
-
-#include <memory>
-#include <vector>
-#include "NCV.hpp"
-
-#if 0 //def _WIN32
-
-template <class T>
-class NCV_EXPORTS NCVMatrixStack
-{
-public:
-    NCVMatrixStack() {this->_arr.clear();}
-    ~NCVMatrixStack()
-    {
-        const Ncv32u nElem = this->_arr.size();
-        for (Ncv32u i=0; i<nElem; i++)
-        {
-            pop_back();
-        }
-    }
-    void push_back(NCVMatrix<T> *elem) {this->_arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem));}
-    void pop_back() {this->_arr.pop_back();}
-    NCVMatrix<T> * operator [] (int i) const {return this->_arr[i].get();}
-private:
-    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr;
-};
-
-
-template <class T>
-class NCV_EXPORTS NCVImagePyramid
-{
-public:
-
-    NCVImagePyramid(const NCVMatrix<T> &img,
-                    Ncv8u nLayers,
-                    INCVMemAllocator &alloc,
-                    cudaStream_t cuStream);
-    ~NCVImagePyramid();
-    NcvBool isInitialized() const;
-    NCVStatus getLayer(NCVMatrix<T> &outImg,
-                       NcvSize32u outRoi,
-                       NcvBool bTrilinear,
-                       cudaStream_t cuStream) const;
-
-private:
-
-    NcvBool _isInitialized;
-    const NCVMatrix<T> *layer0;
-    NCVMatrixStack<T> pyramid;
-    Ncv32u nLayers;
-};
-
-#endif //_WIN32
-
-#endif //_ncvpyramid_hpp_
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef _ncvpyramid_hpp_
+#define _ncvpyramid_hpp_
+
+#include <memory>
+#include <vector>
+#include "NCV.hpp"
+
+#if 0 //def _WIN32
+
+template <class T>
+class NCV_EXPORTS NCVMatrixStack // Stack of heap-allocated NCVMatrix<T> objects held through std::tr1::shared_ptr. Currently compiled out by the enclosing "#if 0".
+{
+public:
+    NCVMatrixStack() {this->_arr.clear();} // clear() on the freshly constructed vector; the stack starts empty
+    ~NCVMatrixStack()
+    {
+        const Ncv32u nElem = this->_arr.size(); // element count is captured once, before any element is popped
+        for (Ncv32u i=0; i<nElem; i++)
+        {
+            pop_back(); // removes elements last-to-first, releasing each shared_ptr
+        }
+    }
+    void push_back(NCVMatrix<T> *elem) {this->_arr.push_back(std::tr1::shared_ptr< NCVMatrix<T> >(elem));} // wraps the raw pointer in a new shared_ptr, so the stack takes over its lifetime
+    void pop_back() {this->_arr.pop_back();} // drops the last element; no emptiness check is made here
+    NCVMatrix<T> * operator [] (int i) const {return this->_arr[i].get();} // raw pointer to element i; the index is not range-checked
+private:
+    std::vector< std::tr1::shared_ptr< NCVMatrix<T> > > _arr; // elements in push order
+};
+
+
+template <class T>
+class NCV_EXPORTS NCVImagePyramid // Image pyramid over NCVMatrix<T>. Only declarations are visible here, and the class is currently compiled out by the enclosing "#if 0".
+{
+public:
+
+    NCVImagePyramid(const NCVMatrix<T> &img, // source image (presumably what layer0 ends up pointing to - confirm in the implementation)
+                    Ncv8u nLayers, // requested layer count; note that the member of the same name is Ncv32u
+                    INCVMemAllocator &alloc, // allocator passed by reference; how it is used is not visible here
+                    cudaStream_t cuStream); // CUDA stream argument; how it is used is not visible here
+    ~NCVImagePyramid();
+    NcvBool isInitialized() const; // presumably reports _isInitialized - confirm in the implementation
+    NCVStatus getLayer(NCVMatrix<T> &outImg, // output matrix, passed by non-const reference
+                       NcvSize32u outRoi, // presumably the size of the requested layer - confirm
+                       NcvBool bTrilinear, // interpolation switch; exact semantics live in the implementation
+                       cudaStream_t cuStream) const; // const member: does not modify the pyramid object itself
+
+private:
+
+    NcvBool _isInitialized;
+    const NCVMatrix<T> *layer0; // pointer to a const matrix; ownership is not evident from this declaration
+    NCVMatrixStack<T> pyramid; // NCVMatrixStack holding the pyramid's matrices (presumably one per layer - confirm)
+    Ncv32u nLayers;
+};
+
+#endif //_WIN32
+
+#endif //_ncvpyramid_hpp_
@@ -1,220 +1,220 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef _ncvruntimetemplates_hpp_
-#define _ncvruntimetemplates_hpp_
-#if defined _MSC_VER &&_MSC_VER >= 1200
-#pragma warning( disable: 4800 )
-#endif
-
-
-#include <stdarg.h>
-#include <vector>
-
-
-////////////////////////////////////////////////////////////////////////////////
-// The Loki Library
-// Copyright (c) 2001 by Andrei Alexandrescu
-// This code accompanies the book:
-// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
-//     Patterns Applied". Copyright (c) 2001. Addison-Wesley.
-// Permission to use, copy, modify, distribute and sell this software for any
-//     purpose is hereby granted without fee, provided that the above copyright
-//     notice appear in all copies and that both that copyright notice and this
-//     permission notice appear in supporting documentation.
-// The author or Addison-Welsey Longman make no representations about the
-//     suitability of this software for any purpose. It is provided "as is"
-//     without express or implied warranty.
-// http://loki-lib.sourceforge.net/index.php?n=Main.License
-////////////////////////////////////////////////////////////////////////////////
-
-namespace Loki
-{
-    //==============================================================================
-    // class NullType
-    // Used as a placeholder for "no type here"
-    // Useful as an end marker in typelists
-    //==============================================================================
-
-    class NullType {};
-
-    //==============================================================================
-    // class template Typelist
-    // The building block of typelists of any length
-    // Use it through the LOKI_TYPELIST_NN macros
-    // Defines nested types:
-    //     Head (first element, a non-typelist type by convention)
-    //     Tail (second element, can be another typelist)
-    //==============================================================================
-
-    template <class T, class U>
-    struct Typelist
-    {
-        typedef T Head;
-        typedef U Tail;
-    };
-
-    //==============================================================================
-    // class template Int2Type
-    // Converts each integral constant into a unique type
-    // Invocation: Int2Type<v> where v is a compile-time constant integral
-    // Defines 'value', an enum that evaluates to v
-    //==============================================================================
-
-    template <int v>
-    struct Int2Type
-    {
-        enum { value = v };
-    };
-
-    namespace TL
-    {
-        //==============================================================================
-        // class template TypeAt
-        // Finds the type at a given index in a typelist
-        // Invocation (TList is a typelist and index is a compile-time integral
-        //     constant):
-        // TypeAt<TList, index>::Result
-        // returns the type in position 'index' in TList
-        // If you pass an out-of-bounds index, the result is a compile-time error
-        //==============================================================================
-
-        template <class TList, unsigned int index> struct TypeAt;
-
-        template <class Head, class Tail>
-        struct TypeAt<Typelist<Head, Tail>, 0>
-        {
-            typedef Head Result;
-        };
-
-        template <class Head, class Tail, unsigned int i>
-        struct TypeAt<Typelist<Head, Tail>, i>
-        {
-            typedef typename TypeAt<Tail, i - 1>::Result Result;
-        };
-    }
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Runtime boolean template instance dispatcher
-// Cyril Crassin <cyril.crassin@icare3d.org>
-// NVIDIA, 2010
-////////////////////////////////////////////////////////////////////////////////
-
-namespace NCVRuntimeTemplateBool
-{
-    //This struct is used to transform a list of parameters into template arguments
-    //The idea is to build a typelist containing the arguments
-    //and to pass this typelist to a user defined functor
-    template<typename TList, int NumArguments, class Func>
-    struct KernelCaller
-    {
-        //Convenience function used by the user
-        //Takes a variable argument list, transforms it into a list
-        static void call(Func *functor, ...)
-        {
-            //Vector used to collect arguments
-            std::vector<int> templateParamList;
-
-            //Variable argument list manipulation
-            va_list listPointer;
-            va_start(listPointer, functor);
-            //Collect parameters into the list
-            for(int i=0; i<NumArguments; i++)
-            {
-                int val = va_arg(listPointer, int);
-                templateParamList.push_back(val);
-            }
-            va_end(listPointer);
-
-            //Call the actual typelist building function
-            call(*functor, templateParamList);
-        }
-
-        //Actual function called recursively to build a typelist based
-        //on a list of values
-        static void call( Func &functor, std::vector<int> &templateParamList)
-        {
-            //Get current parameter value in the list
-            NcvBool val = templateParamList[templateParamList.size() - 1];
-            templateParamList.pop_back();
-
-            //Select the compile time value to add into the typelist
-            //depending on the runtime variable and make recursive call.
-            //Both versions are really instantiated
-            if (val)
-            {
-                KernelCaller<
-                    Loki::Typelist<typename Loki::Int2Type<1>, TList >,
-                    NumArguments-1, Func >
-                    ::call(functor, templateParamList);
-            }
-            else
-            {
-                KernelCaller<
-                    Loki::Typelist<typename Loki::Int2Type<0>, TList >,
-                    NumArguments-1, Func >
-                    ::call(functor, templateParamList);
-            }
-        }
-    };
-
-    //Specialization for 0 value left in the list
-    //-> actual kernel functor call
-    template<class TList, class Func>
-    struct KernelCaller<TList, 0, Func>
-    {
-        static void call(Func &functor)
-        {
-            //Call to the functor's kernel call method
-            functor.call(TList()); //TList instantiated to get the method template parameter resolved
-        }
-
-        static void call(Func &functor, std::vector<int> &templateParams)
-        {
-            (void)templateParams;
-            functor.call(TList());
-        }
-    };
-}
-
-#endif //_ncvruntimetemplates_hpp_
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef _ncvruntimetemplates_hpp_
+#define _ncvruntimetemplates_hpp_
+#if defined _MSC_VER &&_MSC_VER >= 1200
+#pragma warning( disable: 4800 )
+#endif
+
+
+#include <stdarg.h>
+#include <vector>
+
+
+////////////////////////////////////////////////////////////////////////////////
+// The Loki Library
+// Copyright (c) 2001 by Andrei Alexandrescu
+// This code accompanies the book:
+// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design
+//     Patterns Applied". Copyright (c) 2001. Addison-Wesley.
+// Permission to use, copy, modify, distribute and sell this software for any
+//     purpose is hereby granted without fee, provided that the above copyright
+//     notice appear in all copies and that both that copyright notice and this
+//     permission notice appear in supporting documentation.
+// The author or Addison-Welsey Longman make no representations about the
+//     suitability of this software for any purpose. It is provided "as is"
+//     without express or implied warranty.
+// http://loki-lib.sourceforge.net/index.php?n=Main.License
+////////////////////////////////////////////////////////////////////////////////
+
+namespace Loki
+{
+    //==============================================================================
+    // class NullType
+    // Empty class used as a placeholder for "no type here"
+    // Useful as the end marker (last Tail) of a typelist
+    //==============================================================================
+
+    class NullType {};
+
+    //==============================================================================
+    // class template Typelist
+    // The building block of typelists of any length
+    // (the LOKI_TYPELIST_NN helper macros are not defined in this header)
+    // Defines nested types:
+    //     Head (first element, a non-typelist type by convention)
+    //     Tail (second element, can be another typelist)
+    //==============================================================================
+
+    template <class T, class U>
+    struct Typelist
+    {
+        typedef T Head;
+        typedef U Tail;
+    };
+
+    //==============================================================================
+    // class template Int2Type
+    // Converts each integral constant into a unique type
+    // Invocation: Int2Type<v> where v is a compile-time int constant
+    // Defines 'value', an enumerator that evaluates to v
+    //==============================================================================
+
+    template <int v>
+    struct Int2Type
+    {
+        enum { value = v };
+    };
+
+    namespace TL
+    {
+        //==============================================================================
+        // class template TypeAt
+        // Finds the type at a given index in a typelist
+        // Invocation (TList is a typelist and index is a compile-time integral
+        //     constant):
+        // TypeAt<TList, index>::Result
+        // yields the type in position 'index' in TList (0 is the Head)
+        // If you pass an out-of-bounds index, the result is a compile-time error
+        //==============================================================================
+
+        template <class TList, unsigned int index> struct TypeAt; // declared only: a non-Typelist first argument has no Result, which produces the compile-time error
+
+        template <class Head, class Tail>
+        struct TypeAt<Typelist<Head, Tail>, 0> // base case: index 0 selects the Head
+        {
+            typedef Head Result;
+        };
+
+        template <class Head, class Tail, unsigned int i>
+        struct TypeAt<Typelist<Head, Tail>, i> // recursive case: skip the Head and look up i - 1 in the Tail
+        {
+            typedef typename TypeAt<Tail, i - 1>::Result Result;
+        };
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Runtime boolean template instance dispatcher
+// Cyril Crassin <cyril.crassin@icare3d.org>
+// NVIDIA, 2010
+////////////////////////////////////////////////////////////////////////////////
+
+namespace NCVRuntimeTemplateBool
+{
+    //KernelCaller turns NumArguments runtime boolean values into compile-time
+    //template arguments: it builds a Loki typelist of Int2Type<0>/Int2Type<1>
+    //entries and passes an instance of that typelist to a user defined functor
+    template<typename TList, int NumArguments, class Func>
+    struct KernelCaller
+    {
+        //Convenience entry point used by the caller
+        //Reads exactly NumArguments values of type int from the variadic arguments
+        static void call(Func *functor, ...)
+        {
+            //Vector used to collect the arguments in the order they were passed
+            std::vector<int> templateParamList;
+
+            //Variable argument list manipulation
+            va_list listPointer;
+            va_start(listPointer, functor);
+            //Collect parameters into the list
+            for(int i=0; i<NumArguments; i++)
+            {
+                int val = va_arg(listPointer, int); //each variadic argument is read as int, so callers must pass values that promote to int
+                templateParamList.push_back(val);
+            }
+            va_end(listPointer);
+
+            //Hand over to the recursive typelist building overload
+            call(*functor, templateParamList);
+        }
+
+        //Overload called recursively to build the typelist from the
+        //collected values; consumes one value from the back per level
+        static void call( Func &functor, std::vector<int> &templateParamList)
+        {
+            //Take the last remaining value (no check for an empty vector is made here)
+            NcvBool val = templateParamList[templateParamList.size() - 1];
+            templateParamList.pop_back();
+
+            //Prepend Int2Type<1> or Int2Type<0> to the typelist depending on the
+            //runtime value and recurse; the first argument ends up as the list Head.
+            //Both branches are instantiated at compile time, doubling the instantiations per argument
+            if (val)
+            {
+                KernelCaller<
+                    Loki::Typelist<typename Loki::Int2Type<1>, TList >,
+                    NumArguments-1, Func >
+                    ::call(functor, templateParamList);
+            }
+            else
+            {
+                KernelCaller<
+                    Loki::Typelist<typename Loki::Int2Type<0>, TList >,
+                    NumArguments-1, Func >
+                    ::call(functor, templateParamList);
+            }
+        }
+    };
+
+    //Specialization for 0 values left in the list; ends the recursion
+    //-> actual kernel functor call
+    template<class TList, class Func>
+    struct KernelCaller<TList, 0, Func>
+    {
+        static void call(Func &functor)
+        {
+            //Call to the functor's kernel call method
+            functor.call(TList()); //TList instantiated to get the method template parameter resolved
+        }
+
+        static void call(Func &functor, std::vector<int> &templateParams) //overload reached from the recursive call above
+        {
+            (void)templateParams; //the vector is not read at this level; the cast silences unused-parameter warnings
+            functor.call(TList());
+        }
+    };
+}
+
+#endif //_ncvruntimetemplates_hpp_