diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index b5a0835110..660a6df73d 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1397,7 +1397,7 @@ public:
 };
 
 ////////////////////////////////// CascadeClassifier_GPU //////////////////////////////////////////
-// The cascade classifier class for object detection.
+// The cascade classifier class for object detection: supports old haar and new lbp xlm formats and nvbin for haar cascades olny.
 class CV_EXPORTS CascadeClassifier_GPU
 {
 public:
@@ -1407,42 +1407,28 @@ public:
 
     bool empty() const;
     bool load(const std::string& filename);
-    void release();
-
-    /* returns number of detected objects */
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor=1.2, int minNeighbors=4, Size minSize=Size());
-
-    bool findLargestObject;
-    bool visualizeInPlace;
-
-    Size getClassifierSize() const;
-private:
-
-    struct CascadeClassifierImpl;
-    CascadeClassifierImpl* impl;
-};
-
-// The cascade classifier class for object detection.
-class CV_EXPORTS CascadeClassifier_GPU_LBP
-{
-public:
-    CascadeClassifier_GPU_LBP(cv::Size detectionFrameSize = cv::Size());
-    ~CascadeClassifier_GPU_LBP();
-
-    bool empty() const;
-    bool load(const std::string& filename);
-    void release();
-
-    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4,
-    cv::Size maxObjectSize = cv::Size()/*, Size minSize = Size()*/);
-    Size getClassifierSize() const;
+    void release();
 
-private:
+    /* returns number of detected objects */
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, double scaleFactor = 1.2, int minNeighbors = 4, Size minSize = Size());
+
+    bool findLargestObject;
+    bool visualizeInPlace;
+
+    Size getClassifierSize() const;
+
+private:
     struct CascadeClassifierImpl;
     CascadeClassifierImpl* impl;
-};
-
-////////////////////////////////// SURF //////////////////////////////////////////
+    struct HaarCascade;
+    struct LbpCascade;
+    friend class CascadeClassifier_GPU_LBP;
+
+public:
+    int detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize = Size(), double scaleFactor = 1.1, int minNeighbors = 4);
+};
+
+////////////////////////////////// SURF //////////////////////////////////////////
 
 class CV_EXPORTS SURF_GPU
 {
diff --git a/modules/gpu/perf/perf_objdetect.cpp b/modules/gpu/perf/perf_objdetect.cpp
index a9b3d7e547..c376586c9b 100644
--- a/modules/gpu/perf/perf_objdetect.cpp
+++ b/modules/gpu/perf/perf_objdetect.cpp
@@ -70,7 +70,7 @@ GPU_PERF_TEST_1(LBPClassifier, cv::gpu::DeviceInfo)
 
     cv::gpu::GpuMat img(img_host);
         cv::gpu::GpuMat gpu_rects;
-    cv::gpu::CascadeClassifier_GPU_LBP cascade(img.size());
+    cv::gpu::CascadeClassifier_GPU cascade;
     ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/lbpcascade/lbpcascade_frontalface.xml")));
 
     cascade.detectMultiScale(img, gpu_rects);
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
index 927b42249e..61f5c94310 100644
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -46,439 +46,54 @@
 
 using namespace cv;
 using namespace cv::gpu;
-using namespace std;
-
-#if !defined (HAVE_CUDA)
-// ============ old fashioned haar cascade ==============================================//
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU()               { throw_nogpu(); }
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&)  { throw_nogpu(); }
-cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU()              { throw_nogpu(); }
-
-bool cv::gpu::CascadeClassifier_GPU::empty() const              { throw_nogpu(); return true; }
-bool cv::gpu::CascadeClassifier_GPU::load(const string&)        { throw_nogpu(); return true; }
-Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const  { throw_nogpu(); return Size(); }
-
-int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& , GpuMat& , double , int , Size)  { throw_nogpu(); return 0; }
-
-// ============ LBP cascade ==============================================//
-cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP(cv::Size /*frameSize*/){ throw_nogpu(); }
-cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP()                     { throw_nogpu(); }
-
-bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const                               { throw_nogpu(); return true; }
-bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string&)                         { throw_nogpu(); return true; }
-Size cv::gpu::CascadeClassifier_GPU_LBP::getClassifierSize() const                   { throw_nogpu(); return Size(); }
-void cv::gpu::CascadeClassifier_GPU_LBP::allocateBuffers(cv::Size /*frame*/)         { throw_nogpu();}
-
-int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const cv::gpu::GpuMat& /*image*/, cv::gpu::GpuMat& /*objectsBuf*/,
-double /*scaleFactor*/, int /*minNeighbors*/, cv::Size /*maxObjectSize*/){ throw_nogpu(); return 0;}
-
-#else
-
-cv::Size operator -(const cv::Size& a, const cv::Size& b)
-{
-    return cv::Size(a.width - b.width, a.height - b.height);
-}
-
-cv::Size operator +(const cv::Size& a, const int& i)
-{
-    return cv::Size(a.width + i, a.height + i);
-}
+using namespace std;
 
-cv::Size operator *(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
-}
+#if !defined (HAVE_CUDA)
 
-cv::Size operator /(const cv::Size& a, const float& f)
-{
-    return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
-}
+cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU()               { throw_nogpu(); }
+cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string&)  { throw_nogpu(); }
+cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU()              { throw_nogpu(); }
+bool cv::gpu::CascadeClassifier_GPU::empty() const                    { throw_nogpu(); return true; }
+bool cv::gpu::CascadeClassifier_GPU::load(const string&)              { throw_nogpu(); return true; }
+Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const        { throw_nogpu(); return Size();}
+void cv::gpu::CascadeClassifier_GPU::release()                        { throw_nogpu(); }
+int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size)       {throw_nogpu(); return -1;}
+int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_nogpu(); return -1;}
 
-bool operator <=(const cv::Size& a, const cv::Size& b)
-{
-    return a.width <= b.width && a.height <= b.width;
-}
+#else
 
-struct PyrLavel
-{
-    PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window) : order(_order)
-    {
-        scale = pow(_scale, order);
-        sFrame = frame / scale;
-        workArea = sFrame - window + 1;
-        sWindow = window * scale;
-    }
-
-    bool isFeasible(cv::Size maxObj)
-    {
-        return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
-    }
-
-    PyrLavel next(float factor, cv::Size frame, cv::Size window)
-    {
-        return PyrLavel(order + 1, factor, frame, window);
-    }
-
-    int order;
-    float scale;
-    cv::Size sFrame;
-    cv::Size workArea;
-    cv::Size sWindow;
-};
-
-namespace cv { namespace gpu { namespace device
-{
-    namespace lbp
-    {
-        void classifyPyramid(int frameW,
-                             int frameH,
-                             int windowW,
-                             int windowH,
-                             float initalScale,
-                             float factor,
-                             int total,
-                             const DevMem2Db& mstages,
-                             const int nstages,
-                             const DevMem2Di& mnodes,
-                             const DevMem2Df& mleaves,
-                             const DevMem2Di& msubsets,
-                             const DevMem2Db& mfeatures,
-                             const int subsetSize,
-                             DevMem2D_<int4> objects,
-                             unsigned int* classified,
-                             DevMem2Di integral);
-
-        void connectedConmonents(DevMem2D_<int4>  candidates, int ncandidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
-    }
-}}}
-
-struct cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl
+struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
-    struct Stage
-    {
-        int    first;
-        int    ntrees;
-        float  threshold;
-    };
+    CascadeClassifierImpl(){}
+    virtual ~CascadeClassifierImpl(){}
 
-    bool read(const FileNode &root);
-    void allocateBuffers(cv::Size frame = cv::Size());
-    bool empty() const {return stage_mat.empty();}
+    virtual unsigned int process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
+                      bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize, cv::Size maxObjectSize) = 0;
 
-    int process(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize);
-
-private:
-
-    enum stage { BOOST = 0 };
-    enum feature { LBP = 0 };
-
-    static const stage stageType = BOOST;
-    static const feature featureType = LBP;
-
-    cv::Size NxM;
-    bool isStumps;
-    int ncategories;
-    int subsetSize;
-    int nodeStep;
-
-    // gpu representation of classifier
-    GpuMat stage_mat;
-    GpuMat trees_mat;
-    GpuMat nodes_mat;
-    GpuMat leaves_mat;
-    GpuMat subsets_mat;
-    GpuMat features_mat;
-
-    GpuMat integral;
-    GpuMat integralBuffer;
-    GpuMat resuzeBuffer;
-
-    GpuMat candidates;
-    static const int integralFactor = 4;
+    virtual cv::Size getClassifierCvSize() const = 0;
+    virtual bool read(const string& classifierAsXml) = 0;
 };
 
-void cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl::allocateBuffers(cv::Size frame)
-{
-    if (frame == cv::Size())
-        return;
-
-    if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
-    {
-        resuzeBuffer.create(frame, CV_8UC1);
-
-        integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
-        NcvSize32u roiSize;
-        roiSize.width = frame.width;
-        roiSize.height = frame.height;
-
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
-
-        Ncv32u bufSize;
-        ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
-        integralBuffer.create(1, bufSize, CV_8UC1);
-
-        candidates.create(1 , frame.width >> 1, CV_32SC4);
-    }
-}
-
-// currently only stump based boost classifiers are supported
-bool CascadeClassifier_GPU_LBP::CascadeClassifierImpl::read(const FileNode &root)
-{
-    const char *GPU_CC_STAGE_TYPE       = "stageType";
-    const char *GPU_CC_FEATURE_TYPE     = "featureType";
-    const char *GPU_CC_BOOST            = "BOOST";
-    const char *GPU_CC_LBP              = "LBP";
-    const char *GPU_CC_MAX_CAT_COUNT    = "maxCatCount";
-    const char *GPU_CC_HEIGHT           = "height";
-    const char *GPU_CC_WIDTH            = "width";
-    const char *GPU_CC_STAGE_PARAMS     = "stageParams";
-    const char *GPU_CC_MAX_DEPTH        = "maxDepth";
-    const char *GPU_CC_FEATURE_PARAMS   = "featureParams";
-    const char *GPU_CC_STAGES           = "stages";
-    const char *GPU_CC_STAGE_THRESHOLD  = "stageThreshold";
-    const float GPU_THRESHOLD_EPS       = 1e-5f;
-    const char *GPU_CC_WEAK_CLASSIFIERS = "weakClassifiers";
-    const char *GPU_CC_INTERNAL_NODES   = "internalNodes";
-    const char *GPU_CC_LEAF_VALUES      = "leafValues";
-    const char *GPU_CC_FEATURES         = "features";
-    const char *GPU_CC_RECT             = "rect";
-
-    std::string stageTypeStr = (string)root[GPU_CC_STAGE_TYPE];
-    CV_Assert(stageTypeStr == GPU_CC_BOOST);
-
-    string featureTypeStr = (string)root[GPU_CC_FEATURE_TYPE];
-    CV_Assert(featureTypeStr == GPU_CC_LBP);
-
-    NxM.width =  (int)root[GPU_CC_WIDTH];
-    NxM.height = (int)root[GPU_CC_HEIGHT];
-    CV_Assert( NxM.height > 0 && NxM.width > 0 );
-
-    isStumps = ((int)(root[GPU_CC_STAGE_PARAMS][GPU_CC_MAX_DEPTH]) == 1) ? true : false;
-    CV_Assert(isStumps);
-
-    FileNode fn = root[GPU_CC_FEATURE_PARAMS];
-    if (fn.empty())
-        return false;
-
-    ncategories = fn[GPU_CC_MAX_CAT_COUNT];
-
-    subsetSize = (ncategories + 31) / 32;
-    nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );
-
-    fn = root[GPU_CC_STAGES];
-    if (fn.empty())
-        return false;
-
-    std::vector<Stage> stages;
-    stages.reserve(fn.size());
-
-    std::vector<int> cl_trees;
-    std::vector<int> cl_nodes;
-    std::vector<float> cl_leaves;
-    std::vector<int> subsets;
-
-    FileNodeIterator it = fn.begin(), it_end = fn.end();
-    for (size_t si = 0; it != it_end; si++, ++it )
-    {
-        FileNode fns = *it;
-        Stage st;
-        st.threshold = (float)fns[GPU_CC_STAGE_THRESHOLD] - GPU_THRESHOLD_EPS;
-
-        fns = fns[GPU_CC_WEAK_CLASSIFIERS];
-        if (fns.empty())
-            return false;
-
-        st.ntrees = (int)fns.size();
-        st.first = (int)cl_trees.size();
-
-        stages.push_back(st);// (int, int, float)
-
-        cl_trees.reserve(stages[si].first + stages[si].ntrees);
-
-        // weak trees
-        FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
-        for ( ; it1 != it1_end; ++it1 )
-        {
-            FileNode fnw = *it1;
-
-            FileNode internalNodes = fnw[GPU_CC_INTERNAL_NODES];
-            FileNode leafValues = fnw[GPU_CC_LEAF_VALUES];
-            if ( internalNodes.empty() || leafValues.empty() )
-                return false;
-
-            int nodeCount = (int)internalNodes.size()/nodeStep;
-            cl_trees.push_back(nodeCount);
-
-            cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
-            cl_leaves.reserve(cl_leaves.size() + leafValues.size());
-
-            if( subsetSize > 0 )
-                subsets.reserve(subsets.size() + nodeCount * subsetSize);
-
-            // nodes
-            FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();
-
-            for( ; iIt != iEnd; )
-            {
-                cl_nodes.push_back((int)*(iIt++));
-                cl_nodes.push_back((int)*(iIt++));
-                cl_nodes.push_back((int)*(iIt++));
-
-                if( subsetSize > 0 )
-                    for( int j = 0; j < subsetSize; j++, ++iIt )
-                        subsets.push_back((int)*iIt);
-            }
-
-            // leaves
-            iIt = leafValues.begin(), iEnd = leafValues.end();
-            for( ; iIt != iEnd; ++iIt )
-                cl_leaves.push_back((float)*iIt);
-        }
-    }
-
-    fn = root[GPU_CC_FEATURES];
-    if( fn.empty() )
-        return false;
-    std::vector<uchar> features;
-    features.reserve(fn.size() * 4);
-    FileNodeIterator f_it = fn.begin(), f_end = fn.end();
-    for (; f_it != f_end; ++f_it)
-    {
-        FileNode rect = (*f_it)[GPU_CC_RECT];
-        FileNodeIterator r_it = rect.begin();
-        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
-        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
-        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
-        features.push_back(saturate_cast<uchar>((int)*(r_it++)));
-    }
-
-    // copy data structures on gpu
-    stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, (uchar*)&(stages[0]) ));
-    trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
-    nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
-    leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
-    subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
-    features_mat.upload(cv::Mat(features).reshape(4,1));
-
-    return true;
-}
-
-int cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifierImpl::process(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize)
-{
-    CV_Assert(!empty() && scaleFactor > 1 && image.depth() == CV_8U);
-
-    const int defaultObjSearchNum = 100;
-    const float grouping_eps = 0.2f;
-
-    if( !objects.empty() && objects.depth() == CV_32S)
-        objects.reshape(4, 1);
-    else
-        objects.create(1 , image.cols >> 4, CV_32SC4);
-
-    // used for debug
-    // candidates.setTo(cv::Scalar::all(0));
-    // objects.setTo(cv::Scalar::all(0));
-
-    if (maxObjectSize == cv::Size())
-        maxObjectSize = image.size();
-
-    allocateBuffers(image.size());
-
-    unsigned int classified = 0;
-    GpuMat dclassified(1, 1, CV_32S);
-    cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
-
-    PyrLavel level(0, 1.0f, image.size(), NxM);
-
-    while (level.isFeasible(maxObjectSize))
-    {
-        int acc = level.sFrame.width + 1;
-        float iniScale = level.scale;
-
-        cv::Size area = level.workArea;
-        int step = 1 + (level.scale <= 2.f);
-
-        int total = 0, prev  = 0;
-
-        while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
-        {
-            // create sutable matrix headers
-            GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
-            GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
-            GpuMat buff = integralBuffer;
-
-            // generate integral for scale
-            gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR);
-            gpu::integralBuffered(src, sint, buff);
-
-            // calculate job
-            int totalWidth = level.workArea.width / step;
-            // totalWidth  = ((totalWidth + WARP_MASK) / WARP_SIZE) << WARP_LOG;
-
-            total += totalWidth * (level.workArea.height / step);
-
-            // go to next pyramide level
-            level = level.next(scaleFactor, image.size(), NxM);
-            area = level.workArea;
-
-            step = (1 + (level.scale <= 2.f));
-            prev = acc;
-            acc += level.sFrame.width + 1;
-        }
-
-        device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
-            leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
-    }
-
-    if (groupThreshold <= 0  || objects.empty())
-        return 0;
-
-    cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-    device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
-
-    // candidates.copyTo(objects);
-    cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-    cudaSafeCall( cudaDeviceSynchronize() );
-    return classified;
-}
-
-cv::gpu::CascadeClassifier_GPU_LBP::CascadeClassifier_GPU_LBP(cv::Size detectionFrameSize) : impl(new CascadeClassifierImpl()) { (*impl).allocateBuffers(detectionFrameSize); }
-cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP(){ delete impl; }
-
-
-bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const
+struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
 {
-    return (*impl).empty();
-}
+public:
+    HaarCascade() : lastAllocatedFrameSize(-1, -1)
+    {
+        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
+    }
 
-bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
-{
-    FileStorage fs(classifierAsXml, FileStorage::READ);
-    return fs.isOpened() ? (*impl).read(fs.getFirstTopLevelNode()) : false;
-}
+    bool read(const string& filename)
+    {
+        ncvSafeCall( load(filename) );
+        return true;
+    }
 
-int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize)
-{
-    return (*impl).process(image, objects, scaleFactor, groupThreshold, maxObjectSize);
-}
-
-// ============ old fashioned haar cascade ==============================================//
-struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
-{
-    CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
-    {
-        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
-        ncvSafeCall( load(filename) );
-    }
-
-
-    NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
-                      bool findLargestObject, bool visualizeInPlace, NcvSize32u ncvMinSize,
-                      /*out*/unsigned int& numDetections)
-    {
-        calculateMemReqsAndAllocate(src.size());
+    NCVStatus process(const GpuMat& src, GpuMat& objects, float scaleStep, int minNeighbors,
+                      bool findLargestObject, bool visualizeInPlace, cv::Size ncvMinSize,
+                      /*out*/unsigned int& numDetections)
+    {
+        calculateMemReqsAndAllocate(src.size());
 
         NCVMemPtr src_beg;
         src_beg.ptr = (void*)src.ptr<Ncv8u>();
@@ -507,6 +122,8 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
         roi.width = d_src.width();
         roi.height = d_src.height();
 
+        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);
+
         Ncv32u flags = 0;
         flags |= findLargestObject? NCVPipeObjDet_FindLargestObject : 0;
         flags |= visualizeInPlace ? NCVPipeObjDet_VisualizeInPlace  : 0;
@@ -514,7 +131,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
         ncvStat = ncvDetectObjectsMultiScale_device(
             d_src, roi, d_rects, numDetections, haar, *h_haarStages,
             *d_haarStages, *d_haarNodes, *d_haarFeatures,
-            ncvMinSize,
+            winMinSize,
             minNeighbors,
             scaleStep, 1,
             flags,
@@ -525,16 +142,35 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
         return NCV_SUCCESS;
     }
 
+    unsigned int process(const GpuMat& image, GpuMat& objectsBuf, float scaleFactor, int minNeighbors,
+                      bool findLargestObject, bool visualizeInPlace, cv::Size minSize, cv::Size maxObjectSize)
+    {
+        CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
 
-    NcvSize32u getClassifierSize() const  { return haar.ClassifierSize; }
-    cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
+        const int defaultObjSearchNum = 100;
+        if (objectsBuf.empty())
+        {
+            objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
+        }
 
+        cv::Size ncvMinSize = this->getClassifierCvSize();
 
-private:
+        if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height)
+        {
+            ncvMinSize.width = minSize.width;
+            ncvMinSize.height = minSize.height;
+        }
 
+        unsigned int numDetections;
+        ncvSafeCall(this->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections));
 
-    static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
+        return numDetections;
+    }
+
+    cv::Size getClassifierCvSize() const { return cv::Size(haar.ClassifierSize.width, haar.ClassifierSize.height); }
 
+private:
+    static void NCVDebugOutputHandler(const std::string &msg) { CV_Error(CV_GpuApiCallError, msg.c_str()); }
 
     NCVStatus load(const string& classifierFile)
     {
@@ -581,7 +217,6 @@ private:
         return NCV_SUCCESS;
     }
 
-
     NCVStatus calculateMemReqsAndAllocate(const Size& frameSize)
     {
         if (lastAllocatedFrameSize == frameSize)
@@ -623,7 +258,6 @@ private:
         return NCV_SUCCESS;
     }
 
-
     cudaDeviceProp devProp;
     NCVStatus ncvStat;
 
@@ -644,55 +278,448 @@ private:
 
     Ptr<NCVMemStackAllocator> gpuAllocator;
     Ptr<NCVMemStackAllocator> cpuAllocator;
+
+    virtual ~HaarCascade(){}
 };
 
+cv::Size operator -(const cv::Size& a, const cv::Size& b)
+{
+    return cv::Size(a.width - b.width, a.height - b.height);
+}
 
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() : findLargestObject(false), visualizeInPlace(false), impl(0) {}
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename) : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
-cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
-bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; }
-void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
+cv::Size operator +(const cv::Size& a, const int& i)
+{
+    return cv::Size(a.width + i, a.height + i);
+}
 
+cv::Size operator *(const cv::Size& a, const float& f)
+{
+    return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
+}
 
-bool cv::gpu::CascadeClassifier_GPU::load(const string& filename)
+cv::Size operator /(const cv::Size& a, const float& f)
 {
-    release();
-    impl = new CascadeClassifierImpl(filename);
-    return !this->empty();
+    return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
+}
+
+bool operator <=(const cv::Size& a, const cv::Size& b)
+{
+    return a.width <= b.width && a.height <= b.width;
 }
 
+struct PyrLavel
+{
+    PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+    {
+        do
+        {
+            order = _order;
+            scale = pow(_scale, order);
+            sFrame = frame / scale;
+            workArea = sFrame - window + 1;
+            sWindow = window * scale;
+            _order++;
+        } while (sWindow <= minObjectSize);
+    }
+
+    bool isFeasible(cv::Size maxObj)
+    {
+        return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
+    }
+
+    PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
+    {
+        return PyrLavel(order + 1, factor, frame, window, minObjectSize);
+    }
+
+    int order;
+    float scale;
+    cv::Size sFrame;
+    cv::Size workArea;
+    cv::Size sWindow;
+};
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace lbp
+    {
+        void classifyPyramid(int frameW,
+                             int frameH,
+                             int windowW,
+                             int windowH,
+                             float initalScale,
+                             float factor,
+                             int total,
+                             const DevMem2Db& mstages,
+                             const int nstages,
+                             const DevMem2Di& mnodes,
+                             const DevMem2Df& mleaves,
+                             const DevMem2Di& msubsets,
+                             const DevMem2Db& mfeatures,
+                             const int subsetSize,
+                             DevMem2D_<int4> objects,
+                             unsigned int* classified,
+                             DevMem2Di integral);
+
+        void connectedConmonents(DevMem2D_<int4>  candidates, int ncandidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
+    }
+}}}
+
+struct cv::gpu::CascadeClassifier_GPU::LbpCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+{
+public:
+    struct Stage
+    {
+        int    first;
+        int    ntrees;
+        float  threshold;
+    };
+
+    LbpCascade(){}
+    virtual ~LbpCascade(){}
+
+    virtual unsigned int process(const GpuMat& image, GpuMat& objects, float scaleFactor, int groupThreshold, bool findLargestObject,
+        bool visualizeInPlace, cv::Size minObjectSize, cv::Size maxObjectSize)
+    {
+        CV_Assert(scaleFactor > 1 && image.depth() == CV_8U);
+
+        const int defaultObjSearchNum = 100;
+        const float grouping_eps = 0.2f;
+
+        if( !objects.empty() && objects.depth() == CV_32S)
+            objects.reshape(4, 1);
+        else
+            objects.create(1 , image.cols >> 4, CV_32SC4);
+
+        // used for debug
+        // candidates.setTo(cv::Scalar::all(0));
+        // objects.setTo(cv::Scalar::all(0));
+
+        if (maxObjectSize == cv::Size())
+            maxObjectSize = image.size();
+
+        allocateBuffers(image.size());
+
+        unsigned int classified = 0;
+        GpuMat dclassified(1, 1, CV_32S);
+        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
+
+        PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);
+
+        while (level.isFeasible(maxObjectSize))
+        {
+            int acc = level.sFrame.width + 1;
+            float iniScale = level.scale;
+
+            cv::Size area = level.workArea;
+            int step = 1 + (level.scale <= 2.f);
+
+            int total = 0, prev  = 0;
+
+            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
+            {
+                // create sutable matrix headers
+                GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
+                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
+                GpuMat buff = integralBuffer;
+
+                // generate integral for scale
+                gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR);
+                gpu::integralBuffered(src, sint, buff);
+
+                // calculate job
+                int totalWidth = level.workArea.width / step;
+                total += totalWidth * (level.workArea.height / step);
+
+                // go to next pyramide level
+                level = level.next(scaleFactor, image.size(), NxM, minObjectSize);
+                area = level.workArea;
+
+                step = (1 + (level.scale <= 2.f));
+                prev = acc;
+                acc += level.sFrame.width + 1;
+            }
+
+            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
+                leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
+        }
+
+        if (groupThreshold <= 0  || objects.empty())
+            return 0;
+
+        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+        device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
+
+        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaDeviceSynchronize() );
+        return classified;
+    }
+
+    virtual cv::Size getClassifierCvSize() const { return NxM; }
+
+    bool read(const string& classifierAsXml)
+    {
+        FileStorage fs(classifierAsXml, FileStorage::READ);
+        return fs.isOpened() ? read(fs.getFirstTopLevelNode()) : false;
+    }
+
+private:
+
+    void allocateBuffers(cv::Size frame)
+    {
+        if (frame == cv::Size())
+            return;
+
+        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
+        {
+            resuzeBuffer.create(frame, CV_8UC1);
+
+            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
+            NcvSize32u roiSize;
+            roiSize.width = frame.width;
+            roiSize.height = frame.height;
+
+            cudaDeviceProp prop;
+            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+
+            Ncv32u bufSize;
+            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
+            integralBuffer.create(1, bufSize, CV_8UC1);
+
+            candidates.create(1 , frame.width >> 1, CV_32SC4);
+        }
+    }
+
+    bool read(const FileNode &root)
+    {
+        const char *GPU_CC_STAGE_TYPE       = "stageType";
+        const char *GPU_CC_FEATURE_TYPE     = "featureType";
+        const char *GPU_CC_BOOST            = "BOOST";
+        const char *GPU_CC_LBP              = "LBP";
+        const char *GPU_CC_MAX_CAT_COUNT    = "maxCatCount";
+        const char *GPU_CC_HEIGHT           = "height";
+        const char *GPU_CC_WIDTH            = "width";
+        const char *GPU_CC_STAGE_PARAMS     = "stageParams";
+        const char *GPU_CC_MAX_DEPTH        = "maxDepth";
+        const char *GPU_CC_FEATURE_PARAMS   = "featureParams";
+        const char *GPU_CC_STAGES           = "stages";
+        const char *GPU_CC_STAGE_THRESHOLD  = "stageThreshold";
+        const float GPU_THRESHOLD_EPS       = 1e-5f;
+        const char *GPU_CC_WEAK_CLASSIFIERS = "weakClassifiers";
+        const char *GPU_CC_INTERNAL_NODES   = "internalNodes";
+        const char *GPU_CC_LEAF_VALUES      = "leafValues";
+        const char *GPU_CC_FEATURES         = "features";
+        const char *GPU_CC_RECT             = "rect";
+
+        std::string stageTypeStr = (string)root[GPU_CC_STAGE_TYPE];
+        CV_Assert(stageTypeStr == GPU_CC_BOOST);
+
+        string featureTypeStr = (string)root[GPU_CC_FEATURE_TYPE];
+        CV_Assert(featureTypeStr == GPU_CC_LBP);
+
+        NxM.width =  (int)root[GPU_CC_WIDTH];
+        NxM.height = (int)root[GPU_CC_HEIGHT];
+        CV_Assert( NxM.height > 0 && NxM.width > 0 );
+
+        isStumps = ((int)(root[GPU_CC_STAGE_PARAMS][GPU_CC_MAX_DEPTH]) == 1) ? true : false;
+        CV_Assert(isStumps);
+
+        FileNode fn = root[GPU_CC_FEATURE_PARAMS];
+        if (fn.empty())
+            return false;
+
+        ncategories = fn[GPU_CC_MAX_CAT_COUNT];
+
+        subsetSize = (ncategories + 31) / 32;
+        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );
+
+        fn = root[GPU_CC_STAGES];
+        if (fn.empty())
+            return false;
+
+        std::vector<Stage> stages;
+        stages.reserve(fn.size());
+
+        std::vector<int> cl_trees;
+        std::vector<int> cl_nodes;
+        std::vector<float> cl_leaves;
+        std::vector<int> subsets;
+
+        FileNodeIterator it = fn.begin(), it_end = fn.end();
+        for (size_t si = 0; it != it_end; si++, ++it )
+        {
+            FileNode fns = *it;
+            Stage st;
+            st.threshold = (float)fns[GPU_CC_STAGE_THRESHOLD] - GPU_THRESHOLD_EPS;
+
+            fns = fns[GPU_CC_WEAK_CLASSIFIERS];
+            if (fns.empty())
+                return false;
+
+            st.ntrees = (int)fns.size();
+            st.first = (int)cl_trees.size();
+
+            stages.push_back(st);// (int, int, float)
+
+            cl_trees.reserve(stages[si].first + stages[si].ntrees);
+
+            // weak trees
+            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
+            for ( ; it1 != it1_end; ++it1 )
+            {
+                FileNode fnw = *it1;
+
+                FileNode internalNodes = fnw[GPU_CC_INTERNAL_NODES];
+                FileNode leafValues = fnw[GPU_CC_LEAF_VALUES];
+                if ( internalNodes.empty() || leafValues.empty() )
+                    return false;
+
+                int nodeCount = (int)internalNodes.size()/nodeStep;
+                cl_trees.push_back(nodeCount);
+
+                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
+                cl_leaves.reserve(cl_leaves.size() + leafValues.size());
+
+                if( subsetSize > 0 )
+                    subsets.reserve(subsets.size() + nodeCount * subsetSize);
+
+                // nodes
+                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();
+
+                for( ; iIt != iEnd; )
+                {
+                    cl_nodes.push_back((int)*(iIt++));
+                    cl_nodes.push_back((int)*(iIt++));
+                    cl_nodes.push_back((int)*(iIt++));
+
+                    if( subsetSize > 0 )
+                        for( int j = 0; j < subsetSize; j++, ++iIt )
+                            subsets.push_back((int)*iIt);
+                }
+
+                // leaves
+                iIt = leafValues.begin(), iEnd = leafValues.end();
+                for( ; iIt != iEnd; ++iIt )
+                    cl_leaves.push_back((float)*iIt);
+            }
+        }
+
+        fn = root[GPU_CC_FEATURES];
+        if( fn.empty() )
+            return false;
+        std::vector<uchar> features;
+        features.reserve(fn.size() * 4);
+        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
+        for (; f_it != f_end; ++f_it)
+        {
+            FileNode rect = (*f_it)[GPU_CC_RECT];
+            FileNodeIterator r_it = rect.begin();
+            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
+        }
+
+        // copy data structures on gpu
+        stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, (uchar*)&(stages[0]) ));
+        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
+        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
+        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
+        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
+        features_mat.upload(cv::Mat(features).reshape(4,1));
+
+        return true;
+    }
+
+    enum stage { BOOST = 0 };
+    enum feature { LBP = 1, HAAR = 2 };
+    static const stage stageType = BOOST;
+    static const feature featureType = LBP;
+
+    cv::Size NxM;
+    bool isStumps;
+    int ncategories;
+    int subsetSize;
+    int nodeStep;
+
+    // gpu representation of classifier
+    GpuMat stage_mat;
+    GpuMat trees_mat;
+    GpuMat nodes_mat;
+    GpuMat leaves_mat;
+    GpuMat subsets_mat;
+    GpuMat features_mat;
+
+    GpuMat integral;
+    GpuMat integralBuffer;
+    GpuMat resuzeBuffer;
+
+    GpuMat candidates;
+    static const int integralFactor = 4;
+};
+
+cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU()
+: findLargestObject(false), visualizeInPlace(false), impl(0) {}
+
+cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const string& filename)
+: findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
+
+cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
+
+void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
+
+bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; }
 
 Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const
 {
     return this->empty() ? Size() : impl->getClassifierCvSize();
 }
 
-
 int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
 {
-    CV_Assert( scaleFactor > 1 && image.depth() == CV_8U);
     CV_Assert( !this->empty());
+    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
+}
 
-    const int defaultObjSearchNum = 100;
-    if (objectsBuf.empty())
+int cv::gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
+{
+    CV_Assert( !this->empty());
+    return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
+}
+
+bool cv::gpu::CascadeClassifier_GPU::load(const string& filename)
+{
+    release();
+
+    std::string fext = filename.substr(filename.find_last_of(".") + 1);
+    std::transform(fext.begin(), fext.end(), fext.begin(), ::tolower);
+
+    if (fext == "nvbin")
     {
-        objectsBuf.create(1, defaultObjSearchNum, DataType<Rect>::type);
+        impl = new HaarCascade();
+        return impl->read(filename);
     }
 
-    NcvSize32u ncvMinSize = impl->getClassifierSize();
+    FileStorage fs(filename, FileStorage::READ);
 
-    if (ncvMinSize.width < (unsigned)minSize.width && ncvMinSize.height < (unsigned)minSize.height)
+    if (!fs.isOpened())
     {
-        ncvMinSize.width = minSize.width;
-        ncvMinSize.height = minSize.height;
+        impl = new HaarCascade();
+        return impl->read(filename);
     }
 
-    unsigned int numDetections;
-    ncvSafeCall( impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections) );
+    const char *GPU_CC_LBP = "LBP";
+    string featureTypeStr = (string)fs.getFirstTopLevelNode()["featureType"];
+    if (featureTypeStr == GPU_CC_LBP)
+        impl = new LbpCascade();
+    else
+        impl = new HaarCascade();
 
-    return numDetections;
+    impl->read(filename);
+    return !this->empty();
 }
 
+//////////////////////////////////////////////////////////////////////////////////////////////////////
 
 struct RectConvert
 {
@@ -708,7 +735,6 @@ struct RectConvert
     }
 };
 
-
 void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
 {
     vector<Rect> rects(hypotheses.size());
diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu
index bbbe0bf449..e96692cefb 100644
--- a/modules/gpu/src/cuda/lbp.cu
+++ b/modules/gpu/src/cuda/lbp.cu
@@ -290,6 +290,7 @@ namespace cv { namespace gpu { namespace device
         {
             const int block = 128;
             int grid = divUp(workAmount, block);
+            cudaFuncSetCacheConfig(lbp_cascade, cudaFuncCachePreferL1);
             Cascade cascade((Stage*)mstages.ptr(), nstages, (ClNode*)mnodes.ptr(), mleaves.ptr(), msubsets.ptr(), (uchar4*)mfeatures.ptr(), subsetSize);
             lbp_cascade<<<grid, block>>>(cascade, frameW, frameH, windowW, windowH, initialScale, factor, workAmount, integral.ptr(), integral.step / sizeof(int), objects, classified);
         }
diff --git a/modules/gpu/test/test_objdetect.cpp b/modules/gpu/test/test_objdetect.cpp
index a7db7667be..927762b2b0 100644
--- a/modules/gpu/test/test_objdetect.cpp
+++ b/modules/gpu/test/test_objdetect.cpp
@@ -302,13 +302,13 @@ PARAM_TEST_CASE(LBP_Read_classifier, cv::gpu::DeviceInfo, int)
 
 TEST_P(LBP_Read_classifier, Accuracy)
 {
-    cv::gpu::CascadeClassifier_GPU_LBP classifier;
+    cv::gpu::CascadeClassifier_GPU classifier;
     std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
     ASSERT_TRUE(classifier.load(classifierXmlPath));
 }
 
-INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier, 
-						testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
+INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier,
+                        testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
 
 
 PARAM_TEST_CASE(LBP_classify, cv::gpu::DeviceInfo, int)
@@ -344,7 +344,7 @@ TEST_P(LBP_classify, Accuracy)
     for (; it != rects.end(); ++it)
         cv::rectangle(markedImage, *it, CV_RGB(0, 0, 255));
 
-    cv::gpu::CascadeClassifier_GPU_LBP gpuClassifier;
+    cv::gpu::CascadeClassifier_GPU gpuClassifier;
     ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
 
     cv::gpu::GpuMat gpu_rects;
@@ -352,23 +352,23 @@ TEST_P(LBP_classify, Accuracy)
     int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
 
     cv::Mat downloaded(gpu_rects);
-	const cv::Rect* faces = downloaded.ptr<cv::Rect>();
+    const cv::Rect* faces = downloaded.ptr<cv::Rect>();
     for (int i = 0; i < count; i++)
     {
         cv::Rect r = faces[i];
 
 #if defined (LOG_CASCADE_STATISTIC)
-		std::cout << r.x << " " << r.y  << " " << r.width << " " << r.height << std::endl;
-#endif
+        std::cout << r.x << " " << r.y  << " " << r.width << " " << r.height << std::endl;
         cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
+#endif
     }
 
 #if defined (LOG_CASCADE_STATISTIC)
-	cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage); cv::waitKey();
 #endif
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_classify,
-						testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
+                        testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
 
 } // namespace