From acc031aabafe07bf2430b5881b0721e326efae3f Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Mon, 25 Jun 2012 11:13:50 +0000
Subject: [PATCH] added MOG_GPU and MOG2_GPU (Gaussian Mixture background
 subtraction)

---
 modules/gpu/include/opencv2/gpu/gpu.hpp | 128 +++++
 modules/gpu/perf/perf_video.cpp         | 185 +++++++
 modules/gpu/perf_cpu/perf_video.cpp     | 163 ++++++
 modules/gpu/src/bgfg_mog.cpp            | 241 ++++++++
 modules/gpu/src/cuda/bgfg_mog.cu        | 703 ++++++++++++++++++++++++
 modules/gpu/src/cuda/fgd_bgfg.cu        |  42 ++
 modules/gpu/test/test_video.cpp         | 202 ++++++-
 7 files changed, 1655 insertions(+), 9 deletions(-)
 create mode 100644 modules/gpu/src/bgfg_mog.cpp
 create mode 100644 modules/gpu/src/cuda/bgfg_mog.cu
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 383c80dac8..800e88e181 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1963,6 +1963,134 @@ private:
     std::auto_ptr<Impl> impl_;
 };
 
+/*!
+ Gaussian Mixture-based Backbround/Foreground Segmentation Algorithm
+
+ The class implements the following algorithm:
+ "An improved adaptive background mixture model for real-time tracking with shadow detection"
+ P. KadewTraKuPong and R. Bowden,
+ Proc. 2nd European Workshp on Advanced Video-Based Surveillance Systems, 2001."
+ http://personal.ee.surrey.ac.uk/Personal/R.Bowden/publications/avbs01/avbs01.pdf
+*/
+class CV_EXPORTS MOG_GPU
+{
+public:
+    //! the default constructor
+    MOG_GPU(int nmixtures = -1);
+
+    //! re-initiaization method
+    void initialize(Size frameSize, int frameType);
+
+    //! the update operator
+    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = 0.0f, Stream& stream = Stream::Null());
+
+    int history;
+    float varThreshold;
+    float backgroundRatio;
+    float noiseSigma;
+
+private:
+    int nmixtures_;
+
+    Size frameSize_;
+    int nframes_;
+
+    GpuMat weight_;
+    GpuMat sortKey_;
+    GpuMat mean_;
+    GpuMat var_;
+};
+
+/*!
+ The class implements the following algorithm:
+ "Improved adaptive Gausian mixture model for background subtraction"
+ Z.Zivkovic
+ International Conference Pattern Recognition, UK, August, 2004.
+ http://www.zoranz.net/Publications/zivkovic2004ICPR.pdf
+*/
+class CV_EXPORTS MOG2_GPU
+{
+public:
+    //! the default constructor
+    MOG2_GPU(int nmixtures = -1);
+
+    //! re-initiaization method
+    void initialize(Size frameSize, int frameType);
+
+    //! the update operator
+    void operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate = -1.0f, Stream& stream = Stream::Null());
+
+    //! computes a background image which are the mean of all background gaussians
+    void getBackgroundImage(GpuMat& backgroundImage, Stream& stream = Stream::Null()) const;
+
+    // parameters
+    // you should call initialize after parameters changes
+
+    int history;
+
+    //! here it is the maximum allowed number of mixture components.
+    //! Actual number is determined dynamically per pixel
+    float varThreshold;
+    // threshold on the squared Mahalanobis distance to decide if it is well described
+    // by the background model or not. Related to Cthr from the paper.
+    // This does not influence the update of the background. A typical value could be 4 sigma
+    // and that is varThreshold=4*4=16; Corresponds to Tb in the paper.
+
+    /////////////////////////
+    // less important parameters - things you might change but be carefull
+    ////////////////////////
+
+    float backgroundRatio;
+    // corresponds to fTB=1-cf from the paper
+    // TB - threshold when the component becomes significant enough to be included into
+    // the background model. It is the TB=1-cf from the paper. So I use cf=0.1 => TB=0.
+    // For alpha=0.001 it means that the mode should exist for approximately 105 frames before
+    // it is considered foreground
+    // float noiseSigma;
+    float varThresholdGen;
+
+    //correspondts to Tg - threshold on the squared Mahalan. dist. to decide
+    //when a sample is close to the existing components. If it is not close
+    //to any a new component will be generated. I use 3 sigma => Tg=3*3=9.
+    //Smaller Tg leads to more generated components and higher Tg might make
+    //lead to small number of components but they can grow too large
+    float fVarInit;
+    float fVarMin;
+    float fVarMax;
+
+    //initial variance  for the newly generated components.
+    //It will will influence the speed of adaptation. A good guess should be made.
+    //A simple way is to estimate the typical standard deviation from the images.
+    //I used here 10 as a reasonable value
+    // min and max can be used to further control the variance
+    float fCT; //CT - complexity reduction prior
+    //this is related to the number of samples needed to accept that a component
+    //actually exists. We use CT=0.05 of all the samples. By setting CT=0 you get
+    //the standard Stauffer&Grimson algorithm (maybe not exact but very similar)
+
+    //shadow detection parameters
+    bool bShadowDetection; //default 1 - do shadow detection
+    unsigned char nShadowDetection; //do shadow detection - insert this value as the detection result - 127 default value
+    float fTau;
+    // Tau - shadow threshold. The shadow is detected if the pixel is darker
+    //version of the background. Tau is a threshold on how much darker the shadow can be.
+    //Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
+    //See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
+
+private:
+    int nmixtures_;
+
+    Size frameSize_;
+    int frameType_;
+    int nframes_;
+
+    GpuMat weight_;
+    GpuMat variance_;
+    GpuMat mean_;
+
+    GpuMat bgmodelUsedModes_; //keep track of number of modes per pixel
+};
+
 ////////////////////////////////// Video Encoding //////////////////////////////////
 
 // Works only under Windows
diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp
index 45a695452b..71680c84c2 100644
--- a/modules/gpu/perf/perf_video.cpp
+++ b/modules/gpu/perf/perf_video.cpp
@@ -331,6 +331,191 @@ INSTANTIATE_TEST_CASE_P(Video, FGDStatModel, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
 
+//////////////////////////////////////////////////////
+// MOG
+
+IMPLEMENT_PARAM_CLASS(LearningRate, double)
+
+GPU_PERF_TEST(MOG, cv::gpu::DeviceInfo, std::string, Channels, LearningRate)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+    double learningRate = GET_PARAM(3);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::gpu::GpuMat d_frame;
+    cv::gpu::MOG_GPU mog;
+    cv::gpu::GpuMat foreground;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3)
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    d_frame.upload(frame);
+
+    mog(d_frame, foreground, learningRate);
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        d_frame.upload(frame);
+
+        startTimer(); next();
+        mog(d_frame, foreground, learningRate);
+        stopTimer();
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    testing::Values(LearningRate(0.0), LearningRate(0.01))));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+GPU_PERF_TEST(MOG2_update, cv::gpu::DeviceInfo, std::string, Channels)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::gpu::GpuMat d_frame;
+    cv::gpu::MOG2_GPU mog2;
+    cv::gpu::GpuMat foreground;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3)
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    d_frame.upload(frame);
+
+    mog2(d_frame, foreground);
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        d_frame.upload(frame);
+
+        startTimer(); next();
+        mog2(d_frame, foreground);
+        stopTimer();
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG2_update, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(Channels(1), Channels(3), Channels(4))));
+
+GPU_PERF_TEST(MOG2_getBackgroundImage, cv::gpu::DeviceInfo, std::string, Channels)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::gpu::GpuMat d_frame;
+    cv::gpu::MOG2_GPU mog2;
+    cv::gpu::GpuMat foreground;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        d_frame.upload(frame);
+
+        mog2(d_frame, foreground);
+    }
+
+    cv::gpu::GpuMat background;
+    mog2.getBackgroundImage(background);
+
+    TEST_CYCLE()
+    {
+        mog2.getBackgroundImage(background);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(Channels(1), Channels(3), Channels(4))));
+
 //////////////////////////////////////////////////////
 // VideoWriter
 
diff --git a/modules/gpu/perf_cpu/perf_video.cpp b/modules/gpu/perf_cpu/perf_video.cpp
index efd3955f29..ec42182026 100644
--- a/modules/gpu/perf_cpu/perf_video.cpp
+++ b/modules/gpu/perf_cpu/perf_video.cpp
@@ -165,6 +165,169 @@ INSTANTIATE_TEST_CASE_P(Video, FGDStatModel, testing::Combine(
     ALL_DEVICES,
     testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi"))));
 
+//////////////////////////////////////////////////////
+// MOG
+
+IMPLEMENT_PARAM_CLASS(LearningRate, double)
+
+GPU_PERF_TEST(MOG, cv::gpu::DeviceInfo, std::string, Channels, LearningRate)
+{
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+    double learningRate = GET_PARAM(3);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::BackgroundSubtractorMOG mog;
+    cv::Mat foreground;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3)
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    mog(frame, foreground, learningRate);
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        startTimer(); next();
+        mog(frame, foreground, learningRate);
+        stopTimer();
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(Channels(1), Channels(3)/*, Channels(4)*/),
+    testing::Values(LearningRate(0.0), LearningRate(0.01))));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+GPU_PERF_TEST(MOG2_update, cv::gpu::DeviceInfo, std::string, Channels)
+{
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::BackgroundSubtractorMOG2 mog2;
+    cv::Mat foreground;
+
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    if (cn != 3)
+    {
+        cv::Mat temp;
+        if (cn == 1)
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+        else
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+        cv::swap(temp, frame);
+    }
+
+    mog2(frame, foreground);
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        startTimer(); next();
+        mog2(frame, foreground);
+        stopTimer();
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG2_update, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(Channels(1), Channels(3)/*, Channels(4)*/)));
+
+GPU_PERF_TEST(MOG2_getBackgroundImage, cv::gpu::DeviceInfo, std::string, Channels)
+{
+    std::string inputFile = perf::TestBase::getDataPath(std::string("gpu/video/") + GET_PARAM(1));
+    int cn = GET_PARAM(2);
+
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::BackgroundSubtractorMOG2 mog2;
+    cv::Mat foreground;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (cn != 3)
+        {
+            cv::Mat temp;
+            if (cn == 1)
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            else
+                cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA);
+            cv::swap(temp, frame);
+        }
+
+        mog2(frame, foreground);
+    }
+
+    cv::Mat background;
+    mog2.getBackgroundImage(background);
+
+    TEST_CYCLE()
+    {
+        mog2.getBackgroundImage(background);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, MOG2_getBackgroundImage, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi"), std::string("1920x1080.avi")),
+    testing::Values(/*Channels(1),*/ Channels(3)/*, Channels(4)*/)));
+
 //////////////////////////////////////////////////////
 // VideoWriter
 
diff --git a/modules/gpu/src/bgfg_mog.cpp b/modules/gpu/src/bgfg_mog.cpp
new file mode 100644
index 0000000000..a2525ace78
--- /dev/null
+++ b/modules/gpu/src/bgfg_mog.cpp
@@ -0,0 +1,241 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#ifndef HAVE_CUDA
+
+cv::gpu::MOG_GPU::MOG_GPU(int) { throw_nogpu(); }
+void cv::gpu::MOG_GPU::initialize(cv::Size, int) { throw_nogpu(); }
+void cv::gpu::MOG_GPU::operator()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, float, Stream&) { throw_nogpu(); }
+
+cv::gpu::MOG2_GPU::MOG2_GPU(int) { throw_nogpu(); }
+void cv::gpu::MOG2_GPU::initialize(cv::Size, int) { throw_nogpu(); }
+void cv::gpu::MOG2_GPU::operator()(const GpuMat&, GpuMat&, float, Stream&) { throw_nogpu(); }
+void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_nogpu(); }
+
+#else
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace mog
+    {
+        void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var,
+                     int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma,
+                     cudaStream_t stream);
+
+        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
+        void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+        void getBackgroundImage_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream);
+    }
+}}}
+
+namespace mog
+{
+    const int defaultNMixtures = 5;
+    const int defaultHistory = 200;
+    const float defaultBackgroundRatio = 0.7f;
+    const float defaultVarThreshold = 2.5f * 2.5f;
+    const float defaultNoiseSigma = 30.0f * 0.5f;
+    const float defaultInitialWeight = 0.05f;
+}
+
+cv::gpu::MOG_GPU::MOG_GPU(int nmixtures) :
+    frameSize_(0, 0), nframes_(0)
+{
+    nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
+    history = mog::defaultHistory;
+    varThreshold = mog::defaultVarThreshold;
+    backgroundRatio = mog::defaultBackgroundRatio;
+    noiseSigma = mog::defaultNoiseSigma;
+}
+
+void cv::gpu::MOG_GPU::initialize(cv::Size frameSize, int frameType)
+{
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store
+    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
+    // the mean (nchannels values) and
+    // the diagonal covariance matrix (another nchannels values)
+
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+    var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+    weight_.setTo(cv::Scalar::all(0));
+    sortKey_.setTo(cv::Scalar::all(0));
+    mean_.setTo(cv::Scalar::all(0));
+    var_.setTo(cv::Scalar::all(0));
+
+    nframes_ = 0;
+}
+
+void cv::gpu::MOG_GPU::operator()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat& fgmask, float learningRate, Stream& stream)
+{
+    using namespace cv::gpu::device::mog;
+
+    CV_Assert(frame.depth() == CV_8U);
+
+    int ch = frame.channels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    mog_gpu(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
+            varThreshold, learningRate, backgroundRatio, noiseSigma,
+            StreamAccessor::getStream(stream));
+}
+
+/////////////////////////////////////////////////////////////////
+// MOG2
+
+namespace mog2
+{
+    // default parameters of gaussian background detection algorithm
+    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
+    const float defaultVarThreshold = 4.0f * 4.0f;
+    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
+    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+    const float defaultVarThresholdGen = 3.0f * 3.0f;
+    const float defaultVarInit = 15.0f; // initial variance for new components
+    const float defaultVarMax = 5.0f * defaultVarInit;
+    const float defaultVarMin = 4.0f;
+
+    // additional parameters
+    const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
+    const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+    const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
+}
+
+cv::gpu::MOG2_GPU::MOG2_GPU(int nmixtures) :
+    frameSize_(0, 0), frameType_(0), nframes_(0)
+{
+    nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures;
+
+    history = mog2::defaultHistory;
+    varThreshold = mog2::defaultVarThreshold;
+    bShadowDetection = true;
+
+    backgroundRatio = mog2::defaultBackgroundRatio;
+    fVarInit = mog2::defaultVarInit;
+    fVarMax  = mog2::defaultVarMax;
+    fVarMin = mog2::defaultVarMin;
+
+    varThresholdGen = mog2::defaultVarThresholdGen;
+    fCT = mog2::defaultfCT;
+    nShadowDetection =  mog2::defaultnShadowDetection;
+    fTau = mog2::defaultfTau;
+}
+
+void cv::gpu::MOG2_GPU::initialize(cv::Size frameSize, int frameType)
+{
+    using namespace cv::gpu::device::mog;
+
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
+
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+    nframes_ = 0;
+
+    int ch = CV_MAT_CN(frameType);
+    int work_ch = ch;
+
+    // for each gaussian mixture of each pixel bg model we store ...
+    // the mixture weight (w),
+    // the mean (nchannels values) and
+    // the covariance
+    weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
+    mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
+
+    //make the array for keeping track of the used modes per pixel - all zeros at start
+    bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+    bgmodelUsedModes_.setTo(cv::Scalar::all(0));
+
+    loadConstants(nmixtures_, varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
+}
+
+void cv::gpu::MOG2_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate, Stream& stream)
+{
+    using namespace cv::gpu::device::mog;
+
+    int ch = frame.channels();
+    int work_ch = ch;
+
+    if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.channels())
+        initialize(frame.size(), frame.type());
+
+    fgmask.create(frameSize_, CV_8UC1);
+    fgmask.setTo(cv::Scalar::all(0));
+
+    ++nframes_;
+    learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
+    CV_Assert(learningRate >= 0.0f);
+
+    if (learningRate > 0.0f)
+        mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream) const
+{
+    using namespace cv::gpu::device::mog;
+
+    backgroundImage.create(frameSize_, frameType_);
+
+    getBackgroundImage_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream));
+}
+
+#endif
diff --git a/modules/gpu/src/cuda/bgfg_mog.cu b/modules/gpu/src/cuda/bgfg_mog.cu
new file mode 100644
index 0000000000..8cdf048799
--- /dev/null
+++ b/modules/gpu/src/cuda/bgfg_mog.cu
@@ -0,0 +1,703 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or bpied warranties, including, but not limited to, the bpied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include <stdio.h>
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/vec_traits.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/limits.hpp"
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace mog
+    {
+        ///////////////////////////////////////////////////////////////
+        // Utility
+
+        __device__ __forceinline__ float cvt(uchar val)
+        {
+            return val;
+        }
+        __device__ __forceinline__ float3 cvt(const uchar3& val)
+        {
+            return make_float3(val.x, val.y, val.z);
+        }
+        __device__ __forceinline__ float4 cvt(const uchar4& val)
+        {
+            return make_float4(val.x, val.y, val.z, val.w);
+        }
+
+        __device__ __forceinline__ float sqr(float val)
+        {
+            return val * val;
+        }
+        __device__ __forceinline__ float sqr(const float3& val)
+        {
+            return val.x * val.x + val.y * val.y + val.z * val.z;
+        }
+        __device__ __forceinline__ float sqr(const float4& val)
+        {
+            return val.x * val.x + val.y * val.y + val.z * val.z;
+        }
+
+        __device__ __forceinline__ float sum(float val)
+        {
+            return val;
+        }
+        __device__ __forceinline__ float sum(const float3& val)
+        {
+            return val.x + val.y + val.z;
+        }
+        __device__ __forceinline__ float sum(const float4& val)
+        {
+            return val.x + val.y + val.z;
+        }
+
+        __device__ __forceinline__ float clamp(float var, float learningRate, float diff, float minVar)
+        {
+             return ::fmaxf(var + learningRate * (diff * diff - var), minVar);
+        }
+        __device__ __forceinline__ float3 clamp(const float3& var, float learningRate, const float3& diff, float minVar)
+        {
+             return make_float3(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
+                                ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
+                                ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar));
+        }
+        __device__ __forceinline__ float4 clamp(const float4& var, float learningRate, const float4& diff, float minVar)
+        {
+             return make_float4(::fmaxf(var.x + learningRate * (diff.x * diff.x - var.x), minVar),
+                                ::fmaxf(var.y + learningRate * (diff.y * diff.y - var.y), minVar),
+                                ::fmaxf(var.z + learningRate * (diff.z * diff.z - var.z), minVar),
+                                0.0f);
+        }
+
+        template <class Ptr2D>
+        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
+        {
+            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
+            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
+            ptr((k + 1) * rows + y, x) = val;
+        }
+
+        ///////////////////////////////////////////////////////////////
+        // MOG without learning
+
+        template <typename SrcT, typename WorkT>
+        __global__ void mog_withoutLearning(const DevMem2D_<SrcT> frame, PtrStepb fgmask,
+                                            const PtrStepf gmm_weight, const PtrStep_<WorkT> gmm_mean, const PtrStep_<WorkT> gmm_var,
+                                            const int nmixtures, const float varThreshold, const float backgroundRatio)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= frame.cols || y >= frame.rows)
+                return;
+
+            WorkT pix = cvt(frame(y, x));
+
+            int kHit = -1;
+            int kForeground = -1;
+
+            for (int k = 0; k < nmixtures; ++k)
+            {
+                if (gmm_weight(k * frame.rows + y, x) < numeric_limits<float>::epsilon())
+                    break;
+
+                WorkT mu = gmm_mean(k * frame.rows + y, x);
+                WorkT var = gmm_var(k * frame.rows + y, x);
+
+                WorkT diff = pix - mu;
+
+                if (sqr(diff) < varThreshold * sum(var))
+                {
+                    kHit = k;
+                    break;
+                }
+            }
+
+            if (kHit >= 0)
+            {
+                float wsum = 0.0f;
+                for (int k = 0; k < nmixtures; ++k)
+                {
+                    wsum += gmm_weight(k * frame.rows + y, x);
+
+                    if (wsum > backgroundRatio)
+                    {
+                        kForeground = k + 1;
+                        break;
+                    }
+                }
+            }
+
+            fgmask(y, x) = (uchar) (-(kHit < 0 || kHit >= kForeground));
+        }
+
+        template <typename SrcT, typename WorkT>
+        void mog_withoutLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var,
+                                        int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+            mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask,
+                                                                         weight, (DevMem2D_<WorkT>) mean, (DevMem2D_<WorkT>) var,
+                                                                         nmixtures, varThreshold, backgroundRatio);
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ///////////////////////////////////////////////////////////////
+        // MOG with learning
+
+        template <typename SrcT, typename WorkT>
+        __global__ void mog_withLearning(const DevMem2D_<SrcT> frame, PtrStepb fgmask,
+                                         PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep_<WorkT> gmm_mean, PtrStep_<WorkT> gmm_var,
+                                         const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
+        {
+            const float w0 = 0.05f;
+            const float sk0 = w0 / (30.0f * 0.5f * 2.0f);
+            const float var0 = 30.0f * 0.5f * 30.0f * 0.5f * 4.0f;
+
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= frame.cols || y >= frame.rows)
+                return;
+
+            WorkT pix = cvt(frame(y, x));
+
+            float wsum = 0.0f;
+            int kHit = -1;
+            int kForeground = -1;
+
+            int k = 0;
+            for (; k < nmixtures; ++k)
+            {
+                float w = gmm_weight(k * frame.rows + y, x);
+                wsum += w;
+
+                if (w < numeric_limits<float>::epsilon())
+                    break;
+
+                WorkT mu = gmm_mean(k * frame.rows + y, x);
+                WorkT var = gmm_var(k * frame.rows + y, x);
+
+                WorkT diff = pix - mu;
+
+                if (sqr(diff) < varThreshold * sum(var))
+                {
+                    wsum -= w;
+                    float dw = learningRate * (1.0f - w);
+
+                    var = clamp(var, learningRate, diff, minVar);
+
+                    float sortKey_prev = w / ::sqrtf(sum(var));
+                    gmm_sortKey(k * frame.rows + y, x) = sortKey_prev;
+
+                    float weight_prev = w + dw;
+                    gmm_weight(k * frame.rows + y, x) = weight_prev;
+
+                    WorkT mean_prev = mu + learningRate * diff;
+                    gmm_mean(k * frame.rows + y, x) = mean_prev;
+
+                    WorkT var_prev = var;
+                    gmm_var(k * frame.rows + y, x) = var_prev;
+
+                    int k1 = k - 1;
+
+                    if (k1 >= 0)
+                    {
+                        float sortKey_next = gmm_sortKey(k1 * frame.rows + y, x);
+                        float weight_next = gmm_weight(k1 * frame.rows + y, x);
+                        WorkT mean_next = gmm_mean(k1 * frame.rows + y, x);
+                        WorkT var_next = gmm_var(k1 * frame.rows + y, x);
+
+                        for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
+                        {
+                            gmm_sortKey(k1 * frame.rows + y, x) = sortKey_prev;
+                            gmm_sortKey((k1 + 1) * frame.rows + y, x) = sortKey_next;
+
+                            gmm_weight(k1 * frame.rows + y, x) = weight_prev;
+                            gmm_weight((k1 + 1) * frame.rows + y, x) = weight_next;
+
+                            gmm_mean(k1 * frame.rows + y, x) = mean_prev;
+                            gmm_mean((k1 + 1) * frame.rows + y, x) = mean_next;
+
+                            gmm_var(k1 * frame.rows + y, x) = var_prev;
+                            gmm_var((k1 + 1) * frame.rows + y, x) = var_next;
+
+                            sortKey_prev = sortKey_next;
+                            sortKey_next = k1 > 0 ? gmm_sortKey((k1 - 1) * frame.rows + y, x) : 0.0f;
+
+                            weight_prev = weight_next;
+                            weight_next = k1 > 0 ? gmm_weight((k1 - 1) * frame.rows + y, x) : 0.0f;
+
+                            mean_prev = mean_next;
+                            mean_next = k1 > 0 ? gmm_mean((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
+
+                            var_prev = var_next;
+                            var_next = k1 > 0 ? gmm_var((k1 - 1) * frame.rows + y, x) : VecTraits<WorkT>::all(0.0f);
+                        }
+                    }
+
+                    kHit = k1 + 1;
+                    break;
+                }
+            }
+
+            if (kHit < 0)
+            {
+                // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
+                kHit = k = ::min(k, nmixtures - 1);
+                wsum += w0 - gmm_weight(k * frame.rows + y, x);
+
+                gmm_weight(k * frame.rows + y, x) = w0;
+                gmm_mean(k * frame.rows + y, x) = pix;
+                gmm_var(k * frame.rows + y, x) = VecTraits<WorkT>::all(var0);
+                gmm_sortKey(k * frame.rows + y, x) = sk0;
+            }
+            else
+            {
+                for( ; k < nmixtures; k++)
+                    wsum += gmm_weight(k * frame.rows + y, x);
+            }
+
+            float wscale = 1.0f / wsum;
+            wsum = 0;
+            for (k = 0; k < nmixtures; ++k)
+            {
+                float w = gmm_weight(k * frame.rows + y, x);
+                wsum += w *= wscale;
+
+                gmm_weight(k * frame.rows + y, x) = w;
+                gmm_sortKey(k * frame.rows + y, x) *= wscale;
+
+                if (wsum > backgroundRatio && kForeground < 0)
+                    kForeground = k + 1;
+            }
+
+            fgmask(y, x) = (uchar)(-(kHit >= kForeground));
+        }
+
+        template <typename SrcT, typename WorkT>
+        void mog_withLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var,
+                                     int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
+                                     cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+            mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask,
+                                                                      weight, sortKey, (DevMem2D_<WorkT>) mean, (DevMem2D_<WorkT>) var,
+                                                                      nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ///////////////////////////////////////////////////////////////
+        // MOG
+
+        void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
+        {
+            typedef void (*withoutLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
+            typedef void (*withLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);
+
+            static const withoutLearning_t withoutLearning[] =
+            {
+                0, mog_withoutLearning_caller<uchar, float>, 0, mog_withoutLearning_caller<uchar3, float3>, mog_withoutLearning_caller<uchar4, float4>
+            };
+            static const withLearning_t withLearning[] =
+            {
+                0, mog_withLearning_caller<uchar, float>, 0, mog_withLearning_caller<uchar3, float3>, mog_withLearning_caller<uchar4, float4>
+            };
+
+            const float minVar = noiseSigma * noiseSigma;
+
+            if (learningRate > 0.0f)
+                withLearning[cn](frame, fgmask, weight, sortKey, mean, var, nmixtures, varThreshold, backgroundRatio, learningRate, minVar, stream);
+            else
+                withoutLearning[cn](frame, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio, stream);
+        }
+
+        ///////////////////////////////////////////////////////////////
+        // MOG2
+
+        __constant__ int           c_nmixtures;
+        __constant__ float         c_Tb;
+        __constant__ float         c_TB;
+        __constant__ float         c_Tg;
+        __constant__ float         c_varInit;
+        __constant__ float         c_varMin;
+        __constant__ float         c_varMax;
+        __constant__ float         c_tau;
+        __constant__ unsigned char c_shadowVal;
+
+        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
+        {
+            varMin = ::fminf(varMin, varMax);
+            varMax = ::fmaxf(varMin, varMax);
+
+            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
+        }
+
+        template <bool detectShadows, typename SrcT, typename WorkT>
+        __global__ void mog2(const DevMem2D_<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
+                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep_<WorkT> gmm_mean,
+                             const float alphaT, const float alpha1, const float prune)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= frame.cols || y >= frame.rows)
+                return;
+
+            WorkT pix = cvt(frame(y, x));
+
+            //calculate distances to the modes (+ sort)
+            //here we need to go in descending order!!!
+
+            bool background = false; // true - the pixel classified as background
+
+            //internal:
+
+            bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+
+            int nmodes = modesUsed(y, x);
+            int nNewModes = nmodes; //current number of modes in GMM
+
+            float totalWeight = 0.0f;
+
+            //go through all modes
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                //need only weight if fit is found
+                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
+
+                //fit not found yet
+                if (!fitsPDF)
+                {
+                    //check if it belongs to some of the remaining modes
+                    float var = gmm_variance(mode * frame.rows + y, x);
+
+                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+                    //calculate difference and distance
+                    WorkT diff = mean - pix;
+                    float dist2 = sqr(diff);
+
+                    //background? - Tb - usually larger than Tg
+                    if (totalWeight < c_TB && dist2 < c_Tb * var)
+                        background = true;
+
+                    //check fit
+                    if (dist2 < c_Tg * var)
+                    {
+                        //belongs to the mode
+                        fitsPDF = true;
+
+                        //update distribution
+
+                        //update weight
+                        weight += alphaT;
+                        float k = alphaT / weight;
+
+                        //update mean
+                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
+
+                        //update variance
+                        float varnew = var + k * (dist2 - var);
+
+                        //limit the variance
+                        varnew = ::fmaxf(varnew, c_varMin);
+                        varnew = ::fminf(varnew, c_varMax);
+
+                        gmm_variance(mode * frame.rows + y, x) = varnew;
+
+                        //sort
+                        //all other weights are at the same place and
+                        //only the matched (iModes) is higher -> just find the new place for it
+
+                        for (int i = mode; i > 0; --i)
+                        {
+                            //check one up
+                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
+                                break;
+
+                            //swap one up
+                            swap(gmm_weight, x, y, i - 1, frame.rows);
+                            swap(gmm_variance, x, y, i - 1, frame.rows);
+                            swap(gmm_mean, x, y, i - 1, frame.rows);
+                        }
+
+                        //belongs to the mode - bFitsPDF becomes 1
+                    }
+                } // !fitsPDF
+
+                //check prune
+                if (weight < -prune)
+                {
+                    weight = 0.0;
+                    nmodes--;
+                }
+
+                gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value
+                totalWeight += weight;
+            }
+
+            //renormalize weights
+
+            totalWeight = 1.f / totalWeight;
+            for (int mode = 0; mode < nmodes; ++mode)
+                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
+
+            nmodes = nNewModes;
+
+            //make new mode if needed and exit
+
+            if (!fitsPDF)
+            {
+                // replace the weakest or add a new one
+                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
+
+                if (nmodes == 1)
+                    gmm_weight(mode * frame.rows + y, x) = 1.f;
+                else
+                {
+                    gmm_weight(mode * frame.rows + y, x) = alphaT;
+
+                    // renormalize all other weights
+
+                    for (int i = 0; i < nmodes - 1; ++i)
+                        gmm_weight(i * frame.rows + y, x) *= alpha1;
+                }
+
+                // init
+
+                gmm_mean(mode * frame.rows + y, x) = pix;
+                gmm_variance(mode * frame.rows + y, x) = c_varInit;
+
+                //sort
+                //find the new place for it
+
+                for (int i = nmodes - 1; i > 0; --i)
+                {
+                    // check one up
+                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
+                        break;
+
+                    //swap one up
+                    swap(gmm_weight, x, y, i - 1, frame.rows);
+                    swap(gmm_variance, x, y, i - 1, frame.rows);
+                    swap(gmm_mean, x, y, i - 1, frame.rows);
+                }
+            }
+
+            //set the number of modes
+            modesUsed(y, x) = nmodes;
+
+            bool isShadow = false;
+            if (detectShadows && !background)
+            {
+                float tWeight = 0.0f;
+
+                // check all the components  marked as background:
+                for (int mode = 0; mode < nmodes; ++mode)
+                {
+                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+
+                    WorkT pix_mean = pix * mean;
+
+                    float numerator = sum(pix_mean);
+                    float denominator = sqr(mean);
+
+                    // no division by zero allowed
+                    if (denominator == 0)
+                        break;
+
+                    // if tau < a < 1 then also check the color distortion
+                    if (numerator <= denominator && numerator >= c_tau * denominator)
+                    {
+                        float a = numerator / denominator;
+
+                        WorkT dD = a * mean - pix;
+
+                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
+                        {
+                            isShadow = true;
+                            break;
+                        }
+                    };
+
+                    tWeight += gmm_weight(mode * frame.rows + y, x);
+                    if (tWeight > c_TB)
+                        break;
+                };
+            }
+
+            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
+        }
+
+        template <typename SrcT, typename WorkT>
+        void mog2_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean,
+                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
+
+            const float alpha1 = 1.0f - alphaT;
+
+            if (detectShadows)
+            {
+                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, modesUsed,
+                                                                    weight, variance, (DevMem2D_<WorkT>) mean,
+                                                                    alphaT, alpha1, prune);
+            }
+            else
+            {
+                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+
+                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, modesUsed,
+                                                                    weight, variance, (DevMem2D_<WorkT>) mean,
+                                                                    alphaT, alpha1, prune);
+            }
+
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean,
+                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
+        {
+            typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+
+            static const func_t funcs[] =
+            {
+                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
+            };
+
+            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
+        }
+
+        template <typename WorkT, typename OutT>
+        __global__ void getBackgroundImage(const DevMem2Db modesUsed, const PtrStepf gmm_weight, const PtrStep_<WorkT> gmm_mean, PtrStep_<OutT> dst)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= modesUsed.cols || y >= modesUsed.rows)
+                return;
+
+            int nmodes = modesUsed(y, x);
+
+            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
+            float totalWeight = 0.0f;
+
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                float weight = gmm_weight(mode * modesUsed.rows + y, x);
+
+                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
+                meanVal = meanVal + weight * mean;
+
+                totalWeight += weight;
+
+                if(totalWeight > c_TB)
+                    break;
+            }
+
+            meanVal = meanVal * (1.f / totalWeight);
+
+            dst(y, x) = saturate_cast<OutT>(meanVal);
+        }
+
+        template <typename WorkT, typename OutT>
+        void getBackgroundImage_caller(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream)
+        {
+            dim3 block(32, 8);
+            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
+
+            getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (DevMem2D_<WorkT>) mean, (DevMem2D_<OutT>) dst);
+            cudaSafeCall( cudaGetLastError() );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        void getBackgroundImage_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream)
+        {
+            typedef void (*func_t)(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream);
+
+            static const func_t funcs[] =
+            {
+                0, getBackgroundImage_caller<float, uchar>, 0, getBackgroundImage_caller<float3, uchar3>, getBackgroundImage_caller<float4, uchar4>
+            };
+
+            funcs[cn](modesUsed, weight, mean, dst, stream);
+        }
+    }
+}}}
diff --git a/modules/gpu/src/cuda/fgd_bgfg.cu b/modules/gpu/src/cuda/fgd_bgfg.cu
index 7fc5fe7917..e11eb10d0c 100644
--- a/modules/gpu/src/cuda/fgd_bgfg.cu
+++ b/modules/gpu/src/cuda/fgd_bgfg.cu
@@ -1,3 +1,45 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or bpied warranties, including, but not limited to, the bpied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/limits.hpp"
diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp
index 13deef0e63..0053554957 100644
--- a/modules/gpu/test/test_video.cpp
+++ b/modules/gpu/test/test_video.cpp
@@ -416,16 +416,23 @@ namespace cv
 
 PARAM_TEST_CASE(FGDStatModel, cv::gpu::DeviceInfo, std::string, Channels)
 {
+    cv::gpu::DeviceInfo devInfo;
+    std::string inputFile;
+    int out_cn;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
+
+        out_cn = GET_PARAM(2);
+    }
 };
 
-TEST_P(FGDStatModel, Accuracy)
+TEST_P(FGDStatModel, Update)
 {
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
-    int out_cn = GET_PARAM(2);
-
     cv::VideoCapture cap(inputFile);
     ASSERT_TRUE(cap.isOpened());
 
@@ -473,8 +480,8 @@ TEST_P(FGDStatModel, Accuracy)
         }
         d_model.foreground.download(h_foreground);
 
-        EXPECT_MAT_NEAR(gold_background, h_background3, 1.0);
-        EXPECT_MAT_NEAR(gold_foreground, h_foreground, 0.0);
+        ASSERT_MAT_NEAR(gold_background, h_background3, 1.0);
+        ASSERT_MAT_NEAR(gold_foreground, h_foreground, 0.0);
     }
 }
 
@@ -483,6 +490,183 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, FGDStatModel, testing::Combine(
     testing::Values(std::string("768x576.avi")),
     testing::Values(Channels(3), Channels(4))));
 
+//////////////////////////////////////////////////////
+// MOG
+
+IMPLEMENT_PARAM_CLASS(LearningRate, double)
+
+PARAM_TEST_CASE(MOG, cv::gpu::DeviceInfo, std::string, UseGray, LearningRate, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    std::string inputFile;
+    bool useGray;
+    double learningRate;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
+
+        useGray = GET_PARAM(2);
+
+        learningRate = GET_PARAM(3);
+
+        useRoi = GET_PARAM(4);
+    }
+};
+
+TEST_P(MOG, Update)
+{
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::gpu::MOG_GPU mog;
+    cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
+
+    cv::BackgroundSubtractorMOG mog_gold;
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog(loadMat(frame, useRoi), foreground, learningRate);
+
+        mog_gold(frame, foreground_gold, learningRate);
+
+        ASSERT_MAT_NEAR(foreground_gold, foreground, 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Video, MOG, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi")),
+    testing::Values(UseGray(true), UseGray(false)),
+    testing::Values(LearningRate(0.0), LearningRate(0.01)),
+    WHOLE_SUBMAT));
+
+//////////////////////////////////////////////////////
+// MOG2
+
+PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, UseRoi)
+{
+    cv::gpu::DeviceInfo devInfo;
+    std::string inputFile;
+    bool useGray;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
+
+        useGray = GET_PARAM(2);
+
+        useRoi = GET_PARAM(3);
+    }
+};
+
+TEST_P(MOG2, Update)
+{
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+    cap >> frame;
+    ASSERT_FALSE(frame.empty());
+
+    cv::gpu::MOG2_GPU mog2;
+    cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
+
+    cv::BackgroundSubtractorMOG2 mog2_gold;
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+        if (useGray)
+        {
+            cv::Mat temp;
+            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+            cv::swap(temp, frame);
+        }
+
+        mog2(loadMat(frame, useRoi), foreground);
+
+        mog2_gold(frame, foreground_gold);
+
+        double norm = cv::norm(foreground_gold, cv::Mat(foreground), cv::NORM_L1);
+
+        norm /= foreground_gold.size().area();
+
+        ASSERT_LE(norm, 0.09);
+    }
+}
+
+TEST_P(MOG2, getBackgroundImage)
+{
+    cv::VideoCapture cap(inputFile);
+    ASSERT_TRUE(cap.isOpened());
+
+    cv::Mat frame;
+
+    cv::gpu::MOG2_GPU mog2;
+    cv::gpu::GpuMat foreground;
+
+    cv::BackgroundSubtractorMOG2 mog2_gold;
+    cv::Mat foreground_gold;
+
+    for (int i = 0; i < 10; ++i)
+    {
+        cap >> frame;
+        ASSERT_FALSE(frame.empty());
+
+//        if (useGray)
+//        {
+//            cv::Mat temp;
+//            cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY);
+//            cv::swap(temp, frame);
+//        }
+
+        mog2(loadMat(frame, useRoi), foreground);
+
+        mog2_gold(frame, foreground_gold);
+    }
+
+    cv::gpu::GpuMat background = createMat(frame.size(), frame.type(), useRoi);
+    mog2.getBackgroundImage(background);
+
+    cv::Mat background_gold;
+    mog2_gold.getBackgroundImage(background_gold);
+
+    ASSERT_MAT_NEAR(background_gold, background, 0);
+}
+
+INSTANTIATE_TEST_CASE_P(GPU_Video, MOG2, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(std::string("768x576.avi")),
+    testing::Values(UseGray(true), UseGray(false)),
+    WHOLE_SUBMAT));
+
 //////////////////////////////////////////////////////
 // VideoWriter