diff --git a/modules/gpubgsegm/src/cuda/mog.cu b/modules/gpubgsegm/src/cuda/mog.cu index 6508262d28..ed0720c30c 100644 --- a/modules/gpubgsegm/src/cuda/mog.cu +++ b/modules/gpubgsegm/src/cuda/mog.cu @@ -111,14 +111,6 @@ namespace cv { namespace gpu { namespace cudev 0.0f); } - template - __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows) - { - typename Ptr2D::elem_type val = ptr(k * rows + y, x); - ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); - ptr((k + 1) * rows + y, x) = val; - } - /////////////////////////////////////////////////////////////// // MOG without learning @@ -426,337 +418,6 @@ namespace cv { namespace gpu { namespace cudev funcs[cn](weight, mean, dst, nmixtures, backgroundRatio, stream); } - - /////////////////////////////////////////////////////////////// - // MOG2 - - __constant__ int c_nmixtures; - __constant__ float c_Tb; - __constant__ float c_TB; - __constant__ float c_Tg; - __constant__ float c_varInit; - __constant__ float c_varMin; - __constant__ float c_varMax; - __constant__ float c_tau; - __constant__ unsigned char c_shadowVal; - - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) - { - varMin = ::fminf(varMin, varMax); - varMax = ::fmaxf(varMin, varMax); - - cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); - } - - template - __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, - PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, - const float alphaT, const float alpha1, const float prune) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= frame.cols || y >= frame.rows) - return; - - WorkT pix = cvt(frame(y, x)); - - //calculate distances to the modes (+ sort) - //here we need to go in descending order!!! - - bool background = false; // true - the pixel classified as background - - //internal: - - bool fitsPDF = false; //if it remains zero a new GMM mode will be added - - int nmodes = modesUsed(y, x); - int nNewModes = nmodes; //current number of modes in GMM - - float totalWeight = 0.0f; - - //go through all modes - - for (int mode = 0; mode < nmodes; ++mode) - { - //need only weight if fit is found - float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; - - //fit not found yet - if (!fitsPDF) - { - //check if it belongs to some of the remaining modes - float var = gmm_variance(mode * frame.rows + y, x); - - WorkT mean = gmm_mean(mode * frame.rows + y, x); - - //calculate difference and distance - WorkT diff = mean - pix; - float dist2 = sqr(diff); - - //background? - Tb - usually larger than Tg - if (totalWeight < c_TB && dist2 < c_Tb * var) - background = true; - - //check fit - if (dist2 < c_Tg * var) - { - //belongs to the mode - fitsPDF = true; - - //update distribution - - //update weight - weight += alphaT; - float k = alphaT / weight; - - //update mean - gmm_mean(mode * frame.rows + y, x) = mean - k * diff; - - //update variance - float varnew = var + k * (dist2 - var); - - //limit the variance - varnew = ::fmaxf(varnew, c_varMin); - varnew = ::fminf(varnew, c_varMax); - - gmm_variance(mode * frame.rows + y, x) = varnew; - - //sort - //all other weights are at the same place and - //only the matched (iModes) is higher -> just find the new place for it - - for (int i = mode; i > 0; --i) - { - //check one up - if (weight < gmm_weight((i - 1) * frame.rows + y, x)) - break; - - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } - - //belongs to the mode - bFitsPDF becomes 1 - } - } // !fitsPDF - - //check prune - if (weight < -prune) - { - weight = 0.0; - nmodes--; - } - - gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value - totalWeight += weight; - } - - //renormalize weights - - totalWeight = 1.f / totalWeight; - for (int mode = 0; mode < nmodes; ++mode) - gmm_weight(mode * frame.rows + y, x) *= totalWeight; - - nmodes = nNewModes; - - //make new mode if needed and exit - - if (!fitsPDF) - { - // replace the weakest or add a new one - int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; - - if (nmodes == 1) - gmm_weight(mode * frame.rows + y, x) = 1.f; - else - { - gmm_weight(mode * frame.rows + y, x) = alphaT; - - // renormalize all other weights - - for (int i = 0; i < nmodes - 1; ++i) - gmm_weight(i * frame.rows + y, x) *= alpha1; - } - - // init - - gmm_mean(mode * frame.rows + y, x) = pix; - gmm_variance(mode * frame.rows + y, x) = c_varInit; - - //sort - //find the new place for it - - for (int i = nmodes - 1; i > 0; --i) - { - // check one up - if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) - break; - - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } - } - - //set the number of modes - modesUsed(y, x) = nmodes; - - bool isShadow = false; - if (detectShadows && !background) - { - float tWeight = 0.0f; - - // check all the components marked as background: - for (int mode = 0; mode < nmodes; ++mode) - { - WorkT mean = gmm_mean(mode * frame.rows + y, x); - - WorkT pix_mean = pix * mean; - - float numerator = sum(pix_mean); - float denominator = sqr(mean); - - // no division by zero allowed - if (denominator == 0) - break; - - // if tau < a < 1 then also check the color distortion - if (numerator <= denominator && numerator >= c_tau * denominator) - { - float a = numerator / denominator; - - WorkT dD = a * mean - pix; - - if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) - { - isShadow = true; - break; - } - }; - - tWeight += gmm_weight(mode * frame.rows + y, x); - if (tWeight > c_TB) - break; - } - } - - fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; - } - - template - void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - const float alpha1 = 1.0f - alphaT; - - if (detectShadows) - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); - - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } - else - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); - - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); - - static const func_t funcs[] = - { - 0, mog2_caller, 0, mog2_caller, mog2_caller - }; - - funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); - } - - template - __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= modesUsed.cols || y >= modesUsed.rows) - return; - - int nmodes = modesUsed(y, x); - - WorkT meanVal = VecTraits::all(0.0f); - float totalWeight = 0.0f; - - for (int mode = 0; mode < nmodes; ++mode) - { - float weight = gmm_weight(mode * modesUsed.rows + y, x); - - WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); - meanVal = meanVal + weight * mean; - - totalWeight += weight; - - if(totalWeight > c_TB) - break; - } - - meanVal = meanVal * (1.f / totalWeight); - - dst(y, x) = saturate_cast(meanVal); - } - - template - void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); - - getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); - - static const func_t funcs[] = - { - 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller - }; - - funcs[cn](modesUsed, weight, mean, dst, stream); - } } }}} diff --git a/modules/gpubgsegm/src/cuda/mog2.cu b/modules/gpubgsegm/src/cuda/mog2.cu new file mode 100644 index 0000000000..89b43b12fa --- /dev/null +++ b/modules/gpubgsegm/src/cuda/mog2.cu @@ -0,0 +1,438 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/core/cuda/common.hpp" +#include "opencv2/core/cuda/vec_traits.hpp" +#include "opencv2/core/cuda/vec_math.hpp" +#include "opencv2/core/cuda/limits.hpp" + +namespace cv { namespace gpu { namespace cudev +{ + namespace mog2 + { + /////////////////////////////////////////////////////////////// + // Utility + + __device__ __forceinline__ float cvt(uchar val) + { + return val; + } + __device__ __forceinline__ float3 cvt(const uchar3& val) + { + return make_float3(val.x, val.y, val.z); + } + __device__ __forceinline__ float4 cvt(const uchar4& val) + { + return make_float4(val.x, val.y, val.z, val.w); + } + + __device__ __forceinline__ float sqr(float val) + { + return val * val; + } + __device__ __forceinline__ float sqr(const float3& val) + { + return val.x * val.x + val.y * val.y + val.z * val.z; + } + __device__ __forceinline__ float sqr(const float4& val) + { + return val.x * val.x + val.y * val.y + val.z * val.z; + } + + __device__ __forceinline__ float sum(float val) + { + return val; + } + __device__ __forceinline__ float sum(const float3& val) + { + return val.x + val.y + val.z; + } + __device__ __forceinline__ float sum(const float4& val) + { + return val.x + val.y + val.z; + } + + template + __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows) + { + typename Ptr2D::elem_type val = ptr(k * rows + y, x); + ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); + ptr((k + 1) * rows + y, x) = val; + } + + /////////////////////////////////////////////////////////////// + // MOG2 + + __constant__ int c_nmixtures; + __constant__ float c_Tb; + __constant__ float c_TB; + __constant__ float c_Tg; + __constant__ float c_varInit; + __constant__ float c_varMin; + __constant__ float c_varMax; + __constant__ float c_tau; + __constant__ unsigned char c_shadowVal; + + void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) + { + varMin = ::fminf(varMin, varMax); + varMax = ::fmaxf(varMin, varMax); + + cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); + cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); + } + + template + __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, + const float alphaT, const float alpha1, const float prune) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= frame.cols || y >= frame.rows) + return; + + WorkT pix = cvt(frame(y, x)); + + //calculate distances to the modes (+ sort) + //here we need to go in descending order!!! + + bool background = false; // true - the pixel classified as background + + //internal: + + bool fitsPDF = false; //if it remains zero a new GMM mode will be added + + int nmodes = modesUsed(y, x); + int nNewModes = nmodes; //current number of modes in GMM + + float totalWeight = 0.0f; + + //go through all modes + + for (int mode = 0; mode < nmodes; ++mode) + { + //need only weight if fit is found + float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; + + //fit not found yet + if (!fitsPDF) + { + //check if it belongs to some of the remaining modes + float var = gmm_variance(mode * frame.rows + y, x); + + WorkT mean = gmm_mean(mode * frame.rows + y, x); + + //calculate difference and distance + WorkT diff = mean - pix; + float dist2 = sqr(diff); + + //background? - Tb - usually larger than Tg + if (totalWeight < c_TB && dist2 < c_Tb * var) + background = true; + + //check fit + if (dist2 < c_Tg * var) + { + //belongs to the mode + fitsPDF = true; + + //update distribution + + //update weight + weight += alphaT; + float k = alphaT / weight; + + //update mean + gmm_mean(mode * frame.rows + y, x) = mean - k * diff; + + //update variance + float varnew = var + k * (dist2 - var); + + //limit the variance + varnew = ::fmaxf(varnew, c_varMin); + varnew = ::fminf(varnew, c_varMax); + + gmm_variance(mode * frame.rows + y, x) = varnew; + + //sort + //all other weights are at the same place and + //only the matched (iModes) is higher -> just find the new place for it + + for (int i = mode; i > 0; --i) + { + //check one up + if (weight < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); + } + + //belongs to the mode - bFitsPDF becomes 1 + } + } // !fitsPDF + + //check prune + if (weight < -prune) + { + weight = 0.0; + nmodes--; + } + + gmm_weight(mode * frame.rows + y, x) = weight; //update weight by the calculated value + totalWeight += weight; + } + + //renormalize weights + + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + gmm_weight(mode * frame.rows + y, x) *= totalWeight; + + nmodes = nNewModes; + + //make new mode if needed and exit + + if (!fitsPDF) + { + // replace the weakest or add a new one + int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; + + if (nmodes == 1) + gmm_weight(mode * frame.rows + y, x) = 1.f; + else + { + gmm_weight(mode * frame.rows + y, x) = alphaT; + + // renormalize all other weights + + for (int i = 0; i < nmodes - 1; ++i) + gmm_weight(i * frame.rows + y, x) *= alpha1; + } + + // init + + gmm_mean(mode * frame.rows + y, x) = pix; + gmm_variance(mode * frame.rows + y, x) = c_varInit; + + //sort + //find the new place for it + + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); + } + } + + //set the number of modes + modesUsed(y, x) = nmodes; + + bool isShadow = false; + if (detectShadows && !background) + { + float tWeight = 0.0f; + + // check all the components marked as background: + for (int mode = 0; mode < nmodes; ++mode) + { + WorkT mean = gmm_mean(mode * frame.rows + y, x); + + WorkT pix_mean = pix * mean; + + float numerator = sum(pix_mean); + float denominator = sqr(mean); + + // no division by zero allowed + if (denominator == 0) + break; + + // if tau < a < 1 then also check the color distortion + if (numerator <= denominator && numerator >= c_tau * denominator) + { + float a = numerator / denominator; + + WorkT dD = a * mean - pix; + + if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) + { + isShadow = true; + break; + } + }; + + tWeight += gmm_weight(mode * frame.rows + y, x); + if (tWeight > c_TB) + break; + } + } + + fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; + } + + template + void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); + + const float alpha1 = 1.0f - alphaT; + + if (detectShadows) + { + cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, + alphaT, alpha1, prune); + } + else + { + cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + + mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, + weight, variance, (PtrStepSz) mean, + alphaT, alpha1, prune); + } + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, cudaStream_t stream) + { + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + + static const func_t funcs[] = + { + 0, mog2_caller, 0, mog2_caller, mog2_caller + }; + + funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); + } + + template + __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= modesUsed.cols || y >= modesUsed.rows) + return; + + int nmodes = modesUsed(y, x); + + WorkT meanVal = VecTraits::all(0.0f); + float totalWeight = 0.0f; + + for (int mode = 0; mode < nmodes; ++mode) + { + float weight = gmm_weight(mode * modesUsed.rows + y, x); + + WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); + meanVal = meanVal + weight * mean; + + totalWeight += weight; + + if(totalWeight > c_TB) + break; + } + + meanVal = meanVal * (1.f / totalWeight); + + dst(y, x) = saturate_cast(meanVal); + } + + template + void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) + { + dim3 block(32, 8); + dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); + + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) + { + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + + static const func_t funcs[] = + { + 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller + }; + + funcs[cn](modesUsed, weight, mean, dst, stream); + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpubgsegm/src/mog.cpp b/modules/gpubgsegm/src/mog.cpp index 22b31074fe..1acb844df3 100644 --- a/modules/gpubgsegm/src/mog.cpp +++ b/modules/gpubgsegm/src/mog.cpp @@ -50,12 +50,6 @@ void cv::gpu::MOG_GPU::operator()(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, floa void cv::gpu::MOG_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_no_cuda(); } void cv::gpu::MOG_GPU::release() {} -cv::gpu::MOG2_GPU::MOG2_GPU(int) { throw_no_cuda(); } -void cv::gpu::MOG2_GPU::initialize(cv::Size, int) { throw_no_cuda(); } -void cv::gpu::MOG2_GPU::operator()(const GpuMat&, GpuMat&, float, Stream&) { throw_no_cuda(); } -void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_no_cuda(); } -void cv::gpu::MOG2_GPU::release() {} - #else namespace cv { namespace gpu { namespace cudev @@ -66,10 +60,6 @@ namespace cv { namespace gpu { namespace cudev int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream); void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream); - - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); } }}} @@ -165,115 +155,4 @@ void cv::gpu::MOG_GPU::release() var_.release(); } -///////////////////////////////////////////////////////////////// -// MOG2 - -namespace mog2 -{ - // default parameters of gaussian background detection algorithm - const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 - const float defaultVarThreshold = 4.0f * 4.0f; - const int defaultNMixtures = 5; // maximal number of Gaussians in mixture - const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test - const float defaultVarThresholdGen = 3.0f * 3.0f; - const float defaultVarInit = 15.0f; // initial variance for new components - const float defaultVarMax = 5.0f * defaultVarInit; - const float defaultVarMin = 4.0f; - - // additional parameters - const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components - const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection - const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation -} - -cv::gpu::MOG2_GPU::MOG2_GPU(int nmixtures) : - frameSize_(0, 0), frameType_(0), nframes_(0) -{ - nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures; - - history = mog2::defaultHistory; - varThreshold = mog2::defaultVarThreshold; - bShadowDetection = true; - - backgroundRatio = mog2::defaultBackgroundRatio; - fVarInit = mog2::defaultVarInit; - fVarMax = mog2::defaultVarMax; - fVarMin = mog2::defaultVarMin; - - varThresholdGen = mog2::defaultVarThresholdGen; - fCT = mog2::defaultfCT; - nShadowDetection = mog2::defaultnShadowDetection; - fTau = mog2::defaultfTau; -} - -void cv::gpu::MOG2_GPU::initialize(cv::Size frameSize, int frameType) -{ - using namespace cv::gpu::cudev::mog; - - CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); - - frameSize_ = frameSize; - frameType_ = frameType; - nframes_ = 0; - - int ch = CV_MAT_CN(frameType); - int work_ch = ch; - - // for each gaussian mixture of each pixel bg model we store ... - // the mixture weight (w), - // the mean (nchannels values) and - // the covariance - weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); - - //make the array for keeping track of the used modes per pixel - all zeros at start - bgmodelUsedModes_.create(frameSize_, CV_8UC1); - bgmodelUsedModes_.setTo(cv::Scalar::all(0)); - - loadConstants(nmixtures_, varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection); -} - -void cv::gpu::MOG2_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate, Stream& stream) -{ - using namespace cv::gpu::cudev::mog; - - int ch = frame.channels(); - int work_ch = ch; - - if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.channels()) - initialize(frame.size(), frame.type()); - - fgmask.create(frameSize_, CV_8UC1); - fgmask.setTo(cv::Scalar::all(0)); - - ++nframes_; - learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history); - CV_Assert(learningRate >= 0.0f); - - mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, StreamAccessor::getStream(stream)); -} - -void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream) const -{ - using namespace cv::gpu::cudev::mog; - - backgroundImage.create(frameSize_, frameType_); - - getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream)); -} - -void cv::gpu::MOG2_GPU::release() -{ - frameSize_ = Size(0, 0); - frameType_ = 0; - nframes_ = 0; - - weight_.release(); - variance_.release(); - mean_.release(); - - bgmodelUsedModes_.release(); -} - #endif diff --git a/modules/gpubgsegm/src/mog2.cpp b/modules/gpubgsegm/src/mog2.cpp new file mode 100644 index 0000000000..7258294c25 --- /dev/null +++ b/modules/gpubgsegm/src/mog2.cpp @@ -0,0 +1,173 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if !defined HAVE_CUDA || defined(CUDA_DISABLER) + +cv::gpu::MOG2_GPU::MOG2_GPU(int) { throw_no_cuda(); } +void cv::gpu::MOG2_GPU::initialize(cv::Size, int) { throw_no_cuda(); } +void cv::gpu::MOG2_GPU::operator()(const GpuMat&, GpuMat&, float, Stream&) { throw_no_cuda(); } +void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat&, Stream&) const { throw_no_cuda(); } +void cv::gpu::MOG2_GPU::release() {} + +#else + +namespace cv { namespace gpu { namespace cudev +{ + namespace mog2 + { + void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); + void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + } +}}} + +namespace mog2 +{ + // default parameters of gaussian background detection algorithm + const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 + const float defaultVarThreshold = 4.0f * 4.0f; + const int defaultNMixtures = 5; // maximal number of Gaussians in mixture + const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test + const float defaultVarThresholdGen = 3.0f * 3.0f; + const float defaultVarInit = 15.0f; // initial variance for new components + const float defaultVarMax = 5.0f * defaultVarInit; + const float defaultVarMin = 4.0f; + + // additional parameters + const float defaultfCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components + const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection + const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation +} + +cv::gpu::MOG2_GPU::MOG2_GPU(int nmixtures) : + frameSize_(0, 0), frameType_(0), nframes_(0) +{ + nmixtures_ = nmixtures > 0 ? nmixtures : mog2::defaultNMixtures; + + history = mog2::defaultHistory; + varThreshold = mog2::defaultVarThreshold; + bShadowDetection = true; + + backgroundRatio = mog2::defaultBackgroundRatio; + fVarInit = mog2::defaultVarInit; + fVarMax = mog2::defaultVarMax; + fVarMin = mog2::defaultVarMin; + + varThresholdGen = mog2::defaultVarThresholdGen; + fCT = mog2::defaultfCT; + nShadowDetection = mog2::defaultnShadowDetection; + fTau = mog2::defaultfTau; +} + +void cv::gpu::MOG2_GPU::initialize(cv::Size frameSize, int frameType) +{ + using namespace cv::gpu::cudev::mog2; + + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); + + frameSize_ = frameSize; + frameType_ = frameType; + nframes_ = 0; + + int ch = CV_MAT_CN(frameType); + int work_ch = ch; + + // for each gaussian mixture of each pixel bg model we store ... + // the mixture weight (w), + // the mean (nchannels values) and + // the covariance + weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); + mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); + + //make the array for keeping track of the used modes per pixel - all zeros at start + bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.setTo(cv::Scalar::all(0)); + + loadConstants(nmixtures_, varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection); +} + +void cv::gpu::MOG2_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, float learningRate, Stream& stream) +{ + using namespace cv::gpu::cudev::mog2; + + int ch = frame.channels(); + int work_ch = ch; + + if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.channels()) + initialize(frame.size(), frame.type()); + + fgmask.create(frameSize_, CV_8UC1); + fgmask.setTo(cv::Scalar::all(0)); + + ++nframes_; + learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history); + CV_Assert(learningRate >= 0.0f); + + mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, StreamAccessor::getStream(stream)); +} + +void cv::gpu::MOG2_GPU::getBackgroundImage(GpuMat& backgroundImage, Stream& stream) const +{ + using namespace cv::gpu::cudev::mog2; + + backgroundImage.create(frameSize_, frameType_); + + getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream)); +} + +void cv::gpu::MOG2_GPU::release() +{ + frameSize_ = Size(0, 0); + frameType_ = 0; + nframes_ = 0; + + weight_.release(); + variance_.release(); + mean_.release(); + + bgmodelUsedModes_.release(); +} + +#endif