From c8d8b1fbcd612d7044ea9c94c5bf3f6cd53cdaef Mon Sep 17 00:00:00 2001 From: Ya-Chiu Wu Date: Fri, 2 Feb 2018 18:20:46 +0800 Subject: [PATCH] Merge pull request #10553 from GlueCrow:bgfg_knn_opencl Add ocl version BackgroundSubtractorKNN (#10553) * Add ocl version bgfg_knn * Add ocl KNN perf test * ocl KNN: Avoid unnecessary initializing when non-UMat parameters are used * video: turn off OpenCL for color KNN on Intel devices due performance degradation * video: turn off KNN OpenCL on Apple devices with Intel iGPU due process freeze during clBuildProgram() call --- modules/video/perf/opencl/perf_bgfg_knn.cpp | 95 +++++++ modules/video/src/bgfg_KNN.cpp | 271 +++++++++++++++++--- modules/video/src/opencl/bgfg_knn.cl | 248 ++++++++++++++++++ 3 files changed, 580 insertions(+), 34 deletions(-) create mode 100644 modules/video/perf/opencl/perf_bgfg_knn.cpp create mode 100644 modules/video/src/opencl/bgfg_knn.cl diff --git a/modules/video/perf/opencl/perf_bgfg_knn.cpp b/modules/video/perf/opencl/perf_bgfg_knn.cpp new file mode 100644 index 0000000000..30419af422 --- /dev/null +++ b/modules/video/perf/opencl/perf_bgfg_knn.cpp @@ -0,0 +1,95 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +#ifdef HAVE_OPENCL +#ifdef HAVE_VIDEO_INPUT +#include "../perf_bgfg_utils.hpp" + +namespace cvtest { +namespace ocl { + +//////////////////////////// KNN////////////////////////// + +typedef tuple VideoKNNParamType; +typedef TestBaseWithParam KNN_Apply; +typedef TestBaseWithParam KNN_GetBackgroundImage; + +using namespace opencv_test; + +OCL_PERF_TEST_P(KNN_Apply, KNN, Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), Values(1,3))) +{ + VideoKNNParamType params = GetParam(); + + const string inputFile = getDataPath(get<0>(params)); + + const int cn = get<1>(params); + int nFrame = 5; + + vector frame_buffer(nFrame); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + prepareData(cap, cn, frame_buffer); + + UMat u_foreground; + + OCL_TEST_CYCLE() + { + Ptr knn = createBackgroundSubtractorKNN(); + knn->setDetectShadows(false); + u_foreground.release(); + for (int i = 0; i < nFrame; i++) + { + knn->apply(frame_buffer[i], u_foreground); + } + } + SANITY_CHECK_NOTHING(); +} + +OCL_PERF_TEST_P(KNN_GetBackgroundImage, KNN, Values( + std::make_pair("gpu/video/768x576.avi", 5), + std::make_pair("gpu/video/1920x1080.avi", 5))) +{ + VideoKNNParamType params = GetParam(); + + const string inputFile = getDataPath(get<0>(params)); + + const int cn = 3; + const int skipFrames = get<1>(params); + int nFrame = 10; + + vector frame_buffer(nFrame); + + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + prepareData(cap, cn, frame_buffer, skipFrames); + + UMat u_foreground, u_background; + + OCL_TEST_CYCLE() + { + Ptr knn = createBackgroundSubtractorKNN(); + knn->setDetectShadows(false); + u_foreground.release(); + u_background.release(); + for (int i = 0; i < nFrame; i++) + { + knn->apply(frame_buffer[i], u_foreground); + } + knn->getBackgroundImage(u_background); + } +#ifdef DEBUG_BGFG + imwrite(format("fg_%d_%d_knn_ocl.png", frame_buffer[0].rows, cn), u_foreground.getMat(ACCESS_READ)); + imwrite(format("bg_%d_%d_knn_ocl.png", frame_buffer[0].rows, cn), u_background.getMat(ACCESS_READ)); +#endif + SANITY_CHECK_NOTHING(); +} + +}}// namespace cvtest::ocl + +#endif +#endif diff --git a/modules/video/src/bgfg_KNN.cpp b/modules/video/src/bgfg_KNN.cpp index 0f0ff12958..006d3bd1c9 100755 --- a/modules/video/src/bgfg_KNN.cpp +++ b/modules/video/src/bgfg_KNN.cpp @@ -42,6 +42,7 @@ //#include #include "precomp.hpp" +#include "opencl_kernels_video.hpp" namespace cv { @@ -92,6 +93,9 @@ public: nLongCounter = 0; nMidCounter = 0; nShortCounter = 0; +#ifdef HAVE_OPENCL + opencl_ON = true; +#endif } //! the full constructor that takes the length of the history, // the number of gaussian mixtures, the background ratio parameter and the noise strength @@ -119,6 +123,9 @@ public: nLongCounter = 0; nMidCounter = 0; nShortCounter = 0; +#ifdef HAVE_OPENCL + opencl_ON = true; +#endif } //! the destructor ~BackgroundSubtractorKNNImpl() {} @@ -131,40 +138,80 @@ public: //! re-initialization method void initialize(Size _frameSize, int _frameType) { - frameSize = _frameSize; - frameType = _frameType; - nframes = 0; + frameSize = _frameSize; + frameType = _frameType; + nframes = 0; - int nchannels = CV_MAT_CN(frameType); - CV_Assert( nchannels <= CV_CN_MAX ); + int nchannels = CV_MAT_CN(frameType); + CV_Assert( nchannels <= CV_CN_MAX ); - // Reserve memory for the model - int size=frameSize.height*frameSize.width; - // for each sample of 3 speed pixel models each pixel bg model we store ... - // values + flag (nchannels+1 values) - bgmodel.create( 1,(nN * 3) * (nchannels+1)* size,CV_8U); - bgmodel = Scalar::all(0); + // Reserve memory for the model + int size=frameSize.height*frameSize.width; + //Reset counters + nShortCounter = 0; + nMidCounter = 0; + nLongCounter = 0; - //index through the three circular lists - aModelIndexShort.create(1,size,CV_8U); - aModelIndexMid.create(1,size,CV_8U); - aModelIndexLong.create(1,size,CV_8U); - //when to update next - nNextShortUpdate.create(1,size,CV_8U); - nNextMidUpdate.create(1,size,CV_8U); - nNextLongUpdate.create(1,size,CV_8U); +#ifdef HAVE_OPENCL + if (ocl::isOpenCLActivated() && opencl_ON) + { + create_ocl_apply_kernel(); - //Reset counters - nShortCounter = 0; - nMidCounter = 0; - nLongCounter = 0; + kernel_getBg.create("getBackgroundImage2_kernel", ocl::video::bgfg_knn_oclsrc, format( "-D CN=%d -D NSAMPLES=%d", nchannels, nN)); - aModelIndexShort = Scalar::all(0);//random? //((m_nN)*rand())/(RAND_MAX+1);//0...m_nN-1 - aModelIndexMid = Scalar::all(0); - aModelIndexLong = Scalar::all(0); - nNextShortUpdate = Scalar::all(0); - nNextMidUpdate = Scalar::all(0); - nNextLongUpdate = Scalar::all(0); + if (kernel_apply.empty() || kernel_getBg.empty()) + opencl_ON = false; + } + else opencl_ON = false; + + if (opencl_ON) + { + u_flag.create(frameSize.height * nN * 3, frameSize.width, CV_8UC1); + u_flag.setTo(Scalar::all(0)); + + if (nchannels==3) + nchannels=4; + u_sample.create(frameSize.height * nN * 3, frameSize.width, CV_32FC(nchannels)); + u_sample.setTo(Scalar::all(0)); + + u_aModelIndexShort.create(frameSize.height, frameSize.width, CV_8UC1); + u_aModelIndexShort.setTo(Scalar::all(0)); + u_aModelIndexMid.create(frameSize.height, frameSize.width, CV_8UC1); + u_aModelIndexMid.setTo(Scalar::all(0)); + u_aModelIndexLong.create(frameSize.height, frameSize.width, CV_8UC1); + u_aModelIndexLong.setTo(Scalar::all(0)); + + u_nNextShortUpdate.create(frameSize.height, frameSize.width, CV_8UC1); + u_nNextShortUpdate.setTo(Scalar::all(0)); + u_nNextMidUpdate.create(frameSize.height, frameSize.width, CV_8UC1); + u_nNextMidUpdate.setTo(Scalar::all(0)); + u_nNextLongUpdate.create(frameSize.height, frameSize.width, CV_8UC1); + u_nNextLongUpdate.setTo(Scalar::all(0)); + } + else +#endif + { + // for each sample of 3 speed pixel models each pixel bg model we store ... + // values + flag (nchannels+1 values) + bgmodel.create( 1,(nN * 3) * (nchannels+1)* size,CV_8U); + bgmodel = Scalar::all(0); + + //index through the three circular lists + aModelIndexShort.create(1,size,CV_8U); + aModelIndexMid.create(1,size,CV_8U); + aModelIndexLong.create(1,size,CV_8U); + //when to update next + nNextShortUpdate.create(1,size,CV_8U); + nNextMidUpdate.create(1,size,CV_8U); + nNextLongUpdate.create(1,size,CV_8U); + + aModelIndexShort = Scalar::all(0);//random? //((m_nN)*rand())/(RAND_MAX+1);//0...m_nN-1 + aModelIndexMid = Scalar::all(0); + aModelIndexLong = Scalar::all(0); + nNextShortUpdate = Scalar::all(0); + nNextMidUpdate = Scalar::all(0); + nNextLongUpdate = Scalar::all(0); + } } virtual int getHistory() const { return history; } @@ -180,7 +227,19 @@ public: virtual void setDist2Threshold(double _dist2Threshold) { fTb = (float)_dist2Threshold; } virtual bool getDetectShadows() const { return bShadowDetection; } - virtual void setDetectShadows(bool detectshadows) { bShadowDetection = detectshadows; } + virtual void setDetectShadows(bool detectshadows) + { + if ((bShadowDetection && detectshadows) || (!bShadowDetection && !detectshadows)) + return; + bShadowDetection = detectshadows; +#ifdef HAVE_OPENCL + if (!kernel_apply.empty()) + { + create_ocl_apply_kernel(); + CV_Assert( !kernel_apply.empty() ); + } +#endif + } virtual int getShadowValue() const { return nShadowDetection; } virtual void setShadowValue(int value) { nShadowDetection = (uchar)value; } @@ -256,7 +315,29 @@ protected: Mat nNextMidUpdate; Mat nNextLongUpdate; +#ifdef HAVE_OPENCL + mutable bool opencl_ON; + + UMat u_flag; + UMat u_sample; + UMat u_aModelIndexShort; + UMat u_aModelIndexMid; + UMat u_aModelIndexLong; + UMat u_nNextShortUpdate; + UMat u_nNextMidUpdate; + UMat u_nNextLongUpdate; + + mutable ocl::Kernel kernel_apply; + mutable ocl::Kernel kernel_getBg; +#endif + String name_; + +#ifdef HAVE_OPENCL + bool ocl_getBackgroundImage(OutputArray backgroundImage) const; + bool ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate=-1); + void create_ocl_apply_kernel(); +#endif }; CV_INLINE void @@ -328,7 +409,6 @@ CV_INLINE int include=0;//do we include this pixel into background model? int ndata=nchannels+1; -// float k; // now increase the probability for each pixel for (int n = 0; n < m_nN*3; n++) { @@ -546,18 +626,132 @@ public: uchar m_nShadowDetection; }; +#ifdef HAVE_OPENCL +bool BackgroundSubtractorKNNImpl::ocl_apply(InputArray _image, OutputArray _fgmask, double learningRate) +{ + bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType; + if( needToInitialize ) + initialize(_image.size(), _image.type()); + + ++nframes; + learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history ); + CV_Assert(learningRate >= 0); + + _fgmask.create(_image.size(), CV_8U); + UMat fgmask = _fgmask.getUMat(); + + UMat frame = _image.getUMat(); + + //recalculate update rates - in case alpha is changed + // calculate update parameters (using alpha) + int Kshort,Kmid,Klong; + //approximate exponential learning curve + Kshort=(int)(log(0.7)/log(1-learningRate))+1;//Kshort + Kmid=(int)(log(0.4)/log(1-learningRate))-Kshort+1;//Kmid + Klong=(int)(log(0.1)/log(1-learningRate))-Kshort-Kmid+1;//Klong + + //refresh rates + int nShortUpdate = (Kshort/nN)+1; + int nMidUpdate = (Kmid/nN)+1; + int nLongUpdate = (Klong/nN)+1; + + int idxArg = 0; + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::ReadOnly(frame)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextLongUpdate)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextMidUpdate)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadOnly(u_nNextShortUpdate)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexLong)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexMid)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_aModelIndexShort)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_flag)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::PtrReadWrite(u_sample)); + idxArg = kernel_apply.set(idxArg, ocl::KernelArg::WriteOnlyNoSize(fgmask)); + + idxArg = kernel_apply.set(idxArg, nLongCounter); + idxArg = kernel_apply.set(idxArg, nMidCounter); + idxArg = kernel_apply.set(idxArg, nShortCounter); + idxArg = kernel_apply.set(idxArg, fTb); + idxArg = kernel_apply.set(idxArg, nkNN); + idxArg = kernel_apply.set(idxArg, fTau); + if (bShadowDetection) + kernel_apply.set(idxArg, nShadowDetection); + + size_t globalsize[2] = {(size_t)frame.cols, (size_t)frame.rows}; + if(!kernel_apply.run(2, globalsize, NULL, true)) + return false; + + nShortCounter++;//0,1,...,nShortUpdate-1 + nMidCounter++; + nLongCounter++; + if (nShortCounter >= nShortUpdate) + { + nShortCounter = 0; + randu(u_nNextShortUpdate, Scalar::all(0), Scalar::all(nShortUpdate)); + } + if (nMidCounter >= nMidUpdate) + { + nMidCounter = 0; + randu(u_nNextMidUpdate, Scalar::all(0), Scalar::all(nMidUpdate)); + } + if (nLongCounter >= nLongUpdate) + { + nLongCounter = 0; + randu(u_nNextLongUpdate, Scalar::all(0), Scalar::all(nLongUpdate)); + } + return true; +} + +bool BackgroundSubtractorKNNImpl::ocl_getBackgroundImage(OutputArray _backgroundImage) const +{ + _backgroundImage.create(frameSize, frameType); + UMat dst = _backgroundImage.getUMat(); + + int idxArg = 0; + idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_flag)); + idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::PtrReadOnly(u_sample)); + idxArg = kernel_getBg.set(idxArg, ocl::KernelArg::WriteOnly(dst)); + + size_t globalsize[2] = {(size_t)dst.cols, (size_t)dst.rows}; + + return kernel_getBg.run(2, globalsize, NULL, false); +} + +void BackgroundSubtractorKNNImpl::create_ocl_apply_kernel() +{ + int nchannels = CV_MAT_CN(frameType); + String opts = format("-D CN=%d -D NSAMPLES=%d%s", nchannels, nN, bShadowDetection ? " -D SHADOW_DETECT" : ""); + kernel_apply.create("knn_kernel", ocl::video::bgfg_knn_oclsrc, opts); +} + +#endif void BackgroundSubtractorKNNImpl::apply(InputArray _image, OutputArray _fgmask, double learningRate) { CV_INSTRUMENT_REGION() - Mat image = _image.getMat(); - bool needToInitialize = nframes == 0 || learningRate >= 1 || image.size() != frameSize || image.type() != frameType; +#ifdef HAVE_OPENCL + if (opencl_ON) + { +#ifndef __APPLE__ + CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel() || _image.channels() == 1), + ocl_apply(_image, _fgmask, learningRate)) +#else + CV_OCL_RUN(_fgmask.isUMat() && OCL_PERFORMANCE_CHECK(!ocl::Device::getDefault().isIntel()), + ocl_apply(_image, _fgmask, learningRate)) +#endif + + opencl_ON = false; + nframes = 0; + } +#endif + + bool needToInitialize = nframes == 0 || learningRate >= 1 || _image.size() != frameSize || _image.type() != frameType; if( needToInitialize ) - initialize(image.size(), image.type()); + initialize(_image.size(), _image.type()); + Mat image = _image.getMat(); _fgmask.create( image.size(), CV_8U ); Mat fgmask = _fgmask.getMat(); @@ -622,6 +816,15 @@ void BackgroundSubtractorKNNImpl::getBackgroundImage(OutputArray backgroundImage { CV_INSTRUMENT_REGION() +#ifdef HAVE_OPENCL + if (opencl_ON) + { + CV_OCL_RUN(opencl_ON, ocl_getBackgroundImage(backgroundImage)) + + opencl_ON = false; + } +#endif + int nchannels = CV_MAT_CN(frameType); //CV_Assert( nchannels == 3 ); Mat meanBackground(frameSize, CV_8UC3, Scalar::all(0)); diff --git a/modules/video/src/opencl/bgfg_knn.cl b/modules/video/src/opencl/bgfg_knn.cl new file mode 100644 index 0000000000..0205dba76c --- /dev/null +++ b/modules/video/src/opencl/bgfg_knn.cl @@ -0,0 +1,248 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2018 Ya-Chiu Wu, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Ya-Chiu Wu, yacwu@cs.nctu.edu.tw +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if CN==1 + +#define T_MEAN float +#define F_ZERO (0.0f) + +#define frameToMean(a, b) (b) = *(a); +#define meanToFrame(a, b) *b = convert_uchar_sat(a); + +#else + +#define T_MEAN float4 +#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f) + +#define meanToFrame(a, b)\ + b[0] = convert_uchar_sat(a.x); \ + b[1] = convert_uchar_sat(a.y); \ + b[2] = convert_uchar_sat(a.z); + +#define frameToMean(a, b)\ + b.x = a[0]; \ + b.y = a[1]; \ + b.z = a[2]; \ + b.w = 0.0f; + +#endif + +__kernel void knn_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col, + __global const uchar* nNextLongUpdate, + __global const uchar* nNextMidUpdate, + __global const uchar* nNextShortUpdate, + __global uchar* aModelIndexLong, + __global uchar* aModelIndexMid, + __global uchar* aModelIndexShort, + __global uchar* flag, + __global uchar* sample, + __global uchar* fgmask, int fgmask_step, int fgmask_offset, + int nLongCounter, int nMidCounter, int nShortCounter, + float c_Tb, int c_nkNN, float c_tau +#ifdef SHADOW_DETECT + , uchar c_shadowVal +#endif + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if( x < frame_col && y < frame_row) + { + __global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset))); + T_MEAN pix; + frameToMean(_frame, pix); + + uchar foreground = 255; // 0 - the pixel classified as background + + int Pbf = 0; + int Pb = 0; + uchar include = 0; + + int pt_idx = mad24(y, frame_col, x); + int idx_step = frame_row * frame_col; + + __global T_MEAN* _sample = (__global T_MEAN*)(sample); + + for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n) + { + int n_idx = mad24(n, idx_step, pt_idx); + + T_MEAN c_mean = _sample[n_idx]; + + uchar c_flag = flag[n_idx]; + + T_MEAN diff = c_mean - pix; + float dist2 = dot(diff, diff); + + if (dist2 < c_Tb) + { + Pbf++; + if (c_flag) + { + Pb++; + if (Pb >= c_nkNN) + { + include = 1; + foreground = 0; + break; + } + } + } + } + if (Pbf >= c_nkNN) + { + include = 1; + } + +#ifdef SHADOW_DETECT + if (foreground) + { + int Ps = 0; + for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n) + { + int n_idx = mad24(n, idx_step, pt_idx); + uchar c_flag = flag[n_idx]; + + if (c_flag) + { + T_MEAN c_mean = _sample[n_idx]; + + float numerator = dot(pix, c_mean); + float denominator = dot(c_mean, c_mean); + + if (denominator == 0) + break; + + if (numerator <= denominator && numerator >= c_tau * denominator) + { + float a = numerator / denominator; + + T_MEAN dD = mad(a, c_mean, -pix); + + if (dot(dD, dD) < c_Tb * a * a) + { + Ps++; + if (Ps >= c_nkNN) + { + foreground = c_shadowVal; + break; + } + } + } + } + } + } +#endif + __global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset); + *_fgmask = (uchar)foreground; + + __global const uchar* _nNextLongUpdate = nNextLongUpdate + pt_idx; + __global const uchar* _nNextMidUpdate = nNextMidUpdate + pt_idx; + __global const uchar* _nNextShortUpdate = nNextShortUpdate + pt_idx; + __global uchar* _aModelIndexLong = aModelIndexLong + pt_idx; + __global uchar* _aModelIndexMid = aModelIndexMid + pt_idx; + __global uchar* _aModelIndexShort = aModelIndexShort + pt_idx; + + uchar nextLongUpdate = _nNextLongUpdate[0]; + uchar nextMidUpdate = _nNextMidUpdate[0]; + uchar nextShortUpdate = _nNextShortUpdate[0]; + uchar modelIndexLong = _aModelIndexLong[0]; + uchar modelIndexMid = _aModelIndexMid[0]; + uchar modelIndexShort = _aModelIndexShort[0]; + int offsetLong = mad24(mad24(2, (NSAMPLES), modelIndexLong), idx_step, pt_idx); + int offsetMid = mad24((NSAMPLES)+modelIndexMid, idx_step, pt_idx); + int offsetShort = mad24(modelIndexShort, idx_step, pt_idx); + if (nextLongUpdate == nLongCounter) + { + _sample[offsetLong] = _sample[offsetMid]; + flag[offsetLong] = flag[offsetMid]; + _aModelIndexLong[0] = (modelIndexLong >= ((NSAMPLES)-1)) ? 0 : (modelIndexLong + 1); + } + + if (nextMidUpdate == nMidCounter) + { + _sample[offsetMid] = _sample[offsetShort]; + flag[offsetMid] = flag[offsetShort]; + _aModelIndexMid[0] = (modelIndexMid >= ((NSAMPLES)-1)) ? 0 : (modelIndexMid + 1); + } + + if (nextShortUpdate == nShortCounter) + { + _sample[offsetShort] = pix; + flag[offsetShort] = include; + _aModelIndexShort[0] = (modelIndexShort >= ((NSAMPLES)-1)) ? 0 : (modelIndexShort + 1); + } + } +} + +__kernel void getBackgroundImage2_kernel(__global const uchar* flag, + __global const uchar* sample, + __global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < dst_col && y < dst_row) + { + int pt_idx = mad24(y, dst_col, x); + + T_MEAN meanVal = (T_MEAN)F_ZERO; + + __global T_MEAN* _sample = (__global T_MEAN*)(sample); + int idx_step = dst_row * dst_col; + for (uchar n = 0; n < (NSAMPLES) * 3 ; ++n) + { + int n_idx = mad24(n, idx_step, pt_idx); + uchar c_flag = flag[n_idx]; + if(c_flag) + { + meanVal = _sample[n_idx]; + break; + } + } + __global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset)); + meanToFrame(meanVal, _dst); + } +}