// Patch summary (reviewer preamble placed ahead of the first "diff --git" header).
//
// Purpose: move the helper state and helper kernels-launchers of the OpenCL
// Farneback optical flow out of namespace scope and into the
// cv::ocl::FarnebackOpticalFlow class.
//
// modules/ocl/include/opencv2/ocl/ocl.hpp
//   * Declares four new private member functions in front of prepareGaussian():
//     setGaussianBlurKernel, gaussianBlurOcl, polynomialExpansionOcl and
//     gaussianBlur5Ocl.
//   * Adds five new data members after pyramid0_/pyramid1_: float ig[4] and the
//     oclMat buffers gMat, xgMat, xxgMat, gKerMat.
//
// modules/ocl/src/optical_flow_farneback.cpp
//   * Removes the namespace-scope variables g, xg, xxg, gKer and ig[4] from
//     cv::ocl::optflow_farneback, together with the free functions
//     setGaussianBlurKernel, gaussianBlurOcl and polynomialExpansionOcl.
//   * Re-adds those three functions, plus gaussianBlur5Ocl, as
//     cv::ocl::FarnebackOpticalFlow member functions. The kernel names, thread
//     sizes and the order of the pushed kernel arguments are the same as in the
//     removed versions; only the buffers are renamed (gKer -> gKerMat, g -> gMat,
//     xg -> xgMat, xxg -> xxgMat) and ig now refers to the member array.
//   * Moves the closing of the three namespaces and the static helper
//     allocMatFromBuf() up, so that they precede the new member definitions.
//   * Updates the callers (setPolynomialExpansionConsts, updateFlow_gaussianBlur
//     and operator()) to use the members and drop the optflow_farneback::
//     qualifier.
//   * updateMatricesOcl and updateFlowOcl appear only as hunk context and are
//     not modified.
//
// NOTE(review): this copy of the patch has its line breaks collapsed, and
// angle-bracket contents look stripped (e.g. "const_cast(c_gKer)",
// "std::vector pyramid0_", "static_cast(ig11)", "std::vector< std::pair > args",
// "g.ptr(smoothSize/2)"). Presumably this happened during extraction - confirm
// against the upstream commit before applying or quoting this text.
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d36686fb8a..e8eb3e8599 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1473,6 +1473,16 @@ namespace cv void releaseMemory(); private: + void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf); + + void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst); + + void polynomialExpansionOcl( + const oclMat &src, int polyN, oclMat &dst); + + void gaussianBlur5Ocl( + const oclMat &src, int ksizeHalf, oclMat &dst); + void prepareGaussian( int n, double sigma, float *g, float *xg, float *xxg, double &ig11, double &ig03, double &ig33, double &ig55); @@ -1490,6 +1500,11 @@ namespace cv oclMat frames_[2]; oclMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; std::vector pyramid0_, pyramid1_; + float ig[4]; + oclMat gMat; + oclMat xgMat; + oclMat xxgMat; + oclMat gKerMat; }; //////////////// build warping maps //////////////////// diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp index 5f064204da..d9006eabea 100644 --- a/modules/ocl/src/optical_flow_farneback.cpp +++ b/modules/ocl/src/optical_flow_farneback.cpp @@ -57,77 +57,6 @@ namespace cv { namespace ocl { namespace optflow_farneback { -oclMat g; -oclMat xg; -oclMat xxg; -oclMat gKer; - -float ig[4]; - -inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) -{ - cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast(c_gKer)); - gKer.upload(t_gKer); -} - -static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) -{ - string kernelName("gaussianBlur"); -#ifdef ANDROID - size_t localThreads[3] = { 128, 1, 1 }; -#else - size_t localThreads[3] = { 256, 1, 1 }; -#endif - size_t globalThreads[3] = { src.cols, src.rows, 1 }; - int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); - - CV_Assert(dst.size() == src.size()); - std::vector< std::pair > args; 
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); - - openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1); -} - -static void polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst) -{ - string kernelName("polynomialExpansion"); - -#ifdef ANDROID - size_t localThreads[3] = { 128, 1, 1 }; -#else - size_t localThreads[3] = { 256, 1, 1 }; -#endif - size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 }; - int smem_size = 3 * localThreads[0] * sizeof(float); - - std::vector< std::pair > args; - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&g.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xg.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxg.data)); - args.push_back(std::make_pair(smem_size, (void *)NULL)); - args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); - args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); - - char opt [128]; - sprintf(opt, "-D polyN=%d", polyN); - - 
openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, - globalThreads, localThreads, args, -1, -1, opt); -} static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oclMat &R0, const oclMat &R1, oclMat &M) { @@ -207,8 +136,83 @@ static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy) openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } +} +} +} // namespace cv { namespace ocl { namespace optflow_farneback -static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) +static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) +{ + if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) + return mat(Rect(0, 0, cols, rows)); + return mat = oclMat(rows, cols, type); +} + +void cv::ocl::FarnebackOpticalFlow::setGaussianBlurKernel(const float *c_gKer, int ksizeHalf) +{ + cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast(c_gKer)); + gKerMat.upload(t_gKer); +} + +void cv::ocl::FarnebackOpticalFlow::gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst) +{ + string kernelName("gaussianBlur"); +#ifdef ANDROID + size_t localThreads[3] = { 128, 1, 1 }; +#else + size_t localThreads[3] = { 256, 1, 1 }; +#endif + size_t globalThreads[3] = { src.cols, src.rows, 1 }; + int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float); + + CV_Assert(dst.size() == src.size()); + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + 
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf)); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::FarnebackOpticalFlow::polynomialExpansionOcl(const oclMat &src, int polyN, oclMat &dst) +{ + string kernelName("polynomialExpansion"); + +#ifdef ANDROID + size_t localThreads[3] = { 128, 1, 1 }; +#else + size_t localThreads[3] = { 256, 1, 1 }; +#endif + size_t globalThreads[3] = { divUp(src.cols, localThreads[0] - 2*polyN) * localThreads[0], src.rows, 1 }; + int smem_size = 3 * localThreads[0] * sizeof(float); + + std::vector< std::pair > args; + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gMat.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xgMat.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&xxgMat.data)); + args.push_back(std::make_pair(smem_size, (void *)NULL)); + args.push_back(std::make_pair(sizeof(cl_float4), (void *)&ig)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step)); + + char opt [128]; + sprintf(opt, "-D polyN=%d", polyN); + + openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, + globalThreads, localThreads, args, -1, -1, opt); +} + +void cv::ocl::FarnebackOpticalFlow::gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) { string kernelName("gaussianBlur5"); int height = src.rows / 5; @@ -223,7 +227,7 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) std::vector< std::pair > args; 
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data)); + args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKerMat.data)); args.push_back(std::make_pair(smem_size, (void *)NULL)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&height)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols)); @@ -234,16 +238,6 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst) openCLExecuteKernel(Context::getContext(), &optical_flow_farneback, kernelName, globalThreads, localThreads, args, -1, -1); } -} -} -} // namespace cv { namespace ocl { namespace optflow_farneback - -static oclMat allocMatFromBuf(int rows, int cols, int type, oclMat &mat) -{ - if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) - return mat(Rect(0, 0, cols, rows)); - return mat = oclMat(rows, cols, type); -} cv::ocl::FarnebackOpticalFlow::FarnebackOpticalFlow() { @@ -343,14 +337,14 @@ void cv::ocl::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s cv::Mat t_xg(1, n + 1, CV_32FC1, xg); cv::Mat t_xxg(1, n + 1, CV_32FC1, xxg); - optflow_farneback::g.upload(t_g); - optflow_farneback::xg.upload(t_xg); - optflow_farneback::xxg.upload(t_xxg); + gMat.upload(t_g); + xgMat.upload(t_xg); + xxgMat.upload(t_xxg); - optflow_farneback::ig[0] = static_cast(ig11); - optflow_farneback::ig[1] = static_cast(ig03); - optflow_farneback::ig[2] = static_cast(ig33); - optflow_farneback::ig[3] = static_cast(ig55); + ig[0] = static_cast(ig11); + ig[1] = static_cast(ig03); + ig[2] = static_cast(ig33); + ig[3] = static_cast(ig55); } void cv::ocl::FarnebackOpticalFlow::updateFlow_boxFilter( @@ -372,7 +366,7 @@ void cv::ocl::FarnebackOpticalFlow::updateFlow_gaussianBlur( const oclMat& R0, const oclMat& R1, oclMat& flowx, oclMat& flowy, oclMat& M, oclMat &bufM, int blockSize, bool updateMatrices) { - 
optflow_farneback::gaussianBlur5Ocl(M, blockSize/2, bufM); + gaussianBlur5Ocl(M, blockSize/2, bufM); swap(M, bufM); @@ -491,8 +485,8 @@ void cv::ocl::FarnebackOpticalFlow::operator ()( if (fastPyramids) { - optflow_farneback::polynomialExpansionOcl(pyramid0_[k], polyN, R[0]); - optflow_farneback::polynomialExpansionOcl(pyramid1_[k], polyN, R[1]); + polynomialExpansionOcl(pyramid0_[k], polyN, R[0]); + polynomialExpansionOcl(pyramid1_[k], polyN, R[1]); } else { @@ -508,13 +502,13 @@ void cv::ocl::FarnebackOpticalFlow::operator ()( }; Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); - optflow_farneback::setGaussianBlurKernel(g.ptr(smoothSize/2), smoothSize/2); + setGaussianBlurKernel(g.ptr(smoothSize/2), smoothSize/2); for (int i = 0; i < 2; i++) { - optflow_farneback::gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]); + gaussianBlurOcl(frames_[i], smoothSize/2, blurredFrame[i]); resize(blurredFrame[i], pyrLevel[i], Size(width, height), INTER_LINEAR); - optflow_farneback::polynomialExpansionOcl(pyrLevel[i], polyN, R[i]); + polynomialExpansionOcl(pyrLevel[i], polyN, R[i]); } } @@ -523,7 +517,7 @@ void cv::ocl::FarnebackOpticalFlow::operator ()( if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) { Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F); - optflow_farneback::setGaussianBlurKernel(g.ptr(winSize/2), winSize/2); + setGaussianBlurKernel(g.ptr(winSize/2), winSize/2); } for (int i = 0; i < numIters; i++) {