diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp index d0a53357b3..eede6e8ab0 100644 --- a/modules/video/include/opencv2/video/tracking.hpp +++ b/modules/video/include/opencv2/video/tracking.hpp @@ -397,6 +397,29 @@ public: }; +/** @brief Read a .flo file + + @param path Path to the file to be loaded + + The function readOpticalFlow loads a flow field from a file and returns it as a single matrix. + Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the + flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W Mat readOpticalFlow( const String& path ); +/** @brief Write a .flo to disk + + @param path Path to the file to be written + @param flow Flow field to be stored + + The function stores a flow field in a file, returns true on success, false otherwise. + The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds + to the flow in the horizontal direction (u), second - vertical (v). + */ +CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); + +/** + Base class for dense optical flow algorithms +*/ class CV_EXPORTS_W DenseOpticalFlow : public Algorithm { public: @@ -433,131 +456,6 @@ public: OutputArray err = cv::noArray()) = 0; }; -/** @brief "Dual TV L1" Optical Flow Algorithm. - -The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and -@cite Javier2012 . -Here are important members of the class that control the algorithm, which you can set after -constructing the class instance: - -- member double tau - Time step of the numerical scheme. - -- member double lambda - Weight parameter for the data term, attachment parameter. This is the most relevant - parameter, which determines the smoothness of the output. The smaller this parameter is, - the smoother the solutions we obtain. It depends on the range of motions of the images, so - its value should be adapted to each image sequence. - -- member double theta - Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the - attachment and the regularization terms. In theory, it should have a small value in order - to maintain both parts in correspondence. The method is stable for a large range of values - of this parameter. - -- member int nscales - Number of scales used to create the pyramid of images. - -- member int warps - Number of warpings per scale. Represents the number of times that I1(x+u0) and grad( - I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the - method. It also affects the running time, so it is a compromise between speed and - accuracy. - -- member double epsilon - Stopping criterion threshold used in the numerical scheme, which is a trade-off between - precision and running time. A small value will yield more accurate solutions at the - expense of a slower convergence. - -- member int iterations - Stopping criterion iterations number used in the numerical scheme. - -C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". -Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". -*/ -class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow -{ -public: - //! @brief Time step of the numerical scheme - /** @see setTau */ - CV_WRAP virtual double getTau() const = 0; - /** @copybrief getTau @see getTau */ - CV_WRAP virtual void setTau(double val) = 0; - //! 
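// A short usage sketch for readOpticalFlow()/writeOpticalFlow() above (not part of the header).
// It assumes that readOpticalFlow() reports failure by returning an empty matrix, while
// writeOpticalFlow() returns false, as documented above.
Mat field(480, 640, CV_32FC2, Scalar(1.f, -0.5f));   // constant flow: u = 1, v = -0.5
if (writeOpticalFlow("field.flo", field))
{
    Mat loaded = readOpticalFlow("field.flo");
    CV_Assert(!loaded.empty() && loaded.type() == CV_32FC2 && loaded.size() == field.size());
}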
@brief Weight parameter for the data term, attachment parameter - /** @see setLambda */ - CV_WRAP virtual double getLambda() const = 0; - /** @copybrief getLambda @see getLambda */ - CV_WRAP virtual void setLambda(double val) = 0; - //! @brief Weight parameter for (u - v)^2, tightness parameter - /** @see setTheta */ - CV_WRAP virtual double getTheta() const = 0; - /** @copybrief getTheta @see getTheta */ - CV_WRAP virtual void setTheta(double val) = 0; - //! @brief coefficient for additional illumination variation term - /** @see setGamma */ - CV_WRAP virtual double getGamma() const = 0; - /** @copybrief getGamma @see getGamma */ - CV_WRAP virtual void setGamma(double val) = 0; - //! @brief Number of scales used to create the pyramid of images - /** @see setScalesNumber */ - CV_WRAP virtual int getScalesNumber() const = 0; - /** @copybrief getScalesNumber @see getScalesNumber */ - CV_WRAP virtual void setScalesNumber(int val) = 0; - //! @brief Number of warpings per scale - /** @see setWarpingsNumber */ - CV_WRAP virtual int getWarpingsNumber() const = 0; - /** @copybrief getWarpingsNumber @see getWarpingsNumber */ - CV_WRAP virtual void setWarpingsNumber(int val) = 0; - //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time - /** @see setEpsilon */ - CV_WRAP virtual double getEpsilon() const = 0; - /** @copybrief getEpsilon @see getEpsilon */ - CV_WRAP virtual void setEpsilon(double val) = 0; - //! @brief Inner iterations (between outlier filtering) used in the numerical scheme - /** @see setInnerIterations */ - CV_WRAP virtual int getInnerIterations() const = 0; - /** @copybrief getInnerIterations @see getInnerIterations */ - CV_WRAP virtual void setInnerIterations(int val) = 0; - //! @brief Outer iterations (number of inner loops) used in the numerical scheme - /** @see setOuterIterations */ - CV_WRAP virtual int getOuterIterations() const = 0; - /** @copybrief getOuterIterations @see getOuterIterations */ - CV_WRAP virtual void setOuterIterations(int val) = 0; - //! @brief Use initial flow - /** @see setUseInitialFlow */ - CV_WRAP virtual bool getUseInitialFlow() const = 0; - /** @copybrief getUseInitialFlow @see getUseInitialFlow */ - CV_WRAP virtual void setUseInitialFlow(bool val) = 0; - //! @brief Step between scales (<1) - /** @see setScaleStep */ - CV_WRAP virtual double getScaleStep() const = 0; - /** @copybrief getScaleStep @see getScaleStep */ - CV_WRAP virtual void setScaleStep(double val) = 0; - //! @brief Median filter kernel size (1 = no filter) (3 or 5) - /** @see setMedianFiltering */ - CV_WRAP virtual int getMedianFiltering() const = 0; - /** @copybrief getMedianFiltering @see getMedianFiltering */ - CV_WRAP virtual void setMedianFiltering(int val) = 0; - - /** @brief Creates instance of cv::DualTVL1OpticalFlow*/ - CV_WRAP static Ptr create( - double tau = 0.25, - double lambda = 0.15, - double theta = 0.3, - int nscales = 5, - int warps = 5, - double epsilon = 0.01, - int innnerIterations = 30, - int outerIterations = 10, - double scaleStep = 0.8, - double gamma = 0.0, - int medianFiltering = 5, - bool useInitialFlow = false); -}; - -/** @brief Creates instance of cv::DenseOpticalFlow -*/ -CV_EXPORTS_W Ptr createOptFlow_DualTVL1(); /** @brief Class computing a dense optical flow using the Gunnar Farneback's algorithm. */ @@ -599,6 +497,166 @@ public: int flags = 0); }; +/** @brief Variational optical flow refinement + +This class implements variational refinement of the input flow field, i.e. 
+it uses the input flow to initialize the minimization of the following functional:
+\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$,
+where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms
+respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the
+influence of outliers. A complete formulation and a description of the minimization
+procedure can be found in @cite Brox2004
+*/
+class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow
+{
+public:
+    /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components
+    (to avoid extra splits/merges) */
+    CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0;
+
+    /** @brief Number of outer (fixed-point) iterations in the minimization procedure.
+    @see setFixedPointIterations */
+    CV_WRAP virtual int getFixedPointIterations() const = 0;
+    /** @copybrief getFixedPointIterations @see getFixedPointIterations */
+    CV_WRAP virtual void setFixedPointIterations(int val) = 0;
+
+    /** @brief Number of inner successive over-relaxation (SOR) iterations
+    in the minimization procedure to solve the respective linear system.
+    @see setSorIterations */
+    CV_WRAP virtual int getSorIterations() const = 0;
+    /** @copybrief getSorIterations @see getSorIterations */
+    CV_WRAP virtual void setSorIterations(int val) = 0;
+
+    /** @brief Relaxation factor in SOR
+    @see setOmega */
+    CV_WRAP virtual float getOmega() const = 0;
+    /** @copybrief getOmega @see getOmega */
+    CV_WRAP virtual void setOmega(float val) = 0;
+
+    /** @brief Weight of the smoothness term
+    @see setAlpha */
+    CV_WRAP virtual float getAlpha() const = 0;
+    /** @copybrief getAlpha @see getAlpha */
+    CV_WRAP virtual void setAlpha(float val) = 0;
+
+    /** @brief Weight of the color constancy term
+    @see setDelta */
+    CV_WRAP virtual float getDelta() const = 0;
+    /** @copybrief getDelta @see getDelta */
+    CV_WRAP virtual void setDelta(float val) = 0;
+
+    /** @brief Weight of the gradient constancy term
+    @see setGamma */
+    CV_WRAP virtual float getGamma() const = 0;
+    /** @copybrief getGamma @see getGamma */
+    CV_WRAP virtual void setGamma(float val) = 0;
+
+    /** @brief Creates an instance of VariationalRefinement
+    */
+    CV_WRAP static Ptr<VariationalRefinement> create();
+};
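// A minimal usage sketch (not part of the header): a coarse Farneback flow is refined in place.
// frame0 and frame1 are assumed to be 8-bit single-channel frames of equal size.
Mat coarse_flow;
calcOpticalFlowFarneback(frame0, frame1, coarse_flow, 0.5, 3, 15, 3, 5, 1.2, 0);

Ptr<VariationalRefinement> refiner = VariationalRefinement::create();
refiner->setFixedPointIterations(5);  // outer (fixed-point) iterations
refiner->setSorIterations(5);         // inner SOR iterations per fixed-point step
refiner->setAlpha(20.0f);             // smoothness weight
refiner->setDelta(5.0f);              // color constancy weight
refiner->setGamma(10.0f);             // gradient constancy weight

// calc() treats the content of coarse_flow as the initialization and refines it in place.
refiner->calc(frame0, frame1, coarse_flow);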
+
+/** @brief DIS optical flow algorithm.
+
+This class implements the Dense Inverse Search (DIS) optical flow algorithm. More
+details about the algorithm can be found in @cite Kroeger2016 . The class includes three presets with
+preselected parameters that provide a reasonable trade-off between speed and quality. However, even the
+slowest preset is still relatively fast; use DeepFlow if you need better quality and do not care about speed.
+
+This implementation includes several additional features compared to the algorithm described in the paper,
+including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to
+utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation,
+if the previous frame's flow field is passed).
+*/
+class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
+{
+public:
+    enum
+    {
+        PRESET_ULTRAFAST = 0,
+        PRESET_FAST = 1,
+        PRESET_MEDIUM = 2
+    };
+
+    /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level
+    corresponds to the original image resolution). The final flow is obtained by bilinear upscaling.
+    @see setFinestScale */
+    CV_WRAP virtual int getFinestScale() const = 0;
+    /** @copybrief getFinestScale @see getFinestScale */
+    CV_WRAP virtual void setFinestScale(int val) = 0;
+
+    /** @brief Size of an image patch for matching (in pixels). Normally, the default 8x8 patches work well
+    enough in most cases.
+    @see setPatchSize */
+    CV_WRAP virtual int getPatchSize() const = 0;
+    /** @copybrief getPatchSize @see getPatchSize */
+    CV_WRAP virtual void setPatchSize(int val) = 0;
+
+    /** @brief Stride between neighboring patches. Must be less than the patch size. Lower values correspond
+    to higher flow quality.
+    @see setPatchStride */
+    CV_WRAP virtual int getPatchStride() const = 0;
+    /** @copybrief getPatchStride @see getPatchStride */
+    CV_WRAP virtual void setPatchStride(int val) = 0;
+
+    /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values
+    may improve quality in some cases.
+    @see setGradientDescentIterations */
+    CV_WRAP virtual int getGradientDescentIterations() const = 0;
+    /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
+    CV_WRAP virtual void setGradientDescentIterations(int val) = 0;
+
+    /** @brief Number of fixed-point iterations of variational refinement per scale. Set to zero to
+    disable variational refinement completely. Higher values will typically result in smoother and
+    higher-quality flow.
+    @see setVariationalRefinementIterations */
+    CV_WRAP virtual int getVariationalRefinementIterations() const = 0;
+    /** @copybrief getVariationalRefinementIterations @see getVariationalRefinementIterations */
+    CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0;
+
+    /** @brief Weight of the smoothness term
+    @see setVariationalRefinementAlpha */
+    CV_WRAP virtual float getVariationalRefinementAlpha() const = 0;
+    /** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */
+    CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0;
+
+    /** @brief Weight of the color constancy term
+    @see setVariationalRefinementDelta */
+    CV_WRAP virtual float getVariationalRefinementDelta() const = 0;
+    /** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */
+    CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0;
+
+    /** @brief Weight of the gradient constancy term
+    @see setVariationalRefinementGamma */
+    CV_WRAP virtual float getVariationalRefinementGamma() const = 0;
+    /** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */
+    CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0;
+
+    /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on
+    by default as it typically provides a noticeable quality boost because of increased robustness to
+    illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes
+    in illumination.
+    @see setUseMeanNormalization */
+    CV_WRAP virtual bool getUseMeanNormalization() const = 0;
+    /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */
+    CV_WRAP virtual void setUseMeanNormalization(bool val) = 0;
+
+    /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by
+    default, as it tends to work better on average and can sometimes help recover from major errors
+    introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this
+    option off can make the output flow field a bit smoother, however.
+    @see setUseSpatialPropagation */
+    CV_WRAP virtual bool getUseSpatialPropagation() const = 0;
+    /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */
+    CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0;
+
+    /** @brief Creates an instance of DISOpticalFlow
+
+    @param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM
+    */
+    CV_WRAP static Ptr<DISOpticalFlow> create(int preset = DISOpticalFlow::PRESET_FAST);
+};
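// A minimal usage sketch (not part of the header): frame0, frame1 and frame2 are assumed to be
// consecutive CV_8UC1 frames of equal size.
Ptr<DISOpticalFlow> dis = DISOpticalFlow::create(DISOpticalFlow::PRESET_MEDIUM);
dis->setFinestScale(1);                      // go one pyramid level finer: higher quality, slower
dis->setVariationalRefinementIterations(5);  // keep the variational post-processing enabled

Mat flow;                                    // filled with a CV_32FC2 field by calc()
dis->calc(frame0, frame1, flow);

// Temporal propagation: a non-empty flow is reused as the initial approximation for the next pair.
dis->calc(frame1, frame2, flow);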
 /** @brief Class used for calculating a sparse optical flow.
diff --git a/modules/video/perf/opencl/perf_dis_optflow.cpp b/modules/video/perf/opencl/perf_dis_optflow.cpp
new file mode 100644
index 0000000000..8552174227
--- /dev/null
+++ b/modules/video/perf/opencl/perf_dis_optflow.cpp
@@ -0,0 +1,72 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "../perf_precomp.hpp"
+#include "opencv2/ts/ocl_perf.hpp"
+
+namespace opencv_test { namespace {
+
+#ifdef HAVE_OPENCL
+
+void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2);
+
+typedef tuple<String, Size> DISParams;
+typedef TestBaseWithParam<DISParams> DenseOpticalFlow_DIS;
+
+OCL_PERF_TEST_P(DenseOpticalFlow_DIS, perf,
+                Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p)))
+{
+    DISParams params = GetParam();
+
+    // use strings to print preset names in the perf test results:
+    String preset_string = get<0>(params);
+    int preset = DISOpticalFlow::PRESET_FAST;
+    if (preset_string == "PRESET_ULTRAFAST")
+        preset = DISOpticalFlow::PRESET_ULTRAFAST;
+    else if (preset_string == "PRESET_FAST")
+        preset = DISOpticalFlow::PRESET_FAST;
+    else if (preset_string == "PRESET_MEDIUM")
+        preset = DISOpticalFlow::PRESET_MEDIUM;
+    Size sz = get<1>(params);
+
+    UMat frame1(sz, CV_8U);
+    UMat frame2(sz, CV_8U);
+    UMat flow;
+
+    MakeArtificialExample(frame1, frame2);
+
+    Ptr<DISOpticalFlow> algo = DISOpticalFlow::create(preset);
+
+    OCL_TEST_CYCLE_N(10)
+    {
+        algo->calc(frame1, frame2, flow);
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2)
+{
+    int src_scale = 2;
+    int OF_scale = 6;
+    double sigma = dst_frame1.cols / 300;
+
+    UMat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U);
+    randu(tmp, 0, 255);
+    resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT);
+    resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT);
+
+    Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)),
+                           CV_32FC2);
+    randn(displacement_field, 0.0, sigma);
+    resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC);
+    for (int i = 0; i < displacement_field.rows; i++)
+        for (int j = 0; j < displacement_field.cols; j++)
+            displacement_field.at<Vec2f>(i, j) += Vec2f((float)j, (float)i);
+
+    remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE);
+}
+
+#endif // HAVE_OPENCL
+
+}} // namespace
diff --git a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp
deleted file mode 100644
index 6abc0b1a11..0000000000
--- a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Fangfang Bai, fangfang@multicorewareinc.com -// Jin Ma, jin@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "../perf_precomp.hpp" -#include "opencv2/ts/ocl_perf.hpp" - -#ifdef HAVE_OPENCL - -namespace opencv_test { -namespace ocl { - -///////////// OpticalFlow Dual TVL1 //////////////////////// -typedef tuple< tuple, bool> OpticalFlowDualTVL1Params; -typedef TestBaseWithParam OpticalFlowDualTVL1Fixture; - -OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, - ::testing::Combine( - ::testing::Values(make_tuple(-1, 0.3), - make_tuple(3, 0.5)), - ::testing::Bool() - ) - ) - { - Mat frame0 = imread(getDataPath("cv/optflow/RubberWhale1.png"), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()) << "can't load RubberWhale1.png"; - - Mat frame1 = imread(getDataPath("cv/optflow/RubberWhale2.png"), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()) << "can't load RubberWhale2.png"; - - const Size srcSize = frame0.size(); - - const OpticalFlowDualTVL1Params params = GetParam(); - const tuple filteringScale = get<0>(params); - const int medianFiltering = get<0>(filteringScale); - const double scaleStep = get<1>(filteringScale); - const bool useInitFlow = get<1>(params); - double eps = 0.9; - - UMat uFrame0; frame0.copyTo(uFrame0); - UMat uFrame1; frame1.copyTo(uFrame1); - UMat uFlow(srcSize, CV_32FC2); - declare.in(uFrame0, uFrame1, WARMUP_READ).out(uFlow, WARMUP_READ); - - //create algorithm - cv::Ptr alg = cv::createOptFlow_DualTVL1(); - - //set parameters - alg->setScaleStep(scaleStep); - alg->setMedianFiltering(medianFiltering); - - if (useInitFlow) - { - //calculate initial flow as result of optical flow - alg->calc(uFrame0, uFrame1, uFlow); - } - - //set flag to use initial flow - alg->setUseInitialFlow(useInitFlow); - OCL_TEST_CYCLE() - alg->calc(uFrame0, uFrame1, uFlow); - - SANITY_CHECK(uFlow, eps, ERROR_RELATIVE); - } -} - -} // namespace opencv_test::ocl - -#endif // HAVE_OPENCL diff --git a/modules/video/perf/perf_disflow.cpp b/modules/video/perf/perf_disflow.cpp new file mode 100644 index 0000000000..21b92cb44d --- /dev/null +++ b/modules/video/perf/perf_disflow.cpp @@ -0,0 +1,66 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
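// MakeArtificialExample() below warps the second frame with a smooth random displacement field to
// produce a synthetic test pair. remap() expects absolute sampling coordinates, which is why the
// identity map (j, i) is added to the relative offsets; the same conversion, sketched here for a
// hypothetical relative flow field `rel_flow` and source image `src`:
Mat absolute_map(rel_flow.size(), CV_32FC2), warped;
for (int i = 0; i < absolute_map.rows; i++)
    for (int j = 0; j < absolute_map.cols; j++)
        absolute_map.at<Vec2f>(i, j) = rel_flow.at<Vec2f>(i, j) + Vec2f((float)j, (float)i);
remap(src, warped, absolute_map, Mat(), INTER_LINEAR, BORDER_REPLICATE);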
+#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2); + +typedef tuple DISParams; +typedef TestBaseWithParam DenseOpticalFlow_DIS; + +PERF_TEST_P(DenseOpticalFlow_DIS, perf, + Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p))) +{ + DISParams params = GetParam(); + + // use strings to print preset names in the perf test results: + String preset_string = get<0>(params); + int preset = DISOpticalFlow::PRESET_FAST; + if (preset_string == "PRESET_ULTRAFAST") + preset = DISOpticalFlow::PRESET_ULTRAFAST; + else if (preset_string == "PRESET_FAST") + preset = DISOpticalFlow::PRESET_FAST; + else if (preset_string == "PRESET_MEDIUM") + preset = DISOpticalFlow::PRESET_MEDIUM; + Size sz = get<1>(params); + + Mat frame1(sz, CV_8U); + Mat frame2(sz, CV_8U); + Mat flow; + + MakeArtificialExample(frame1, frame2); + + TEST_CYCLE_N(10) + { + Ptr algo = DISOpticalFlow::create(preset); + algo->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} + +void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2) +{ + int src_scale = 2; + int OF_scale = 6; + double sigma = dst_frame1.cols / 300; + + Mat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U); + randu(tmp, 0, 255); + resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT); + resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT); + + Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)), + CV_32FC2); + randn(displacement_field, 0.0, sigma); + resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC); + for (int i = 0; i < displacement_field.rows; i++) + for (int j = 0; j < displacement_field.cols; j++) + displacement_field.at(i, j) += Vec2f((float)j, (float)i); + + remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE); +} + +}} // namespace diff --git a/modules/video/perf/perf_precomp.hpp b/modules/video/perf/perf_precomp.hpp index e2ac9db104..ccc1865ff7 100644 --- a/modules/video/perf/perf_precomp.hpp +++ b/modules/video/perf/perf_precomp.hpp @@ -6,5 +6,11 @@ #include "opencv2/ts.hpp" #include +#include "opencv2/ts/ts_perf.hpp" + +namespace cvtest +{ +using namespace perf; +} #endif diff --git a/modules/video/perf/perf_tvl1optflow.cpp b/modules/video/perf/perf_tvl1optflow.cpp deleted file mode 100644 index 917c28fcf3..0000000000 --- a/modules/video/perf/perf_tvl1optflow.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "perf_precomp.hpp" - -namespace opencv_test { namespace { -using namespace perf; - -typedef TestBaseWithParam< std::pair > ImagePair; - -std::pair impair(const char* im1, const char* im2) -{ - return std::make_pair(string(im1), string(im2)); -} - -PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png"))) -{ - declare.time(260); - - Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE); - Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - ASSERT_FALSE(frame2.empty()); - - Mat flow; - - Ptr tvl1 = createOptFlow_DualTVL1(); - - TEST_CYCLE() tvl1->calc(frame1, frame2, flow); - - SANITY_CHECK_NOTHING(); -} - -}} // namespace diff --git a/modules/video/perf/perf_variational_refinement.cpp b/modules/video/perf/perf_variational_refinement.cpp new file mode 100644 index 0000000000..20b911df48 --- 
/dev/null +++ b/modules/video/perf/perf_variational_refinement.cpp @@ -0,0 +1,40 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +typedef tuple VarRefParams; +typedef TestBaseWithParam DenseOpticalFlow_VariationalRefinement; + +PERF_TEST_P(DenseOpticalFlow_VariationalRefinement, perf, Combine(Values(szQVGA, szVGA), Values(5, 10), Values(5, 10))) +{ + VarRefParams params = GetParam(); + Size sz = get<0>(params); + int sorIter = get<1>(params); + int fixedPointIter = get<2>(params); + + Mat frame1(sz, CV_8U); + Mat frame2(sz, CV_8U); + Mat flow(sz, CV_32FC2); + + randu(frame1, 0, 255); + randu(frame2, 0, 255); + flow.setTo(0.0f); + + TEST_CYCLE_N(10) + { + Ptr var = VariationalRefinement::create(); + var->setAlpha(20.0f); + var->setGamma(10.0f); + var->setDelta(5.0f); + var->setSorIterations(sorIter); + var->setFixedPointIterations(fixedPointIter); + var->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} + +}} // namespace diff --git a/modules/video/src/dis_flow.cpp b/modules/video/src/dis_flow.cpp new file mode 100644 index 0000000000..e666a4e1b0 --- /dev/null +++ b/modules/video/src/dis_flow.cpp @@ -0,0 +1,1500 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" +#include "opencl_kernels_video.hpp" + +using namespace std; +#define EPS 0.001F +#define INF 1E+10F + +namespace cv +{ + +class DISOpticalFlowImpl CV_FINAL : public DISOpticalFlow +{ + public: + DISOpticalFlowImpl(); + + void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; + void collectGarbage() CV_OVERRIDE; + + protected: //!< algorithm parameters + int finest_scale, coarsest_scale; + int patch_size; + int patch_stride; + int grad_descent_iter; + int variational_refinement_iter; + float variational_refinement_alpha; + float variational_refinement_gamma; + float variational_refinement_delta; + bool use_mean_normalization; + bool use_spatial_propagation; + + protected: //!< some auxiliary variables + int border_size; + int w, h; //!< flow buffer width and height on the current scale + int ws, hs; //!< sparse flow buffer width and height on the current scale + + public: + int getFinestScale() const CV_OVERRIDE { return finest_scale; } + void setFinestScale(int val) CV_OVERRIDE { finest_scale = val; } + int getPatchSize() const CV_OVERRIDE { return patch_size; } + void setPatchSize(int val) CV_OVERRIDE { patch_size = val; } + int getPatchStride() const CV_OVERRIDE { return patch_stride; } + void setPatchStride(int val) CV_OVERRIDE { patch_stride = val; } + int getGradientDescentIterations() const CV_OVERRIDE { return grad_descent_iter; } + void setGradientDescentIterations(int val) CV_OVERRIDE { grad_descent_iter = val; } + int getVariationalRefinementIterations() const CV_OVERRIDE { return variational_refinement_iter; } + void setVariationalRefinementIterations(int val) CV_OVERRIDE { variational_refinement_iter = val; } + float getVariationalRefinementAlpha() const CV_OVERRIDE { return variational_refinement_alpha; } + void setVariationalRefinementAlpha(float val) CV_OVERRIDE { variational_refinement_alpha = val; } + float getVariationalRefinementDelta() const CV_OVERRIDE { return variational_refinement_delta; } + void setVariationalRefinementDelta(float val) CV_OVERRIDE { variational_refinement_delta = val; } + float getVariationalRefinementGamma() const CV_OVERRIDE { return variational_refinement_gamma; } + void setVariationalRefinementGamma(float val) CV_OVERRIDE { variational_refinement_gamma = val; } + + bool getUseMeanNormalization() const CV_OVERRIDE { return use_mean_normalization; } + void setUseMeanNormalization(bool val) CV_OVERRIDE { use_mean_normalization = val; } + bool getUseSpatialPropagation() const CV_OVERRIDE { return use_spatial_propagation; } + void setUseSpatialPropagation(bool val) CV_OVERRIDE { use_spatial_propagation = val; } + + protected: //!< internal buffers + vector > I0s; //!< Gaussian pyramid for the current frame + vector > I1s; //!< Gaussian pyramid for the next frame + vector > I1s_ext; //!< I1s with borders + + vector > I0xs; //!< Gaussian pyramid for the x gradient of the current frame + vector > I0ys; //!< Gaussian pyramid for the y gradient of the current frame + + vector > Ux; //!< x component of the flow vectors + vector > Uy; //!< y component of the flow vectors + + vector > initial_Ux; //!< x component of the initial flow field, if one was passed as an input + vector > initial_Uy; //!< y component of the initial flow field, if one was passed as an input + + Mat_ U; //!< a buffer for the merged flow + + Mat_ Sx; //!< intermediate sparse flow representation (x component) + Mat_ Sy; //!< intermediate sparse flow representation (y component) + + /* Structure tensor components: */ + Mat_ 
I0xx_buf; //!< sum of squares of x gradient values + Mat_ I0yy_buf; //!< sum of squares of y gradient values + Mat_ I0xy_buf; //!< sum of x and y gradient products + + /* Extra buffers that are useful if patch mean-normalization is used: */ + Mat_ I0x_buf; //!< sum of x gradient values + Mat_ I0y_buf; //!< sum of y gradient values + + /* Auxiliary buffers used in structure tensor computation: */ + Mat_ I0xx_buf_aux; + Mat_ I0yy_buf_aux; + Mat_ I0xy_buf_aux; + Mat_ I0x_buf_aux; + Mat_ I0y_buf_aux; + + vector > variational_refinement_processors; + + private: //!< private methods and parallel sections + void prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow); + void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x, + Mat &I0y); + + struct PatchInverseSearch_ParBody : public ParallelLoopBody + { + DISOpticalFlowImpl *dis; + int nstripes, stripe_sz; + int hs; + Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y; + int num_iter, pyr_level; + + PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy, + Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter, + int _pyr_level); + void operator()(const Range &range) const CV_OVERRIDE; + }; + + struct Densification_ParBody : public ParallelLoopBody + { + DISOpticalFlowImpl *dis; + int nstripes, stripe_sz; + int h; + Mat *Ux, *Uy, *Sx, *Sy, *I0, *I1; + + Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, + Mat &src_Sy, Mat &_I0, Mat &_I1); + void operator()(const Range &range) const CV_OVERRIDE; + }; + +#ifdef HAVE_OPENCL + vector u_I0s; //!< Gaussian pyramid for the current frame + vector u_I1s; //!< Gaussian pyramid for the next frame + vector u_I1s_ext; //!< I1s with borders + + vector u_I0xs; //!< Gaussian pyramid for the x gradient of the current frame + vector u_I0ys; //!< Gaussian pyramid for the y gradient of the current frame + + vector u_Ux; //!< x component of the flow vectors + vector u_Uy; //!< y component of the flow vectors + + vector u_initial_Ux; //!< x component of the initial flow field, if one was passed as an input + vector u_initial_Uy; //!< y component of the initial flow field, if one was passed as an input + + UMat u_U; //!< a buffer for the merged flow + + UMat u_Sx; //!< intermediate sparse flow representation (x component) + UMat u_Sy; //!< intermediate sparse flow representation (y component) + + /* Structure tensor components: */ + UMat u_I0xx_buf; //!< sum of squares of x gradient values + UMat u_I0yy_buf; //!< sum of squares of y gradient values + UMat u_I0xy_buf; //!< sum of x and y gradient products + + /* Extra buffers that are useful if patch mean-normalization is used: */ + UMat u_I0x_buf; //!< sum of x gradient values + UMat u_I0y_buf; //!< sum of y gradient values + + /* Auxiliary buffers used in structure tensor computation: */ + UMat u_I0xx_buf_aux; + UMat u_I0yy_buf_aux; + UMat u_I0xy_buf_aux; + UMat u_I0x_buf_aux; + UMat u_I0y_buf_aux; + + bool ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, + UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y); + void ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow); + bool ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow); + bool ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1); + bool ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, + UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, 
int num_iter, int pyr_level); +#endif +}; + +DISOpticalFlowImpl::DISOpticalFlowImpl() +{ + finest_scale = 2; + patch_size = 8; + patch_stride = 4; + grad_descent_iter = 16; + variational_refinement_iter = 5; + variational_refinement_alpha = 20.f; + variational_refinement_gamma = 10.f; + variational_refinement_delta = 5.f; + + border_size = 16; + use_mean_normalization = true; + use_spatial_propagation = true; + + /* Use separate variational refinement instances for different scales to avoid repeated memory allocation: */ + int max_possible_scales = 10; + for (int i = 0; i < max_possible_scales; i++) + variational_refinement_processors.push_back(VariationalRefinement::create()); +} + +void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow) +{ + I0s.resize(coarsest_scale + 1); + I1s.resize(coarsest_scale + 1); + I1s_ext.resize(coarsest_scale + 1); + I0xs.resize(coarsest_scale + 1); + I0ys.resize(coarsest_scale + 1); + Ux.resize(coarsest_scale + 1); + Uy.resize(coarsest_scale + 1); + + Mat flow_uv[2]; + if (use_flow) + { + split(flow, flow_uv); + initial_Ux.resize(coarsest_scale + 1); + initial_Uy.resize(coarsest_scale + 1); + } + + int fraction = 1; + int cur_rows = 0, cur_cols = 0; + + for (int i = 0; i <= coarsest_scale; i++) + { + /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ + if (i == finest_scale) + { + cur_rows = I0.rows / fraction; + cur_cols = I0.cols / fraction; + I0s[i].create(cur_rows, cur_cols); + resize(I0, I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA); + I1s[i].create(cur_rows, cur_cols); + resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA); + + /* These buffers are reused in each scale so we initialize them once on the finest scale: */ + Sx.create(cur_rows / patch_stride, cur_cols / patch_stride); + Sy.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + + I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0x_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0y_buf_aux.create(cur_rows, cur_cols / patch_stride); + + U.create(cur_rows, cur_cols); + } + else if (i > finest_scale) + { + cur_rows = I0s[i - 1].rows / 2; + cur_cols = I0s[i - 1].cols / 2; + I0s[i].create(cur_rows, cur_cols); + resize(I0s[i - 1], I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA); + I1s[i].create(cur_rows, cur_cols); + resize(I1s[i - 1], I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA); + } + + if (i >= finest_scale) + { + I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size); + copyMakeBorder(I1s[i], I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); + I0xs[i].create(cur_rows, cur_cols); + I0ys[i].create(cur_rows, cur_cols); + spatialGradient(I0s[i], I0xs[i], I0ys[i]); + Ux[i].create(cur_rows, cur_cols); + Uy[i].create(cur_rows, cur_cols); + variational_refinement_processors[i]->setAlpha(variational_refinement_alpha); + variational_refinement_processors[i]->setDelta(variational_refinement_delta); + variational_refinement_processors[i]->setGamma(variational_refinement_gamma); + 
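            /* Each level reuses one of the preallocated VariationalRefinement instances: the
               alpha/delta/gamma weights above come from the user-facing parameters, the SOR
               iteration count below is kept fixed, and the number of fixed-point iterations is
               the value exposed through setVariationalRefinementIterations(). */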
variational_refinement_processors[i]->setSorIterations(5); + variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); + + if (use_flow) + { + resize(flow_uv[0], initial_Ux[i], Size(cur_cols, cur_rows)); + initial_Ux[i] /= fraction; + resize(flow_uv[1], initial_Uy[i], Size(cur_cols, cur_rows)); + initial_Uy[i] /= fraction; + } + } + + fraction *= 2; + } +} + +/* This function computes the structure tensor elements (local sums of I0x^2, I0x*I0y and I0y^2). + * A simple box filter is not used instead because we need to compute these sums on a sparse grid + * and store them densely in the output buffers. + */ +void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, + Mat &dst_I0y, Mat &I0x, Mat &I0y) +{ + float *I0xx_ptr = dst_I0xx.ptr(); + float *I0yy_ptr = dst_I0yy.ptr(); + float *I0xy_ptr = dst_I0xy.ptr(); + float *I0x_ptr = dst_I0x.ptr(); + float *I0y_ptr = dst_I0y.ptr(); + + float *I0xx_aux_ptr = I0xx_buf_aux.ptr(); + float *I0yy_aux_ptr = I0yy_buf_aux.ptr(); + float *I0xy_aux_ptr = I0xy_buf_aux.ptr(); + float *I0x_aux_ptr = I0x_buf_aux.ptr(); + float *I0y_aux_ptr = I0y_buf_aux.ptr(); + + /* Separable box filter: horizontal pass */ + for (int i = 0; i < h; i++) + { + float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; + short *x_row = I0x.ptr(i); + short *y_row = I0y.ptr(i); + for (int j = 0; j < patch_size; j++) + { + sum_xx += x_row[j] * x_row[j]; + sum_yy += y_row[j] * y_row[j]; + sum_xy += x_row[j] * y_row[j]; + sum_x += x_row[j]; + sum_y += y_row[j]; + } + I0xx_aux_ptr[i * ws] = sum_xx; + I0yy_aux_ptr[i * ws] = sum_yy; + I0xy_aux_ptr[i * ws] = sum_xy; + I0x_aux_ptr[i * ws] = sum_x; + I0y_aux_ptr[i * ws] = sum_y; + int js = 1; + for (int j = patch_size; j < w; j++) + { + sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]); + sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]); + sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]); + sum_x += (x_row[j] - x_row[j - patch_size]); + sum_y += (y_row[j] - y_row[j - patch_size]); + if ((j - patch_size + 1) % patch_stride == 0) + { + I0xx_aux_ptr[i * ws + js] = sum_xx; + I0yy_aux_ptr[i * ws + js] = sum_yy; + I0xy_aux_ptr[i * ws + js] = sum_xy; + I0x_aux_ptr[i * ws + js] = sum_x; + I0y_aux_ptr[i * ws + js] = sum_y; + js++; + } + } + } + + AutoBuffer sum_xx(ws), sum_yy(ws), sum_xy(ws), sum_x(ws), sum_y(ws); + for (int j = 0; j < ws; j++) + { + sum_xx[j] = 0.0f; + sum_yy[j] = 0.0f; + sum_xy[j] = 0.0f; + sum_x[j] = 0.0f; + sum_y[j] = 0.0f; + } + + /* Separable box filter: vertical pass */ + for (int i = 0; i < patch_size; i++) + for (int j = 0; j < ws; j++) + { + sum_xx[j] += I0xx_aux_ptr[i * ws + j]; + sum_yy[j] += I0yy_aux_ptr[i * ws + j]; + sum_xy[j] += I0xy_aux_ptr[i * ws + j]; + sum_x[j] += I0x_aux_ptr[i * ws + j]; + sum_y[j] += I0y_aux_ptr[i * ws + j]; + } + for (int j = 0; j < ws; j++) + { + I0xx_ptr[j] = sum_xx[j]; + I0yy_ptr[j] = sum_yy[j]; + I0xy_ptr[j] = sum_xy[j]; + I0x_ptr[j] = sum_x[j]; + I0y_ptr[j] = sum_y[j]; + } + int is = 1; + for (int i = patch_size; i < h; i++) + { + for (int j = 0; j < ws; j++) + { + sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); + sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); + sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); + sum_x[j] += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); 
+ sum_y[j] += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); + } + if ((i - patch_size + 1) % patch_stride == 0) + { + for (int j = 0; j < ws; j++) + { + I0xx_ptr[is * ws + j] = sum_xx[j]; + I0yy_ptr[is * ws + j] = sum_yy[j]; + I0xy_ptr[is * ws + j] = sum_xy[j]; + I0x_ptr[is * ws + j] = sum_x[j]; + I0y_ptr[is * ws + j] = sum_y[j]; + } + is++; + } + } +} + +DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, + int _hs, Mat &dst_Sx, Mat &dst_Sy, + Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, + Mat &_I0x, Mat &_I0y, int _num_iter, + int _pyr_level) + : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1), + I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter), pyr_level(_pyr_level) +{ + stripe_sz = (int)ceil(hs / (double)nstripes); +} + +/////////////////////////////////////////////* Patch processing functions *///////////////////////////////////////////// + +/* Some auxiliary macros */ +#define HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION \ + v_float32x4 w00v = v_setall_f32(w00); \ + v_float32x4 w01v = v_setall_f32(w01); \ + v_float32x4 w10v = v_setall_f32(w10); \ + v_float32x4 w11v = v_setall_f32(w11); \ + \ + v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16; \ + v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp; \ + v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left; \ + v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right, \ + I1_row_next_shifted_4_right; \ + v_float32x4 I_diff_left, I_diff_right; \ + \ + /* Preload and expand the first row of I1: */ \ + I1_row_16 = v_load(I1_ptr); \ + I1_row_shifted_16 = v_extract<1>(I1_row_16, I1_row_16); \ + v_expand(I1_row_16, I1_row_8, tmp); \ + v_expand(I1_row_shifted_16, I1_row_shifted_8, tmp); \ + v_expand(I1_row_8, I1_row_4_left, I1_row_4_right); \ + v_expand(I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right); \ + I1_ptr += I1_stride; + +#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION \ + /* Load the next row of I1: */ \ + I1_row_next_16 = v_load(I1_ptr); \ + /* Circular shift left by 1 element: */ \ + I1_row_next_shifted_16 = v_extract<1>(I1_row_next_16, I1_row_next_16); \ + /* Expand to 8 ushorts (we only need the first 8 values): */ \ + v_expand(I1_row_next_16, I1_row_next_8, tmp); \ + v_expand(I1_row_next_shifted_16, I1_row_next_shifted_8, tmp); \ + /* Separate the left and right halves: */ \ + v_expand(I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right); \ + v_expand(I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right); \ + \ + /* Load current row of I0: */ \ + I0_row_16 = v_load(I0_ptr); \ + v_expand(I0_row_16, I0_row_8, tmp); \ + v_expand(I0_row_8, I0_row_4_left, I0_row_4_right); \ + \ + /* Compute diffs between I0 and bilinearly interpolated I1: */ \ + I_diff_left = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_left)) + \ + w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_left)) + \ + w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_left)) + \ + w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_left)) - \ + v_cvt_f32(v_reinterpret_as_s32(I0_row_4_left)); \ + I_diff_right = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_right)) + \ + w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_right)) + \ + w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_right)) + \ + w11v 
* v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_right)) - \ + v_cvt_f32(v_reinterpret_as_s32(I0_row_4_right)); + +#define HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW \ + I0_ptr += I0_stride; \ + I1_ptr += I1_stride; \ + \ + I1_row_4_left = I1_row_next_4_left; \ + I1_row_4_right = I1_row_next_4_right; \ + I1_row_shifted_4_left = I1_row_next_shifted_4_left; \ + I1_row_shifted_4_right = I1_row_next_shifted_4_right; + +/* This function essentially performs one iteration of gradient descent when finding the most similar patch in I1 for a + * given one in I0. It assumes that I0_ptr and I1_ptr already point to the corresponding patches and w00, w01, w10, w11 + * are precomputed bilinear interpolation weights. It returns the SSD (sum of squared differences) between these patches + * and computes the values (dst_dUx, dst_dUy) that are used in the flow vector update. HAL acceleration is implemented + * only for the default patch size (8x8). Everything is processed in floats as using fixed-point approximations harms + * the quality significantly. + */ +inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, short *I0y_ptr, + int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) +{ + float SSD = 0.0f; +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + /* Variables to accumulate the sums */ + v_float32x4 Ux_vec = v_setall_f32(0); + v_float32x4 Uy_vec = v_setall_f32(0); + v_float32x4 SSD_vec = v_setall_f32(0); + + v_int16x8 I0x_row, I0y_row; + v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; + + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + I0x_row = v_load(I0x_ptr); + v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); + I0y_row = v_load(I0y_ptr); + v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); + + /* Update the sums: */ + Ux_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); + Uy_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); + SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + + I0x_ptr += I0_stride; + I0y_ptr += I0_stride; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + + /* Final reduce operations: */ + dst_dUx = v_reduce_sum(Ux_vec); + dst_dUy = v_reduce_sum(Uy_vec); + SSD = v_reduce_sum(SSD_vec); + } + else + { +#endif + dst_dUx = 0.0f; + dst_dUy = 0.0f; + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + SSD += diff * diff; + dst_dUx += diff * I0x_ptr[i * I0_stride + j]; + dst_dUy += diff * I0y_ptr[i * I0_stride + j]; + } +#ifdef CV_SIMD128 + } +#endif + return SSD; +} + +/* Same as processPatch, but with patch mean normalization, which improves robustness under changing + * lighting conditions + */ +inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, + short *I0y_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz, float x_grad_sum, float y_grad_sum) +{ + float sum_diff = 0.0, sum_diff_sq = 0.0; + float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; + float n = (float)patch_sz * patch_sz; + +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + /* Variables to accumulate the 
sums */ + v_float32x4 sum_I0x_mul_vec = v_setall_f32(0); + v_float32x4 sum_I0y_mul_vec = v_setall_f32(0); + v_float32x4 sum_diff_vec = v_setall_f32(0); + v_float32x4 sum_diff_sq_vec = v_setall_f32(0); + + v_int16x8 I0x_row, I0y_row; + v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; + + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + I0x_row = v_load(I0x_ptr); + v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); + I0y_row = v_load(I0y_ptr); + v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); + + /* Update the sums: */ + sum_I0x_mul_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); + sum_I0y_mul_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); + sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + sum_diff_vec += I_diff_left + I_diff_right; + + I0x_ptr += I0_stride; + I0y_ptr += I0_stride; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + + /* Final reduce operations: */ + sum_I0x_mul = v_reduce_sum(sum_I0x_mul_vec); + sum_I0y_mul = v_reduce_sum(sum_I0y_mul_vec); + sum_diff = v_reduce_sum(sum_diff_vec); + sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + sum_diff += diff; + sum_diff_sq += diff * diff; + + sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; + sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; + } +#ifdef CV_SIMD128 + } +#endif + dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; + dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; + return sum_diff_sq - sum_diff * sum_diff / n; +} + +/* Similar to processPatch, but compute only the sum of squared differences (SSD) between the patches */ +inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz) +{ + float SSD = 0.0f; +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + v_float32x4 SSD_vec = v_setall_f32(0); + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + SSD = v_reduce_sum(SSD_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + SSD += diff * diff; + } +#ifdef CV_SIMD128 + } +#endif + return SSD; +} + +/* Same as computeSSD, but with patch mean normalization */ +inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, + float w10, float w11, int patch_sz) +{ + float sum_diff = 0.0f, sum_diff_sq = 0.0f; + float n = (float)patch_sz * patch_sz; +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + v_float32x4 sum_diff_vec = v_setall_f32(0); + v_float32x4 sum_diff_sq_vec = v_setall_f32(0); + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + 
sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + sum_diff_vec += I_diff_left + I_diff_right; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + sum_diff = v_reduce_sum(sum_diff_vec); + sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + sum_diff += diff; + sum_diff_sq += diff * diff; + } +#ifdef CV_SIMD128 + } +#endif + return sum_diff_sq - sum_diff * sum_diff / n; +} + +#undef HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION +#undef HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION +#undef HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const +{ + // force separate processing of stripes if we are using spatial propagation: + if (dis->use_spatial_propagation && range.end > range.start + 1) + { + for (int n = range.start; n < range.end; n++) + (*this)(Range(n, n + 1)); + return; + } + int psz = dis->patch_size; + int psz2 = psz / 2; + int w_ext = dis->w + 2 * dis->border_size; //!< width of I1_ext + int bsz = dis->border_size; + + /* Input dense flow */ + float *Ux_ptr = Ux->ptr(); + float *Uy_ptr = Uy->ptr(); + + /* Output sparse flow */ + float *Sx_ptr = Sx->ptr(); + float *Sy_ptr = Sy->ptr(); + + uchar *I0_ptr = I0->ptr(); + uchar *I1_ptr = I1->ptr(); + short *I0x_ptr = I0x->ptr(); + short *I0y_ptr = I0y->ptr(); + + /* Precomputed structure tensor */ + float *xx_ptr = dis->I0xx_buf.ptr(); + float *yy_ptr = dis->I0yy_buf.ptr(); + float *xy_ptr = dis->I0xy_buf.ptr(); + /* And extra buffers for mean-normalization: */ + float *x_ptr = dis->I0x_buf.ptr(); + float *y_ptr = dis->I0y_buf.ptr(); + + bool use_temporal_candidates = false; + float *initial_Ux_ptr = NULL, *initial_Uy_ptr = NULL; + if (!dis->initial_Ux.empty()) + { + initial_Ux_ptr = dis->initial_Ux[pyr_level].ptr(); + initial_Uy_ptr = dis->initial_Uy[pyr_level].ptr(); + use_temporal_candidates = true; + } + + int i, j, dir; + int start_is, end_is, start_js, end_js; + int start_i, start_j; + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + dis->h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + dis->w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + +#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ + i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ + j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ + \ + w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ + w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ + w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ + w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); + +#define COMPUTE_SSD(dst, Ux, Uy) \ + INIT_BILINEAR_WEIGHTS(Ux, Uy); \ + if (dis->use_mean_normalization) \ + dst = computeSSDMeanNorm(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, \ + w01, w10, w11, psz); \ + else \ + dst = computeSSD(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, w01, \ + w10, w11, psz); + + int num_inner_iter = (int)floor(dis->grad_descent_iter / (float)num_iter); + for (int iter = 0; iter < num_iter; iter++) 
+ { + if (iter % 2 == 0) + { + dir = 1; + start_is = min(range.start * stripe_sz, hs); + end_is = min(range.end * stripe_sz, hs); + start_js = 0; + end_js = dis->ws; + start_i = start_is * dis->patch_stride; + start_j = 0; + } + else + { + dir = -1; + start_is = min(range.end * stripe_sz, hs) - 1; + end_is = min(range.start * stripe_sz, hs) - 1; + start_js = dis->ws - 1; + end_js = -1; + start_i = start_is * dis->patch_stride; + start_j = (dis->ws - 1) * dis->patch_stride; + } + + i = start_i; + for (int is = start_is; dir * is < dir * end_is; is += dir) + { + j = start_j; + for (int js = start_js; dir * js < dir * end_js; js += dir) + { + if (iter == 0) + { + /* Using result form the previous pyramid level as the very first approximation: */ + Sx_ptr[is * dis->ws + js] = Ux_ptr[(i + psz2) * dis->w + j + psz2]; + Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2]; + } + + float min_SSD = INF, cur_SSD; + if (use_temporal_candidates || dis->use_spatial_propagation) + { + COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]); + } + + if (use_temporal_candidates) + { + /* Try temporal candidates (vectors from the initial flow field that was passed to the function) */ + COMPUTE_SSD(cur_SSD, initial_Ux_ptr[(i + psz2) * dis->w + j + psz2], + initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]); + if (cur_SSD < min_SSD) + { + min_SSD = cur_SSD; + Sx_ptr[is * dis->ws + js] = initial_Ux_ptr[(i + psz2) * dis->w + j + psz2]; + Sy_ptr[is * dis->ws + js] = initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]; + } + } + + if (dis->use_spatial_propagation) + { + /* Try spatial candidates: */ + if (dir * js > dir * start_js) + { + COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]); + if (cur_SSD < min_SSD) + { + min_SSD = cur_SSD; + Sx_ptr[is * dis->ws + js] = Sx_ptr[is * dis->ws + js - dir]; + Sy_ptr[is * dis->ws + js] = Sy_ptr[is * dis->ws + js - dir]; + } + } + /* Flow vectors won't actually propagate across different stripes, which is the reason for keeping + * the number of stripes constant. It works well enough in practice and doesn't introduce any + * visible seams. 
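+                 * In calc() this body is therefore always launched over a fixed Range(0, 8) whenever
+                 * spatial propagation is enabled, independently of the number of worker threads.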
+ */ + if (dir * is > dir * start_is) + { + COMPUTE_SSD(cur_SSD, Sx_ptr[(is - dir) * dis->ws + js], Sy_ptr[(is - dir) * dis->ws + js]); + if (cur_SSD < min_SSD) + { + min_SSD = cur_SSD; + Sx_ptr[is * dis->ws + js] = Sx_ptr[(is - dir) * dis->ws + js]; + Sy_ptr[is * dis->ws + js] = Sy_ptr[(is - dir) * dis->ws + js]; + } + } + } + + /* Use the best candidate as a starting point for the gradient descent: */ + float cur_Ux = Sx_ptr[is * dis->ws + js]; + float cur_Uy = Sy_ptr[is * dis->ws + js]; + + /* Computing the inverse of the structure tensor: */ + float detH = xx_ptr[is * dis->ws + js] * yy_ptr[is * dis->ws + js] - + xy_ptr[is * dis->ws + js] * xy_ptr[is * dis->ws + js]; + if (abs(detH) < EPS) + detH = EPS; + float invH11 = yy_ptr[is * dis->ws + js] / detH; + float invH12 = -xy_ptr[is * dis->ws + js] / detH; + float invH22 = xx_ptr[is * dis->ws + js] / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[is * dis->ws + js]; + float y_grad_sum = y_ptr[is * dis->ws + js]; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + if (dis->use_mean_normalization) + SSD = processPatchMeanNorm(dUx, dUy, I0_ptr + i * dis->w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * dis->w + j, + I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + else + SSD = processPatch(dUx, dUy, I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + I0x_ptr + i * dis->w + j, I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, + w10, w11, psz); + + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + cur_Ux -= dx; + cur_Uy -= dy; + + /* Break when patch distance stops decreasing */ + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + /* If gradient descent converged to a flow vector that is very far from the initial approximation + * (more than patch size) then we don't use it. Noticeably improves the robustness. + */ + if (norm(Vec2f(cur_Ux - Sx_ptr[is * dis->ws + js], cur_Uy - Sy_ptr[is * dis->ws + js])) <= psz) + { + Sx_ptr[is * dis->ws + js] = cur_Ux; + Sy_ptr[is * dis->ws + js] = cur_Uy; + } + j += dir * dis->patch_stride; + } + i += dir * dis->patch_stride; + } + } +#undef INIT_BILINEAR_WEIGHTS +#undef COMPUTE_SSD +} + +DISOpticalFlowImpl::Densification_ParBody::Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, + Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, + Mat &_I0, Mat &_I1) + : dis(&_dis), nstripes(_nstripes), h(_h), Ux(&dst_Ux), Uy(&dst_Uy), Sx(&src_Sx), Sy(&src_Sy), I0(&_I0), I1(&_I1) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function transforms a sparse optical flow field obtained by PatchInverseSearch (which computes flow values + * on a sparse grid defined by patch_stride) into a dense optical flow field by weighted averaging of values from the + * overlapping patches. 
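+ * For every dense location (i, j), each overlapping patch (is, js) contributes its sparse flow
+ * vector (Sx, Sy) with a photometric weight coef = 1 / max(1, |I1(i + Sy, j + Sx) - I0(i, j)|),
+ * where I1 is sampled bilinearly; the dense flow at (i, j) is the coef-weighted average of these
+ * contributions.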
+ */ +void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + /* Input sparse flow */ + float *Sx_ptr = Sx->ptr(); + float *Sy_ptr = Sy->ptr(); + + /* Output dense flow */ + float *Ux_ptr = Ux->ptr(); + float *Uy_ptr = Uy->ptr(); + + uchar *I0_ptr = I0->ptr(); + uchar *I1_ptr = I1->ptr(); + + int psz = dis->patch_size; + int pstr = dis->patch_stride; + int i_l, i_u; + int j_l, j_u; + float i_m, j_m, diff; + + /* These values define the set of sparse grid locations that contain patches overlapping with the current dense flow + * location */ + int start_is, end_is; + int start_js, end_js; + +/* Some helper macros for updating this set of sparse grid locations */ +#define UPDATE_SPARSE_I_COORDINATES \ + if (i % pstr == 0 && i + psz <= h) \ + end_is++; \ + if (i - psz >= 0 && (i - psz) % pstr == 0 && start_is < end_is) \ + start_is++; + +#define UPDATE_SPARSE_J_COORDINATES \ + if (j % pstr == 0 && j + psz <= dis->w) \ + end_js++; \ + if (j - psz >= 0 && (j - psz) % pstr == 0 && start_js < end_js) \ + start_js++; + + start_is = 0; + end_is = -1; + for (int i = 0; i < start_i; i++) + { + UPDATE_SPARSE_I_COORDINATES; + } + for (int i = start_i; i < end_i; i++) + { + UPDATE_SPARSE_I_COORDINATES; + start_js = 0; + end_js = -1; + for (int j = 0; j < dis->w; j++) + { + UPDATE_SPARSE_J_COORDINATES; + float coef, sum_coef = 0.0f; + float sum_Ux = 0.0f; + float sum_Uy = 0.0f; + + /* Iterate through all the patches that overlap the current location (i,j) */ + for (int is = start_is; is <= end_is; is++) + for (int js = start_js; js <= end_js; js++) + { + j_m = min(max(j + Sx_ptr[is * dis->ws + js], 0.0f), dis->w - 1.0f - EPS); + i_m = min(max(i + Sy_ptr[is * dis->ws + js], 0.0f), dis->h - 1.0f - EPS); + j_l = (int)j_m; + j_u = j_l + 1; + i_l = (int)i_m; + i_u = i_l + 1; + diff = (j_m - j_l) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_u] + + (j_u - j_m) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_l] + + (j_m - j_l) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_u] + + (j_u - j_m) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_l] - I0_ptr[i * dis->w + j]; + coef = 1 / max(1.0f, abs(diff)); + sum_Ux += coef * Sx_ptr[is * dis->ws + js]; + sum_Uy += coef * Sy_ptr[is * dis->ws + js]; + sum_coef += coef; + } + Ux_ptr[i * dis->w + j] = sum_Ux / sum_coef; + Uy_ptr[i * dis->w + j] = sum_Uy / sum_coef; + } + } +#undef UPDATE_SPARSE_I_COORDINATES +#undef UPDATE_SPARSE_J_COORDINATES +} + +#ifdef HAVE_OPENCL +bool DISOpticalFlowImpl::ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, + UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level) +{ + size_t globalSize[] = {(size_t)ws, (size_t)hs}; + size_t localSize[] = {16, 16}; + int idx; + int num_inner_iter = (int)floor(grad_descent_iter / (float)num_iter); + + for (int iter = 0; iter < num_iter; iter++) + { + if (iter == 0) + { + ocl::Kernel k1("dis_patch_inverse_search_fwd_1", ocl::video::dis_flow_oclsrc); + size_t global_sz[] = {(size_t)hs * 8}; + size_t local_sz[] = {8}; + idx = 0; + + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k1.set(idx, (int)border_size); + idx = k1.set(idx, (int)patch_size); + idx = k1.set(idx, (int)patch_stride); + idx = k1.set(idx, (int)w); + idx = k1.set(idx, (int)h); + idx = k1.set(idx, (int)ws); + idx = k1.set(idx, (int)hs); + idx = 
k1.set(idx, (int)pyr_level); + idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sx)); + idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sy)); + if (!k1.run(1, global_sz, local_sz, false)) + return false; + + ocl::Kernel k2("dis_patch_inverse_search_fwd_2", ocl::video::dis_flow_oclsrc); + idx = 0; + + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); + idx = k2.set(idx, (int)border_size); + idx = k2.set(idx, (int)patch_size); + idx = k2.set(idx, (int)patch_stride); + idx = k2.set(idx, (int)w); + idx = k2.set(idx, (int)h); + idx = k2.set(idx, (int)ws); + idx = k2.set(idx, (int)hs); + idx = k2.set(idx, (int)num_inner_iter); + idx = k2.set(idx, (int)pyr_level); + idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k2.run(2, globalSize, localSize, false)) + return false; + } + else + { + ocl::Kernel k3("dis_patch_inverse_search_bwd_1", ocl::video::dis_flow_oclsrc); + size_t global_sz[] = {(size_t)hs * 8}; + size_t local_sz[] = {8}; + idx = 0; + + idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k3.set(idx, (int)border_size); + idx = k3.set(idx, (int)patch_size); + idx = k3.set(idx, (int)patch_stride); + idx = k3.set(idx, (int)w); + idx = k3.set(idx, (int)h); + idx = k3.set(idx, (int)ws); + idx = k3.set(idx, (int)hs); + idx = k3.set(idx, (int)pyr_level); + idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k3.run(1, global_sz, local_sz, false)) + return false; + + ocl::Kernel k4("dis_patch_inverse_search_bwd_2", ocl::video::dis_flow_oclsrc); + idx = 0; + + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); + idx = k4.set(idx, (int)border_size); + idx = k4.set(idx, (int)patch_size); + idx = k4.set(idx, (int)patch_stride); + idx = k4.set(idx, (int)w); + idx = k4.set(idx, (int)h); + idx = k4.set(idx, (int)ws); + idx = k4.set(idx, (int)hs); + idx = k4.set(idx, (int)num_inner_iter); + idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k4.run(2, globalSize, localSize, false)) + return false; + } + } + return true; +} + +bool DISOpticalFlowImpl::ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1) +{ + size_t globalSize[] = {(size_t)w, (size_t)h}; + size_t localSize[] = {16, 16}; + + ocl::Kernel kernel("dis_densification", ocl::video::dis_flow_oclsrc); + 
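+    // One work-item per dense pixel (globalSize = {w, h}, localSize = {16, 16}); the kernel mirrors
+    // the CPU Densification_ParBody: each pixel averages the sparse vectors of all overlapping
+    // patches, weighted by how well each vector photometrically matches I0 against I1.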
kernel.args(ocl::KernelArg::PtrReadOnly(src_Sx), + ocl::KernelArg::PtrReadOnly(src_Sy), + ocl::KernelArg::PtrReadOnly(_I0), + ocl::KernelArg::PtrReadOnly(_I1), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(dst_Ux), + ocl::KernelArg::PtrWriteOnly(dst_Uy)); + return kernel.run(2, globalSize, localSize, false); +} + +void DISOpticalFlowImpl::ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow) +{ + u_I0s.resize(coarsest_scale + 1); + u_I1s.resize(coarsest_scale + 1); + u_I1s_ext.resize(coarsest_scale + 1); + u_I0xs.resize(coarsest_scale + 1); + u_I0ys.resize(coarsest_scale + 1); + u_Ux.resize(coarsest_scale + 1); + u_Uy.resize(coarsest_scale + 1); + + vector flow_uv(2); + if (use_flow) + { + split(flow, flow_uv); + u_initial_Ux.resize(coarsest_scale + 1); + u_initial_Uy.resize(coarsest_scale + 1); + } + + int fraction = 1; + int cur_rows = 0, cur_cols = 0; + + for (int i = 0; i <= coarsest_scale; i++) + { + /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ + if (i == finest_scale) + { + cur_rows = I0.rows / fraction; + cur_cols = I0.cols / fraction; + u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(I0, u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); + u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(I1, u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); + + /* These buffers are reused in each scale so we initialize them once on the finest scale: */ + u_Sx.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_Sy.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + + u_I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0x_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0y_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + + u_U.create(cur_rows, cur_cols, CV_32FC2); + } + else if (i > finest_scale) + { + cur_rows = u_I0s[i - 1].rows / 2; + cur_cols = u_I0s[i - 1].cols / 2; + u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(u_I0s[i - 1], u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); + u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(u_I1s[i - 1], u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); + } + + if (i >= finest_scale) + { + u_I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size, CV_8UC1); + copyMakeBorder(u_I1s[i], u_I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); + u_I0xs[i].create(cur_rows, cur_cols, CV_16SC1); + u_I0ys[i].create(cur_rows, cur_cols, CV_16SC1); + spatialGradient(u_I0s[i], u_I0xs[i], u_I0ys[i]); + u_Ux[i].create(cur_rows, cur_cols, CV_32FC1); + u_Uy[i].create(cur_rows, cur_cols, CV_32FC1); + variational_refinement_processors[i]->setAlpha(variational_refinement_alpha); + variational_refinement_processors[i]->setDelta(variational_refinement_delta); + variational_refinement_processors[i]->setGamma(variational_refinement_gamma); + 
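+            // Each pyramid level owns its own VariationalRefinement instance; the alpha/delta/gamma
+            // weights are forwarded here, and the SOR/fixed-point iteration counts are set just below.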
variational_refinement_processors[i]->setSorIterations(5); + variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); + + if (use_flow) + { + resize(flow_uv[0], u_initial_Ux[i], Size(cur_cols, cur_rows)); + divide(u_initial_Ux[i], static_cast(fraction), u_initial_Ux[i]); + resize(flow_uv[1], u_initial_Uy[i], Size(cur_cols, cur_rows)); + divide(u_initial_Uy[i], static_cast(fraction), u_initial_Uy[i]); + } + } + + fraction *= 2; + } +} + +bool DISOpticalFlowImpl::ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, + UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y) +{ + size_t globalSizeX[] = {(size_t)h}; + size_t localSizeX[] = {16}; + + ocl::Kernel kernelX("dis_precomputeStructureTensor_hor", ocl::video::dis_flow_oclsrc); + kernelX.args(ocl::KernelArg::PtrReadOnly(I0x), + ocl::KernelArg::PtrReadOnly(I0y), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(u_I0xx_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0yy_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0xy_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0x_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0y_buf_aux)); + if (!kernelX.run(1, globalSizeX, localSizeX, false)) + return false; + + size_t globalSizeY[] = {(size_t)ws}; + size_t localSizeY[] = {16}; + + ocl::Kernel kernelY("dis_precomputeStructureTensor_ver", ocl::video::dis_flow_oclsrc); + kernelY.args(ocl::KernelArg::PtrReadOnly(u_I0xx_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0yy_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0xy_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0x_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0y_buf_aux), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(dst_I0xx), + ocl::KernelArg::PtrWriteOnly(dst_I0yy), + ocl::KernelArg::PtrWriteOnly(dst_I0xy), + ocl::KernelArg::PtrWriteOnly(dst_I0x), + ocl::KernelArg::PtrWriteOnly(dst_I0y)); + return kernelY.run(1, globalSizeY, localSizeY, false); +} + +bool DISOpticalFlowImpl::ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow) +{ + UMat I0Mat = I0.getUMat(); + UMat I1Mat = I1.getUMat(); + bool use_input_flow = false; + if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2) + use_input_flow = true; + else + flow.create(I1Mat.size(), CV_32FC2); + UMat &u_flowMat = flow.getUMatRef(); + coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code serach for maximal movement of width/4 */ + (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch*/ + + ocl_prepareBuffers(I0Mat, I1Mat, u_flowMat, use_input_flow); + u_Ux[coarsest_scale].setTo(0.0f); + u_Uy[coarsest_scale].setTo(0.0f); + + for (int i = coarsest_scale; i >= finest_scale; i--) + { + w = u_I0s[i].cols; + h = u_I0s[i].rows; + ws = 1 + (w - patch_size) / patch_stride; + hs = 1 + (h - patch_size) / patch_stride; + + if (!ocl_precomputeStructureTensor(u_I0xx_buf, u_I0yy_buf, u_I0xy_buf, + u_I0x_buf, u_I0y_buf, u_I0xs[i], u_I0ys[i])) + return false; + + if (!ocl_PatchInverseSearch(u_Ux[i], u_Uy[i], u_I0s[i], u_I1s_ext[i], u_I0xs[i], u_I0ys[i], 2, i)) + return false; + + if (!ocl_Densification(u_Ux[i], u_Uy[i], u_Sx, u_Sy, u_I0s[i], u_I1s[i])) + return false; + + if (variational_refinement_iter > 0) + variational_refinement_processors[i]->calcUV(u_I0s[i], u_I1s[i], + u_Ux[i].getMat(ACCESS_WRITE), u_Uy[i].getMat(ACCESS_WRITE)); + + if (i > finest_scale) + { + resize(u_Ux[i], u_Ux[i - 
1], u_Ux[i - 1].size()); + resize(u_Uy[i], u_Uy[i - 1], u_Uy[i - 1].size()); + multiply(u_Ux[i - 1], 2, u_Ux[i - 1]); + multiply(u_Uy[i - 1], 2, u_Uy[i - 1]); + } + } + vector uxy(2); + uxy[0] = u_Ux[finest_scale]; + uxy[1] = u_Uy[finest_scale]; + merge(uxy, u_U); + resize(u_U, u_flowMat, u_flowMat.size()); + multiply(u_flowMat, 1 << finest_scale, u_flowMat); + + return true; +} +#endif + +void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) +{ + CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); + CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1); + CV_Assert(I0.sameSize(I1)); + CV_Assert(I0.isContinuous()); + CV_Assert(I1.isContinuous()); + + CV_OCL_RUN(ocl::Device::getDefault().isIntel() && flow.isUMat() && + (patch_size == 8) && (use_spatial_propagation == true), + ocl_calc(I0, I1, flow)); + + Mat I0Mat = I0.getMat(); + Mat I1Mat = I1.getMat(); + bool use_input_flow = false; + if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2) + use_input_flow = true; + else + flow.create(I1Mat.size(), CV_32FC2); + Mat flowMat = flow.getMat(); + coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code serach for maximal movement of width/4 */ + (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch*/ + int num_stripes = getNumThreads(); + + prepareBuffers(I0Mat, I1Mat, flowMat, use_input_flow); + Ux[coarsest_scale].setTo(0.0f); + Uy[coarsest_scale].setTo(0.0f); + + for (int i = coarsest_scale; i >= finest_scale; i--) + { + w = I0s[i].cols; + h = I0s[i].rows; + ws = 1 + (w - patch_size) / patch_stride; + hs = 1 + (h - patch_size) / patch_stride; + + precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x_buf, I0y_buf, I0xs[i], I0ys[i]); + if (use_spatial_propagation) + { + /* Use a fixed number of stripes regardless the number of threads to make inverse search + * with spatial propagation reproducible + */ + parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], + I1s_ext[i], I0xs[i], I0ys[i], 2, i)); + } + else + { + parallel_for_(Range(0, num_stripes), + PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i], + I0xs[i], I0ys[i], 1, i)); + } + + parallel_for_(Range(0, num_stripes), + Densification_ParBody(*this, num_stripes, I0s[i].rows, Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i])); + if (variational_refinement_iter > 0) + variational_refinement_processors[i]->calcUV(I0s[i], I1s[i], Ux[i], Uy[i]); + + if (i > finest_scale) + { + resize(Ux[i], Ux[i - 1], Ux[i - 1].size()); + resize(Uy[i], Uy[i - 1], Uy[i - 1].size()); + Ux[i - 1] *= 2; + Uy[i - 1] *= 2; + } + } + Mat uxy[] = {Ux[finest_scale], Uy[finest_scale]}; + merge(uxy, 2, U); + resize(U, flowMat, flowMat.size()); + flowMat *= 1 << finest_scale; +} + +void DISOpticalFlowImpl::collectGarbage() +{ + I0s.clear(); + I1s.clear(); + I1s_ext.clear(); + I0xs.clear(); + I0ys.clear(); + Ux.clear(); + Uy.clear(); + U.release(); + Sx.release(); + Sy.release(); + I0xx_buf.release(); + I0yy_buf.release(); + I0xy_buf.release(); + I0xx_buf_aux.release(); + I0yy_buf_aux.release(); + I0xy_buf_aux.release(); + +#ifdef HAVE_OPENCL + u_I0s.clear(); + u_I1s.clear(); + u_I1s_ext.clear(); + u_I0xs.clear(); + u_I0ys.clear(); + u_Ux.clear(); + u_Uy.clear(); + u_U.release(); + u_Sx.release(); + u_Sy.release(); + u_I0xx_buf.release(); + u_I0yy_buf.release(); + u_I0xy_buf.release(); + 
u_I0xx_buf_aux.release(); + u_I0yy_buf_aux.release(); + u_I0xy_buf_aux.release(); +#endif + + for (int i = finest_scale; i <= coarsest_scale; i++) + variational_refinement_processors[i]->collectGarbage(); + variational_refinement_processors.clear(); +} + +Ptr DISOpticalFlow::create(int preset) +{ + Ptr dis = makePtr(); + dis->setPatchSize(8); + if (preset == DISOpticalFlow::PRESET_ULTRAFAST) + { + dis->setFinestScale(2); + dis->setPatchStride(4); + dis->setGradientDescentIterations(12); + dis->setVariationalRefinementIterations(0); + } + else if (preset == DISOpticalFlow::PRESET_FAST) + { + dis->setFinestScale(2); + dis->setPatchStride(4); + dis->setGradientDescentIterations(16); + dis->setVariationalRefinementIterations(5); + } + else if (preset == DISOpticalFlow::PRESET_MEDIUM) + { + dis->setFinestScale(1); + dis->setPatchStride(3); + dis->setGradientDescentIterations(25); + dis->setVariationalRefinementIterations(5); + } + + return dis; +} +} diff --git a/modules/video/src/opencl/dis_flow.cl b/modules/video/src/opencl/dis_flow.cl new file mode 100644 index 0000000000..d2bc039d22 --- /dev/null +++ b/modules/video/src/opencl/dis_flow.cl @@ -0,0 +1,521 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#define EPS 0.001f +#define INF 1E+10F + +__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x, + __global const short *I0y, + int patch_size, int patch_stride, + int w, int h, int ws, + __global float *I0xx_aux_ptr, + __global float *I0yy_aux_ptr, + __global float *I0xy_aux_ptr, + __global float *I0x_aux_ptr, + __global float *I0y_aux_ptr) +{ + + int i = get_global_id(0); + + if (i >= h) return; + + const __global short *x_row = I0x + i * w; + const __global short *y_row = I0y + i * w; + + float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; + float8 x_vec = convert_float8(vload8(0, x_row)); + float8 y_vec = convert_float8(vload8(0, y_row)); + sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi); + sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi); + sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi); + sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f); + sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f); + + I0xx_aux_ptr[i * ws] = sum_xx; + I0yy_aux_ptr[i * ws] = sum_yy; + I0xy_aux_ptr[i * ws] = sum_xy; + I0x_aux_ptr[i * ws] = sum_x; + I0y_aux_ptr[i * ws] = sum_y; + + int js = 1; + for (int j = patch_size; j < w; j++) + { + short x_val1 = x_row[j]; + short x_val2 = x_row[j - patch_size]; + short y_val1 = y_row[j]; + short y_val2 = y_row[j - patch_size]; + sum_xx += (x_val1 * x_val1 - x_val2 * x_val2); + sum_yy += (y_val1 * y_val1 - y_val2 * y_val2); + sum_xy += (x_val1 * y_val1 - x_val2 * y_val2); + sum_x += (x_val1 - x_val2); + sum_y += (y_val1 - y_val2); + if ((j - patch_size + 1) % patch_stride == 0) + { + int index = i * ws + js; + I0xx_aux_ptr[index] = sum_xx; + I0yy_aux_ptr[index] = sum_yy; + I0xy_aux_ptr[index] = sum_xy; + I0x_aux_ptr[index] = sum_x; + I0y_aux_ptr[index] = sum_y; + js++; + } + } +} + +__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr, + __global const float *I0yy_aux_ptr, + __global const float *I0xy_aux_ptr, + __global const float *I0x_aux_ptr, + __global const float *I0y_aux_ptr, + int patch_size, int patch_stride, + int w, int h, int ws, + __global float *I0xx_ptr, + __global float *I0yy_ptr, + __global float 
*I0xy_ptr, + __global float *I0x_ptr, + __global float *I0y_ptr) +{ + int j = get_global_id(0); + + if (j >= ws) return; + + float sum_xx, sum_yy, sum_xy, sum_x, sum_y; + sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f; + + for (int i = 0; i < patch_size; i++) + { + sum_xx += I0xx_aux_ptr[i * ws + j]; + sum_yy += I0yy_aux_ptr[i * ws + j]; + sum_xy += I0xy_aux_ptr[i * ws + j]; + sum_x += I0x_aux_ptr[i * ws + j]; + sum_y += I0y_aux_ptr[i * ws + j]; + } + I0xx_ptr[j] = sum_xx; + I0yy_ptr[j] = sum_yy; + I0xy_ptr[j] = sum_xy; + I0x_ptr[j] = sum_x; + I0y_ptr[j] = sum_y; + + int is = 1; + for (int i = patch_size; i < h; i++) + { + sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); + sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); + sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); + sum_x += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); + sum_y += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); + + if ((i - patch_size + 1) % patch_stride == 0) + { + I0xx_ptr[is * ws + j] = sum_xx; + I0yy_ptr[is * ws + j] = sum_yy; + I0xy_ptr[is * ws + j] = sum_xy; + I0x_ptr[is * ws + j] = sum_x; + I0y_ptr[is * ws + j] = sum_y; + is++; + } + } +} + +__kernel void dis_densification(__global const float *sx, __global const float *sy, + __global const uchar *i0, __global const uchar *i1, + int psz, int pstr, + int w, int h, int ws, + __global float *ux, __global float *uy) +{ + int x = get_global_id(0); + int y = get_global_id(1); + int i, j; + + if (x >= w || y >= h) return; + + int start_is, end_is; + int start_js, end_js; + + end_is = min(y / pstr, (h - psz) / pstr); + start_is = max(0, y - psz + pstr) / pstr; + start_is = min(start_is, end_is); + + end_js = min(x / pstr, (w - psz) / pstr); + start_js = max(0, x - psz + pstr) / pstr; + start_js = min(start_js, end_js); + + float coef, sum_coef = 0.0f; + float sum_Ux = 0.0f; + float sum_Uy = 0.0f; + + int i_l, i_u; + int j_l, j_u; + float i_m, j_m, diff; + + i = y; + j = x; + + /* Iterate through all the patches that overlap the current location (i,j) */ + for (int is = start_is; is <= end_is; is++) + for (int js = start_js; js <= end_js; js++) + { + float sx_val = sx[is * ws + js]; + float sy_val = sy[is * ws + js]; + uchar2 i1_vec1, i1_vec2; + + j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS); + i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS); + j_l = (int)j_m; + j_u = j_l + 1; + i_l = (int)i_m; + i_u = i_l + 1; + i1_vec1 = vload2(0, i1 + i_u * w + j_l); + i1_vec2 = vload2(0, i1 + i_l * w + j_l); + diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y + + (j_u - j_m) * (i_m - i_l) * i1_vec1.x + + (j_m - j_l) * (i_u - i_m) * i1_vec2.y + + (j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j]; + coef = 1 / max(1.0f, fabs(diff)); + sum_Ux += coef * sx_val; + sum_Uy += coef * sy_val; + sum_coef += coef; + } + + ux[i * w + j] = sum_Ux / sum_coef; + uy[i * w + j] = sum_Uy / sum_coef; +} + +#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ + i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ + j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ + \ + w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ + w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ + w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ + w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); + +float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, + int I0_stride, int I1_stride, + float w00, float w01, float w10, 
float w11, int patch_sz, int i) +{ + float sum_diff = 0.0f, sum_diff_sq = 0.0f; + int n = patch_sz * patch_sz; + + uchar8 I1_vec1, I1_vec2, I0_vec; + uchar I1_val1, I1_val2; + + I0_vec = vload8(0, I0_ptr + i * I0_stride); + I1_vec1 = vload8(0, I1_ptr + i * I1_stride); + I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); + I1_val1 = I1_ptr[i * I1_stride + 8]; + I1_val2 = I1_ptr[(i + 1) * I1_stride + 8]; + + float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + + w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - + convert_float8(I0_vec); + + sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); + sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); + + sum_diff = sub_group_reduce_add(sum_diff); + sum_diff_sq = sub_group_reduce_add(sum_diff_sq); + + return sum_diff_sq - sum_diff * sum_diff / n; +} + +__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr, + __global const uchar *I0_ptr, __global const uchar *I1_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int id = get_global_id(0); + int is = id / 8; + if (id >= (hs * 8)) return; + + int i = is * patch_stride; + int j = 0; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float i_I1, j_I1, w00, w01, w10, w11; + + float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2]; + float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2]; + Sx_ptr[is * ws] = prev_Ux; + Sy_ptr[is * ws] = prev_Uy; + j += patch_stride; + + int sid = get_sub_group_local_id(); + for (int js = 1; js < ws; js++, j += patch_stride) + { + float min_SSD, cur_SSD; + float Ux = Ux_ptr[(i + psz2) * w + j + psz2]; + float Uy = Uy_ptr[(i + psz2) * w + j + psz2]; + + INIT_BILINEAR_WEIGHTS(Ux, Uy); + min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + + INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy); + cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + if (cur_SSD < min_SSD) + { + Ux = prev_Ux; + Uy = prev_Uy; + } + + prev_Ux = Ux; + prev_Uy = Uy; + Sx_ptr[is * ws + js] = Ux; + Sy_ptr[is * ws + js] = Uy; + } +} + +float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, + const __global short *I0x_ptr, const __global short *I0y_ptr, + int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz, float x_grad_sum, float y_grad_sum) +{ + float sum_diff = 0.0, sum_diff_sq = 0.0; + float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; + int n = patch_sz * patch_sz; + uchar8 I1_vec1, I1_vec2; + uchar I1_val1, I1_val2; + + for (int i = 0; i < 8; i++) + { + uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride); + + I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2; + I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); + I1_val1 = (i == 0) ? 
I1_ptr[i * I1_stride + patch_sz] : I1_val2; + I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz]; + + float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + + w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - + convert_float8(I0_vec); + + sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); + sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); + + short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride); + short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride); + + sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo)); + sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi)); + sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo)); + sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi)); + } + + float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; + float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; + float SSD = sum_diff_sq - sum_diff * sum_diff / n; + + return (float3)(SSD, dst_dUx, dst_dUy); +} + +__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr, + __global const uchar *I0_ptr, __global const uchar *I1_ptr, + __global const short *I0x_ptr, __global const short *I0y_ptr, + __global const float *xx_ptr, __global const float *yy_ptr, + __global const float *xy_ptr, + __global const float *x_ptr, __global const float *y_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int num_inner_iter, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int js = get_global_id(0); + int is = get_global_id(1); + int i = is * patch_stride; + int j = js * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + int index = is * ws + js; + + if (js >= ws || is >= hs) return; + + float Ux = Sx_ptr[index]; + float Uy = Sy_ptr[index]; + float cur_Ux = Ux; + float cur_Uy = Uy; + float cur_xx = xx_ptr[index]; + float cur_yy = yy_ptr[index]; + float cur_xy = xy_ptr[index]; + float detH = cur_xx * cur_yy - cur_xy * cur_xy; + + if (fabs(detH) < EPS) detH = EPS; + + float invH11 = cur_yy / detH; + float invH12 = -cur_xy / detH; + float invH22 = cur_xx / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[index]; + float y_grad_sum = y_ptr[index]; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + float3 res; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + res = processPatchMeanNorm(I0_ptr + i * w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, + I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + + SSD = res.x; + dUx = res.y; + dUy = res.z; + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + + cur_Ux -= dx; + cur_Uy -= dy; + + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); + if (dot(vec, vec) <= (float)(psz * psz)) + { + Sx_ptr[index] = cur_Ux; + Sy_ptr[index] = cur_Uy; + } +} + +__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int id = get_global_id(0); + int 
is = id / 8; + if (id >= (hs * 8)) return; + + is = (hs - 1 - is); + int i = is * patch_stride; + int j = (ws - 2) * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float i_I1, j_I1, w00, w01, w10, w11; + + int sid = get_sub_group_local_id(); + for (int js = (ws - 2); js > -1; js--, j -= patch_stride) + { + float min_SSD, cur_SSD; + float2 Ux = vload2(0, Sx_ptr + is * ws + js); + float2 Uy = vload2(0, Sy_ptr + is * ws + js); + + INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x); + min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + + INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y); + cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + if (cur_SSD < min_SSD) + { + Sx_ptr[is * ws + js] = Ux.y; + Sy_ptr[is * ws + js] = Uy.y; + } + } +} + +__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr, + __global const short *I0x_ptr, __global const short *I0y_ptr, + __global const float *xx_ptr, __global const float *yy_ptr, + __global const float *xy_ptr, + __global const float *x_ptr, __global const float *y_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int num_inner_iter, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int js = get_global_id(0); + int is = get_global_id(1); + if (js >= ws || is >= hs) return; + + js = (ws - 1 - js); + is = (hs - 1 - is); + + int j = js * patch_stride; + int i = is * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + int index = is * ws + js; + + float Ux = Sx_ptr[index]; + float Uy = Sy_ptr[index]; + float cur_Ux = Ux; + float cur_Uy = Uy; + float cur_xx = xx_ptr[index]; + float cur_yy = yy_ptr[index]; + float cur_xy = xy_ptr[index]; + float detH = cur_xx * cur_yy - cur_xy * cur_xy; + + if (fabs(detH) < EPS) detH = EPS; + + float invH11 = cur_yy / detH; + float invH12 = -cur_xy / detH; + float invH22 = cur_xx / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[index]; + float y_grad_sum = y_ptr[index]; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + float3 res; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + res = processPatchMeanNorm(I0_ptr + i * w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, + I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + + SSD = res.x; + dUx = res.y; + dUy = res.z; + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + + cur_Ux -= dx; + cur_Uy -= dy; + + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); + if ((dot(vec, vec)) <= (float)(psz * psz)) + { + Sx_ptr[index] = cur_Ux; + Sy_ptr[index] = cur_Uy; + } +} diff --git a/modules/video/src/opencl/optical_flow_tvl1.cl b/modules/video/src/opencl/optical_flow_tvl1.cl deleted file mode 100644 index ce67879fea..0000000000 --- a/modules/video/src/opencl/optical_flow_tvl1.cl +++ /dev/null @@ -1,378 +0,0 
@@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jin Ma jin@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step, - __global float* dx, __global float* dy, int d_step) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if((x < src_col)&&(y < src_row)) - { - int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); - int src_x2 = (x - 1) > 0 ? (x -1) : 0; - dx[y * d_step+ x] = 0.5f * (src_ptr[y * src_step + src_x1] - src_ptr[y * src_step+ src_x2]); - - int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); - int src_y2 = (y - 1) > 0 ? 
(y - 1) : 0; - dy[y * d_step+ x] = 0.5f * (src_ptr[src_y1 * src_step + x] - src_ptr[src_y2 * src_step+ x]); - } - -} - -inline float bicubicCoeff(float x_) -{ - - float x = fabs(x_); - if (x <= 1.0f) - return x * x * (1.5f * x - 2.5f) + 1.0f; - else if (x < 2.0f) - return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; - else - return 0.0f; -} - -__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, - image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, - __global const float* u1, int u1_step, - __global const float* u2, - __global float* I1w, - __global float* I1wx, /*int I1wx_step,*/ - __global float* I1wy, /*int I1wy_step,*/ - __global float* grad, /*int grad_step,*/ - __global float* rho, - int I1w_step, - int u2_step, - int u1_offset_x, - int u1_offset_y, - int u2_offset_x, - int u2_offset_y) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < I0_col&&y < I0_row) - { - //float u1Val = u1(y, x); - float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - //float u2Val = u2(y, x); - float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - - float wx = x + u1Val; - float wy = y + u2Val; - - int xmin = ceil(wx - 2.0f); - int xmax = floor(wx + 2.0f); - - int ymin = ceil(wy - 2.0f); - int ymax = floor(wy + 2.0f); - - float sum = 0.0f; - float sumx = 0.0f; - float sumy = 0.0f; - float wsum = 0.0f; - sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; - for (int cy = ymin; cy <= ymax; ++cy) - { - for (int cx = xmin; cx <= xmax; ++cx) - { - float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); - //sum += w * tex2D(tex_I1 , cx, cy); - int2 cood = (int2)(cx, cy); - sum += w * read_imagef(tex_I1, sampleri, cood).x; - //sumx += w * tex2D(tex_I1x, cx, cy); - sumx += w * read_imagef(tex_I1x, sampleri, cood).x; - //sumy += w * tex2D(tex_I1y, cx, cy); - sumy += w * read_imagef(tex_I1y, sampleri, cood).x; - wsum += w; - } - } - float coeff = 1.0f / wsum; - float I1wVal = sum * coeff; - float I1wxVal = sumx * coeff; - float I1wyVal = sumy * coeff; - I1w[y * I1w_step + x] = I1wVal; - I1wx[y * I1w_step + x] = I1wxVal; - I1wy[y * I1w_step + x] = I1wyVal; - float Ix2 = I1wxVal * I1wxVal; - float Iy2 = I1wyVal * I1wyVal; - - // store the |Grad(I1)|^2 - grad[y * I1w_step + x] = Ix2 + Iy2; - - // compute the constant part of the rho function - float I0Val = I0[y * I0_step + x]; - rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; - } -} - -inline float readImage(__global const float *image, int x, int y, int rows, int cols, int elemCntPerRow) -{ - int i0 = clamp(x, 0, cols - 1); - int j0 = clamp(y, 0, rows - 1); - - return image[j0 * elemCntPerRow + i0]; -} - -__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, - __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, - __global const float* u1, int u1_step, - __global const float* u2, - __global float* I1w, - __global float* I1wx, /*int I1wx_step,*/ - __global float* I1wy, /*int I1wy_step,*/ - __global float* grad, /*int grad_step,*/ - __global float* rho, - int I1w_step, - int u2_step, - int I1_step, - int I1x_step) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < I0_col&&y < I0_row) - { - //float u1Val = u1(y, x); - float u1Val = u1[y * u1_step + x]; - //float u2Val = u2(y, x); - float u2Val = u2[y * u2_step + x]; - - float wx = x + u1Val; - float wy = y + u2Val; - - int xmin = ceil(wx - 2.0f); - 
int xmax = floor(wx + 2.0f); - - int ymin = ceil(wy - 2.0f); - int ymax = floor(wy + 2.0f); - - float sum = 0.0f; - float sumx = 0.0f; - float sumy = 0.0f; - float wsum = 0.0f; - - for (int cy = ymin; cy <= ymax; ++cy) - { - for (int cx = xmin; cx <= xmax; ++cx) - { - float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); - - int2 cood = (int2)(cx, cy); - sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); - sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); - sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); - wsum += w; - } - } - - float coeff = 1.0f / wsum; - - float I1wVal = sum * coeff; - float I1wxVal = sumx * coeff; - float I1wyVal = sumy * coeff; - - I1w[y * I1w_step + x] = I1wVal; - I1wx[y * I1w_step + x] = I1wxVal; - I1wy[y * I1w_step + x] = I1wyVal; - - float Ix2 = I1wxVal * I1wxVal; - float Iy2 = I1wyVal * I1wyVal; - - // store the |Grad(I1)|^2 - grad[y * I1w_step + x] = Ix2 + Iy2; - - // compute the constant part of the rho function - float I0Val = I0[y * I0_step + x]; - rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; - } - -} - - -__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, - __global const float* u2, - __global float* p11, int p11_step, - __global float* p12, - __global float* p21, - __global float* p22, - float taut, - int u2_step, - int u1_offset_x, - int u1_offset_y, - int u2_offset_x, - int u2_offset_y) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < u1_col && y < u1_row) - { - int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); - float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - - int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); - float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - - int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); - float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - - int src_y2 = (y + 1) < (u1_row - 1) ? 
(y + 1) : (u1_row - 1); - float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - - float g1 = hypot(u1x, u1y); - float g2 = hypot(u2x, u2y); - - float ng1 = 1.0f + taut * g1; - float ng2 = 1.0f + taut * g2; - - p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; - p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; - p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; - p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; - } - -} - -inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) -{ - - if (x > 0 && y > 0) - { - float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; - float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; - return v1x + v2y; - } - else - { - if (y > 0) - return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; - else - { - if (x > 0) - return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; - else - return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; - } - } - -} - -__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, - __global const float* I1wy, /*int I1wy_step,*/ - __global const float* grad, /*int grad_step,*/ - __global const float* rho_c, /*int rho_c_step,*/ - __global const float* p11, /*int p11_step,*/ - __global const float* p12, /*int p12_step,*/ - __global const float* p21, /*int p21_step,*/ - __global const float* p22, /*int p22_step,*/ - __global float* u1, int u1_step, - __global float* u2, - __global float* error, float l_t, float theta, int u2_step, - int u1_offset_x, - int u1_offset_y, - int u2_offset_x, - int u2_offset_y, - char calc_error) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < I1wx_col && y < I1wx_row) - { - float I1wxVal = I1wx[y * I1wx_step + x]; - float I1wyVal = I1wy[y * I1wx_step + x]; - float gradVal = grad[y * I1wx_step + x]; - float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; - float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; - - float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); - - // estimate the values of the variable (v1, v2) (thresholding operator TH) - - float d1 = 0.0f; - float d2 = 0.0f; - - if (rho < -l_t * gradVal) - { - d1 = l_t * I1wxVal; - d2 = l_t * I1wyVal; - } - else if (rho > l_t * gradVal) - { - d1 = -l_t * I1wxVal; - d2 = -l_t * I1wyVal; - } - else if (gradVal > 1.192092896e-07f) - { - float fi = -rho / gradVal; - d1 = fi * I1wxVal; - d2 = fi * I1wyVal; - } - - float v1 = u1OldVal + d1; - float v2 = u2OldVal + d2; - - // compute the divergence of the dual variable (p1, p2) - - float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); - float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); - - // estimate the values of the optical flow (u1, u2) - - float u1NewVal = v1 + theta * div_p1; - float u2NewVal = v2 + theta * div_p2; - - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; - - if(calc_error) - { - float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); - error[y * I1wx_step + x] = n1 + n2; - } - } -} diff --git a/modules/video/src/optical_flow_io.cpp b/modules/video/src/optical_flow_io.cpp new file mode 100644 index 0000000000..8a2d70bc83 --- /dev/null +++ 
b/modules/video/src/optical_flow_io.cpp @@ -0,0 +1,129 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" +#include +#include + +namespace cv { + +const float FLOW_TAG_FLOAT = 202021.25f; +const char *FLOW_TAG_STRING = "PIEH"; +CV_EXPORTS_W Mat readOpticalFlow( const String& path ) +{ + Mat_ flow; + std::ifstream file(path.c_str(), std::ios_base::binary); + if ( !file.good() ) + return flow; // no file - return empty matrix + + float tag; + file.read((char*) &tag, sizeof(float)); + if ( tag != FLOW_TAG_FLOAT ) + return flow; + + int width, height; + + file.read((char*) &width, 4); + file.read((char*) &height, 4); + + flow.create(height, width); + + for ( int i = 0; i < flow.rows; ++i ) + { + for ( int j = 0; j < flow.cols; ++j ) + { + Point2f u; + file.read((char*) &u.x, sizeof(float)); + file.read((char*) &u.y, sizeof(float)); + if ( !file.good() ) + { + flow.release(); + return flow; + } + + flow(i, j) = u; + } + } + file.close(); + return flow; +} + +CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ) +{ + const int nChannels = 2; + + Mat input = flow.getMat(); + if ( input.channels() != nChannels || input.depth() != CV_32F || path.length() == 0 ) + return false; + + std::ofstream file(path.c_str(), std::ofstream::binary); + if ( !file.good() ) + return false; + + int nRows, nCols; + + nRows = (int) input.size().height; + nCols = (int) input.size().width; + + const int headerSize = 12; + char header[headerSize]; + memcpy(header, FLOW_TAG_STRING, 4); + // size of ints is known - has been asserted in the current function + memcpy(header + 4, reinterpret_cast(&nCols), sizeof(nCols)); + memcpy(header + 8, reinterpret_cast(&nRows), sizeof(nRows)); + file.write(header, headerSize); + if ( !file.good() ) + return false; + + int row; + char* p; + for ( row = 0; row < nRows; row++ ) + { + p = input.ptr(row); + file.write(p, nCols * nChannels * sizeof(float)); + if ( !file.good() ) + return false; + } + file.close(); + return true; +} + +} diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp deleted file mode 100644 index dc2dc827ac..0000000000 --- a/modules/video/src/tvl1flow.cpp +++ /dev/null @@ -1,1489 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -/* -// -// This implementation is based on Javier Sánchez Pérez implementation. -// Original BSD license: -// -// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -*/ - -#include "precomp.hpp" -#include "opencl_kernels_video.hpp" - -#include -#include -#include -#include "opencv2/core/opencl/ocl_defs.hpp" - - - -using namespace cv; - -namespace { - -class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow -{ -public: - - OpticalFlowDual_TVL1(double tau_, double lambda_, double theta_, int nscales_, int warps_, - double epsilon_, int innerIterations_, int outerIterations_, - double scaleStep_, double gamma_, int medianFiltering_, - bool useInitialFlow_) : - tau(tau_), lambda(lambda_), theta(theta_), gamma(gamma_), nscales(nscales_), - warps(warps_), epsilon(epsilon_), innerIterations(innerIterations_), - outerIterations(outerIterations_), useInitialFlow(useInitialFlow_), - scaleStep(scaleStep_), medianFiltering(medianFiltering_) - { - } - OpticalFlowDual_TVL1(); - - void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; - void collectGarbage() CV_OVERRIDE; - - inline double getTau() const CV_OVERRIDE { return tau; } - inline void setTau(double val) CV_OVERRIDE { tau = val; } - inline double getLambda() const CV_OVERRIDE { return lambda; } - inline void setLambda(double val) CV_OVERRIDE { lambda = val; } - inline double getTheta() const CV_OVERRIDE { return theta; } - inline void setTheta(double val) CV_OVERRIDE { theta = val; } - inline double getGamma() const CV_OVERRIDE { return gamma; } - inline void setGamma(double val) CV_OVERRIDE { gamma = val; } - inline int getScalesNumber() const CV_OVERRIDE { return nscales; } - inline void setScalesNumber(int val) CV_OVERRIDE { nscales = val; } - inline int getWarpingsNumber() const CV_OVERRIDE { return warps; } - inline void setWarpingsNumber(int val) CV_OVERRIDE { warps = val; } - inline double getEpsilon() const CV_OVERRIDE { return epsilon; } - inline void setEpsilon(double val) CV_OVERRIDE { epsilon = val; } - inline int getInnerIterations() const CV_OVERRIDE { return innerIterations; } - inline void setInnerIterations(int val) CV_OVERRIDE { innerIterations = val; } - inline int getOuterIterations() const CV_OVERRIDE { return outerIterations; } - inline void setOuterIterations(int val) CV_OVERRIDE { outerIterations = val; } - inline bool getUseInitialFlow() const CV_OVERRIDE { return useInitialFlow; } - inline void setUseInitialFlow(bool val) CV_OVERRIDE { useInitialFlow = val; } - inline double getScaleStep() const CV_OVERRIDE { return scaleStep; } - inline void setScaleStep(double val) CV_OVERRIDE { scaleStep = val; } - inline int getMedianFiltering() const CV_OVERRIDE { return medianFiltering; } - inline void setMedianFiltering(int val) CV_OVERRIDE { medianFiltering = val; } - -protected: - double tau; - double lambda; - double theta; - double gamma; - int nscales; - int warps; - double epsilon; - int innerIterations; - int outerIterations; - bool useInitialFlow; - double scaleStep; - int medianFiltering; - -private: - void procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2, Mat_& u3); - -#ifdef HAVE_OPENCL - bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2); - - bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow); -#endif - struct dataMat - { - std::vector > I0s; - std::vector > I1s; - std::vector > u1s; - std::vector > u2s; - std::vector > u3s; - - Mat_ I1x_buf; - Mat_ I1y_buf; - - Mat_ flowMap1_buf; - Mat_ flowMap2_buf; - - Mat_ I1w_buf; - Mat_ I1wx_buf; - Mat_ I1wy_buf; - - Mat_ grad_buf; - Mat_ rho_c_buf; - - Mat_ v1_buf; - Mat_ v2_buf; - Mat_ v3_buf; - - Mat_ p11_buf; - Mat_ p12_buf; - Mat_ p21_buf; - Mat_ p22_buf; - 
Mat_ p31_buf; - Mat_ p32_buf; - - Mat_ div_p1_buf; - Mat_ div_p2_buf; - Mat_ div_p3_buf; - - Mat_ u1x_buf; - Mat_ u1y_buf; - Mat_ u2x_buf; - Mat_ u2y_buf; - Mat_ u3x_buf; - Mat_ u3y_buf; - } dm; - -#ifdef HAVE_OPENCL - struct dataUMat - { - std::vector I0s; - std::vector I1s; - std::vector u1s; - std::vector u2s; - - UMat I1x_buf; - UMat I1y_buf; - - UMat I1w_buf; - UMat I1wx_buf; - UMat I1wy_buf; - - UMat grad_buf; - UMat rho_c_buf; - - UMat p11_buf; - UMat p12_buf; - UMat p21_buf; - UMat p22_buf; - - UMat diff_buf; - UMat norm_buf; - } dum; -#endif -}; - -#ifdef HAVE_OPENCL -namespace cv_ocl_tvl1flow -{ - bool centeredGradient(const UMat &src, UMat &dx, UMat &dy); - - bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, - UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, - UMat &grad, UMat &rho); - - bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, - UMat &rho_c, UMat &p11, UMat &p12, - UMat &p21, UMat &p22, UMat &u1, - UMat &u2, UMat &error, float l_t, float theta, char calc_error); - - bool estimateDualVariables(UMat &u1, UMat &u2, - UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut); -} - -bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) -{ - size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows }; - - ocl::Kernel kernel; - if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) - return false; - - int idxArg = 0; - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat - idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col - idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows - idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy - idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step - return kernel.run(2, globalsize, NULL, false); -} - -bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, - UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, - UMat &grad, UMat &rho) -{ - size_t globalsize[2] = { (size_t)I0.cols, (size_t)I0.rows }; - - ocl::Kernel kernel; - if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) - return false; - - int idxArg = 0; - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat - int I0_step = (int)(I0.step / I0.elemSize()); - idxArg = kernel.set(idxArg, I0_step);//I0_step - idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col - idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row - ocl::Image2D imageI1(I1); - ocl::Image2D imageI1x(I1x); - ocl::Image2D imageI1y(I1y); - idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1 - idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x - idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1 - idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad - idxArg = kernel.set(idxArg, 
ocl::KernelArg::PtrWriteOnly(rho));//float* rho - idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step - idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step - int u1_offset_x = (int)((u1.offset) % (u1.step)); - u1_offset_x = (int)(u1_offset_x / u1.elemSize()); - idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x - idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y - int u2_offset_x = (int)((u2.offset) % (u2.step)); - u2_offset_x = (int) (u2_offset_x / u2.elemSize()); - idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x - idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y - return kernel.run(2, globalsize, NULL, false); -} - -bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, - UMat &rho_c, UMat &p11, UMat &p12, - UMat &p21, UMat &p22, UMat &u1, - UMat &u2, UMat &error, float l_t, float theta, char calc_error) -{ - size_t globalsize[2] = { (size_t)I1wx.cols, (size_t)I1wx.rows }; - - ocl::Kernel kernel; - if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) - return false; - - int idxArg = 0; - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx - idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col - idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row - idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1 - idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error - idxArg = kernel.set(idxArg, (float)l_t); //float l_t - idxArg = kernel.set(idxArg, (float)theta); //float theta - idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step - int u1_offset_x = (int)(u1.offset % u1.step); - u1_offset_x = (int) (u1_offset_x / u1.elemSize()); - idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x - idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y - int u2_offset_x = (int)(u2.offset % u2.step); - u2_offset_x = (int)(u2_offset_x / u2.elemSize()); - idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x - idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y - idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error - - return kernel.run(2, globalsize, NULL, false); -} - -bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, - UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut) -{ - size_t globalsize[2] = { (size_t)u1.cols, (size_t)u1.rows }; - - ocl::Kernel kernel; - if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) - return false; - - int idxArg = 0; - idxArg = kernel.set(idxArg, 
ocl::KernelArg::PtrReadOnly(u1));// const float* u1 - idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col - idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row - idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11 - idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21 - idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22 - idxArg = kernel.set(idxArg, (float)(taut)); //float taut - idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step - int u1_offset_x = (int)(u1.offset % u1.step); - u1_offset_x = (int)(u1_offset_x / u1.elemSize()); - idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x - idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y - int u2_offset_x = (int)(u2.offset % u2.step); - u2_offset_x = (int)(u2_offset_x / u2.elemSize()); - idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x - idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y - - return kernel.run(2, globalsize, NULL, false); - -} -#endif - -OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() -{ - tau = 0.25; - lambda = 0.15; - theta = 0.3; - nscales = 5; - warps = 5; - epsilon = 0.01; - gamma = 0.; - innerIterations = 30; - outerIterations = 10; - useInitialFlow = false; - medianFiltering = 5; - scaleStep = 0.8; -} - -void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow) -{ - CV_INSTRUMENT_REGION(); - -#ifndef __APPLE__ - CV_OCL_RUN(_flow.isUMat() && - ocl::Image2D::isFormatSupported(CV_32F, 1, false), - calc_ocl(_I0, _I1, _flow)) -#endif - - Mat I0 = _I0.getMat(); - Mat I1 = _I1.getMat(); - - CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); - CV_Assert( I0.size() == I1.size() ); - CV_Assert( I0.type() == I1.type() ); - CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) ); - CV_Assert( nscales > 0 ); - bool use_gamma = gamma != 0; - // allocate memory for the pyramid structure - dm.I0s.resize(nscales); - dm.I1s.resize(nscales); - dm.u1s.resize(nscales); - dm.u2s.resize(nscales); - dm.u3s.resize(nscales); - - I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); - I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 
1.0 : 255.0); - - dm.u1s[0].create(I0.size()); - dm.u2s[0].create(I0.size()); - if (use_gamma) dm.u3s[0].create(I0.size()); - - if (useInitialFlow) - { - Mat_ mv[] = { dm.u1s[0], dm.u2s[0] }; - split(_flow.getMat(), mv); - } - - dm.I1x_buf.create(I0.size()); - dm.I1y_buf.create(I0.size()); - - dm.flowMap1_buf.create(I0.size()); - dm.flowMap2_buf.create(I0.size()); - - dm.I1w_buf.create(I0.size()); - dm.I1wx_buf.create(I0.size()); - dm.I1wy_buf.create(I0.size()); - - dm.grad_buf.create(I0.size()); - dm.rho_c_buf.create(I0.size()); - - dm.v1_buf.create(I0.size()); - dm.v2_buf.create(I0.size()); - dm.v3_buf.create(I0.size()); - - dm.p11_buf.create(I0.size()); - dm.p12_buf.create(I0.size()); - dm.p21_buf.create(I0.size()); - dm.p22_buf.create(I0.size()); - dm.p31_buf.create(I0.size()); - dm.p32_buf.create(I0.size()); - - dm.div_p1_buf.create(I0.size()); - dm.div_p2_buf.create(I0.size()); - dm.div_p3_buf.create(I0.size()); - - dm.u1x_buf.create(I0.size()); - dm.u1y_buf.create(I0.size()); - dm.u2x_buf.create(I0.size()); - dm.u2y_buf.create(I0.size()); - dm.u3x_buf.create(I0.size()); - dm.u3y_buf.create(I0.size()); - - // create the scales - for (int s = 1; s < nscales; ++s) - { - resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - - if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16) - { - nscales = s; - break; - } - - if (useInitialFlow) - { - resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - - multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]); - multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]); - } - else - { - dm.u1s[s].create(dm.I0s[s].size()); - dm.u2s[s].create(dm.I0s[s].size()); - } - if (use_gamma) dm.u3s[s].create(dm.I0s[s].size()); - } - if (!useInitialFlow) - { - dm.u1s[nscales - 1].setTo(Scalar::all(0)); - dm.u2s[nscales - 1].setTo(Scalar::all(0)); - } - if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0)); - // pyramidal structure for computing the optical flow - for (int s = nscales - 1; s >= 0; --s) - { - // compute the optical flow at the current scale - procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]); - - // if this was the last scale, finish now - if (s == 0) - break; - - // otherwise, upsample the optical flow - - // zoom the optical flow for the next finer scale - resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); - resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); - if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); - - // scale the optical flow with the appropriate zoom factor (don't scale u3!) 
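Concretely, with the default scaleStep of 0.8 the next finer level is larger by a factor of 1 / 0.8 = 1.25, so a displacement of 2.0 px estimated at the coarser scale corresponds to 2.0 * 1.25 = 2.5 px after the multiplications below; u3 belongs to the gamma (illumination) term rather than to a displacement, which is why it is only resized, not rescaled.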
- multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]); - multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]); - } - - Mat uxy[] = { dm.u1s[0], dm.u2s[0] }; - merge(uxy, 2, _flow); -} - -#ifdef HAVE_OPENCL -bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow) -{ - UMat I0 = _I0.getUMat(); - UMat I1 = _I1.getUMat(); - - CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1); - CV_Assert(I0.size() == I1.size()); - CV_Assert(I0.type() == I1.type()); - CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2)); - CV_Assert(nscales > 0); - - // allocate memory for the pyramid structure - dum.I0s.resize(nscales); - dum.I1s.resize(nscales); - dum.u1s.resize(nscales); - dum.u2s.resize(nscales); - //I0s_step == I1s_step - double alpha = I0.depth() == CV_8U ? 1.0 : 255.0; - - I0.convertTo(dum.I0s[0], CV_32F, alpha); - I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); - - dum.u1s[0].create(I0.size(), CV_32FC1); - dum.u2s[0].create(I0.size(), CV_32FC1); - - if (useInitialFlow) - { - std::vector umv; - umv.push_back(dum.u1s[0]); - umv.push_back(dum.u2s[0]); - cv::split(_flow,umv); - } - - dum.I1x_buf.create(I0.size(), CV_32FC1); - dum.I1y_buf.create(I0.size(), CV_32FC1); - - dum.I1w_buf.create(I0.size(), CV_32FC1); - dum.I1wx_buf.create(I0.size(), CV_32FC1); - dum.I1wy_buf.create(I0.size(), CV_32FC1); - - dum.grad_buf.create(I0.size(), CV_32FC1); - dum.rho_c_buf.create(I0.size(), CV_32FC1); - - dum.p11_buf.create(I0.size(), CV_32FC1); - dum.p12_buf.create(I0.size(), CV_32FC1); - dum.p21_buf.create(I0.size(), CV_32FC1); - dum.p22_buf.create(I0.size(), CV_32FC1); - - dum.diff_buf.create(I0.size(), CV_32FC1); - - // create the scales - for (int s = 1; s < nscales; ++s) - { - resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - - if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16) - { - nscales = s; - break; - } - - if (useInitialFlow) - { - resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); - - //scale by scale factor - multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]); - multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]); - } - } - - // pyramidal structure for computing the optical flow - for (int s = nscales - 1; s >= 0; --s) - { - // compute the optical flow at the current scale - if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s])) - return false; - - // if this was the last scale, finish now - if (s == 0) - break; - - // zoom the optical flow for the next finer scale - resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR); - resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR); - - // scale the optical flow with the appropriate zoom factor - multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]); - multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]); - } - - std::vector uxy; - uxy.push_back(dum.u1s[0]); - uxy.push_back(dum.u2s[0]); - merge(uxy, _flow); - return true; -} -#endif - -//////////////////////////////////////////////////////////// -// buildFlowMap - -struct BuildFlowMapBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ u1; - Mat_ u2; - mutable Mat_ map1; - mutable Mat_ map2; -}; - -void 
BuildFlowMapBody::operator() (const Range& range) const -{ - for (int y = range.start; y < range.end; ++y) - { - const float* u1Row = u1[y]; - const float* u2Row = u2[y]; - - float* map1Row = map1[y]; - float* map2Row = map2[y]; - - for (int x = 0; x < u1.cols; ++x) - { - map1Row[x] = x + u1Row[x]; - map2Row[x] = y + u2Row[x]; - } - } -} - -void buildFlowMap(const Mat_& u1, const Mat_& u2, Mat_& map1, Mat_& map2) -{ - CV_DbgAssert( u2.size() == u1.size() ); - CV_DbgAssert( map1.size() == u1.size() ); - CV_DbgAssert( map2.size() == u1.size() ); - - BuildFlowMapBody body; - - body.u1 = u1; - body.u2 = u2; - body.map1 = map1; - body.map2 = map2; - - parallel_for_(Range(0, u1.rows), body); -} - -//////////////////////////////////////////////////////////// -// centeredGradient - -struct CenteredGradientBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ src; - mutable Mat_ dx; - mutable Mat_ dy; -}; - -void CenteredGradientBody::operator() (const Range& range) const -{ - const int last_col = src.cols - 1; - - for (int y = range.start; y < range.end; ++y) - { - const float* srcPrevRow = src[y - 1]; - const float* srcCurRow = src[y]; - const float* srcNextRow = src[y + 1]; - - float* dxRow = dx[y]; - float* dyRow = dy[y]; - - for (int x = 1; x < last_col; ++x) - { - dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]); - dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]); - } - } -} - -void centeredGradient(const Mat_& src, Mat_& dx, Mat_& dy) -{ - CV_DbgAssert( src.rows > 2 && src.cols > 2 ); - CV_DbgAssert( dx.size() == src.size() ); - CV_DbgAssert( dy.size() == src.size() ); - - const int last_row = src.rows - 1; - const int last_col = src.cols - 1; - - // compute the gradient on the center body of the image - { - CenteredGradientBody body; - - body.src = src; - body.dx = dx; - body.dy = dy; - - parallel_for_(Range(1, last_row), body); - } - - // compute the gradient on the first and last rows - for (int x = 1; x < last_col; ++x) - { - dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1)); - dy(0, x) = 0.5f * (src(1, x) - src(0, x)); - - dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1)); - dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x)); - } - - // compute the gradient on the first and last columns - for (int y = 1; y < last_row; ++y) - { - dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0)); - dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0)); - - dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1)); - dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col)); - } - - // compute the gradient at the four corners - dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0)); - dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0)); - - dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1)); - dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col)); - - dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0)); - dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0)); - - dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1)); - dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col)); -} - -//////////////////////////////////////////////////////////// -// forwardGradient - -struct ForwardGradientBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ src; - mutable Mat_ dx; - mutable Mat_ dy; -}; - -void ForwardGradientBody::operator() (const Range& range) const -{ - const int last_col = 
src.cols - 1; - - for (int y = range.start; y < range.end; ++y) - { - const float* srcCurRow = src[y]; - const float* srcNextRow = src[y + 1]; - - float* dxRow = dx[y]; - float* dyRow = dy[y]; - - for (int x = 0; x < last_col; ++x) - { - dxRow[x] = srcCurRow[x + 1] - srcCurRow[x]; - dyRow[x] = srcNextRow[x] - srcCurRow[x]; - } - } -} - -void forwardGradient(const Mat_& src, Mat_& dx, Mat_& dy) -{ - CV_DbgAssert( src.rows > 2 && src.cols > 2 ); - CV_DbgAssert( dx.size() == src.size() ); - CV_DbgAssert( dy.size() == src.size() ); - - const int last_row = src.rows - 1; - const int last_col = src.cols - 1; - - // compute the gradient on the central body of the image - { - ForwardGradientBody body; - - body.src = src; - body.dx = dx; - body.dy = dy; - - parallel_for_(Range(0, last_row), body); - } - - // compute the gradient on the last row - for (int x = 0; x < last_col; ++x) - { - dx(last_row, x) = src(last_row, x + 1) - src(last_row, x); - dy(last_row, x) = 0.0f; - } - - // compute the gradient on the last column - for (int y = 0; y < last_row; ++y) - { - dx(y, last_col) = 0.0f; - dy(y, last_col) = src(y + 1, last_col) - src(y, last_col); - } - - dx(last_row, last_col) = 0.0f; - dy(last_row, last_col) = 0.0f; -} - -//////////////////////////////////////////////////////////// -// divergence - -struct DivergenceBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ v1; - Mat_ v2; - mutable Mat_ div; -}; - -void DivergenceBody::operator() (const Range& range) const -{ - for (int y = range.start; y < range.end; ++y) - { - const float* v1Row = v1[y]; - const float* v2PrevRow = v2[y - 1]; - const float* v2CurRow = v2[y]; - - float* divRow = div[y]; - - for(int x = 1; x < v1.cols; ++x) - { - const float v1x = v1Row[x] - v1Row[x - 1]; - const float v2y = v2CurRow[x] - v2PrevRow[x]; - - divRow[x] = v1x + v2y; - } - } -} - -void divergence(const Mat_& v1, const Mat_& v2, Mat_& div) -{ - CV_DbgAssert( v1.rows > 2 && v1.cols > 2 ); - CV_DbgAssert( v2.size() == v1.size() ); - CV_DbgAssert( div.size() == v1.size() ); - - { - DivergenceBody body; - - body.v1 = v1; - body.v2 = v2; - body.div = div; - - parallel_for_(Range(1, v1.rows), body); - } - - // compute the divergence on the first row - for(int x = 1; x < v1.cols; ++x) - div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x); - - // compute the divergence on the first column - for (int y = 1; y < v1.rows; ++y) - div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0); - - div(0, 0) = v1(0, 0) + v2(0, 0); -} - -//////////////////////////////////////////////////////////// -// calcGradRho - -struct CalcGradRhoBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ I0; - Mat_ I1w; - Mat_ I1wx; - Mat_ I1wy; - Mat_ u1; - Mat_ u2; - mutable Mat_ grad; - mutable Mat_ rho_c; -}; - -void CalcGradRhoBody::operator() (const Range& range) const -{ - for (int y = range.start; y < range.end; ++y) - { - const float* I0Row = I0[y]; - const float* I1wRow = I1w[y]; - const float* I1wxRow = I1wx[y]; - const float* I1wyRow = I1wy[y]; - const float* u1Row = u1[y]; - const float* u2Row = u2[y]; - - float* gradRow = grad[y]; - float* rhoRow = rho_c[y]; - - for (int x = 0; x < I0.cols; ++x) - { - const float Ix2 = I1wxRow[x] * I1wxRow[x]; - const float Iy2 = I1wyRow[x] * I1wyRow[x]; - - // store the |Grad(I1)|^2 - gradRow[x] = Ix2 + Iy2; - - // compute the constant part of the rho function - rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]); - } - } -} - -void calcGradRho(const Mat_& 
I0, const Mat_& I1w, const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, - Mat_& grad, Mat_& rho_c) -{ - CV_DbgAssert( I1w.size() == I0.size() ); - CV_DbgAssert( I1wx.size() == I0.size() ); - CV_DbgAssert( I1wy.size() == I0.size() ); - CV_DbgAssert( u1.size() == I0.size() ); - CV_DbgAssert( u2.size() == I0.size() ); - CV_DbgAssert( grad.size() == I0.size() ); - CV_DbgAssert( rho_c.size() == I0.size() ); - - CalcGradRhoBody body; - - body.I0 = I0; - body.I1w = I1w; - body.I1wx = I1wx; - body.I1wy = I1wy; - body.u1 = u1; - body.u2 = u2; - body.grad = grad; - body.rho_c = rho_c; - - parallel_for_(Range(0, I0.rows), body); -} - -//////////////////////////////////////////////////////////// -// estimateV - -struct EstimateVBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ I1wx; - Mat_ I1wy; - Mat_ u1; - Mat_ u2; - Mat_ u3; - Mat_ grad; - Mat_ rho_c; - mutable Mat_ v1; - mutable Mat_ v2; - mutable Mat_ v3; - float l_t; - float gamma; -}; - -void EstimateVBody::operator() (const Range& range) const -{ - bool use_gamma = gamma != 0; - for (int y = range.start; y < range.end; ++y) - { - const float* I1wxRow = I1wx[y]; - const float* I1wyRow = I1wy[y]; - const float* u1Row = u1[y]; - const float* u2Row = u2[y]; - const float* u3Row = use_gamma?u3[y]:NULL; - const float* gradRow = grad[y]; - const float* rhoRow = rho_c[y]; - - float* v1Row = v1[y]; - float* v2Row = v2[y]; - float* v3Row = use_gamma ? v3[y]:NULL; - - for (int x = 0; x < I1wx.cols; ++x) - { - const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] : - rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]); - float d1 = 0.0f; - float d2 = 0.0f; - float d3 = 0.0f; - if (rho < -l_t * gradRow[x]) - { - d1 = l_t * I1wxRow[x]; - d2 = l_t * I1wyRow[x]; - if (use_gamma) d3 = l_t * gamma; - } - else if (rho > l_t * gradRow[x]) - { - d1 = -l_t * I1wxRow[x]; - d2 = -l_t * I1wyRow[x]; - if (use_gamma) d3 = -l_t * gamma; - } - else if (gradRow[x] > std::numeric_limits::epsilon()) - { - float fi = -rho / gradRow[x]; - d1 = fi * I1wxRow[x]; - d2 = fi * I1wyRow[x]; - if (use_gamma) d3 = fi * gamma; - } - - v1Row[x] = u1Row[x] + d1; - v2Row[x] = u2Row[x] + d2; - if (use_gamma) v3Row[x] = u3Row[x] + d3; - } - } -} - -void estimateV(const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, const Mat_& u3, const Mat_& grad, const Mat_& rho_c, - Mat_& v1, Mat_& v2, Mat_& v3, float l_t, float gamma) -{ - CV_DbgAssert( I1wy.size() == I1wx.size() ); - CV_DbgAssert( u1.size() == I1wx.size() ); - CV_DbgAssert( u2.size() == I1wx.size() ); - CV_DbgAssert( grad.size() == I1wx.size() ); - CV_DbgAssert( rho_c.size() == I1wx.size() ); - CV_DbgAssert( v1.size() == I1wx.size() ); - CV_DbgAssert( v2.size() == I1wx.size() ); - - EstimateVBody body; - bool use_gamma = gamma != 0; - body.I1wx = I1wx; - body.I1wy = I1wy; - body.u1 = u1; - body.u2 = u2; - if (use_gamma) body.u3 = u3; - body.grad = grad; - body.rho_c = rho_c; - body.v1 = v1; - body.v2 = v2; - if (use_gamma) body.v3 = v3; - body.l_t = l_t; - body.gamma = gamma; - parallel_for_(Range(0, I1wx.rows), body); -} - -//////////////////////////////////////////////////////////// -// estimateU - -float estimateU(const Mat_& v1, const Mat_& v2, const Mat_& v3, - const Mat_& div_p1, const Mat_& div_p2, const Mat_& div_p3, - Mat_& u1, Mat_& u2, Mat_& u3, - float theta, float gamma) -{ - CV_DbgAssert( v2.size() == v1.size() ); - CV_DbgAssert( div_p1.size() == v1.size() ); - CV_DbgAssert( 
div_p2.size() == v1.size() ); - CV_DbgAssert( u1.size() == v1.size() ); - CV_DbgAssert( u2.size() == v1.size() ); - - float error = 0.0f; - bool use_gamma = gamma != 0; - for (int y = 0; y < v1.rows; ++y) - { - const float* v1Row = v1[y]; - const float* v2Row = v2[y]; - const float* v3Row = use_gamma?v3[y]:NULL; - const float* divP1Row = div_p1[y]; - const float* divP2Row = div_p2[y]; - const float* divP3Row = use_gamma?div_p3[y]:NULL; - - float* u1Row = u1[y]; - float* u2Row = u2[y]; - float* u3Row = use_gamma?u3[y]:NULL; - - - for (int x = 0; x < v1.cols; ++x) - { - const float u1k = u1Row[x]; - const float u2k = u2Row[x]; - const float u3k = use_gamma?u3Row[x]:0; - - u1Row[x] = v1Row[x] + theta * divP1Row[x]; - u2Row[x] = v2Row[x] + theta * divP2Row[x]; - if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x]; - error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k): - (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k); - } - } - - return error; -} - -//////////////////////////////////////////////////////////// -// estimateDualVariables - -struct EstimateDualVariablesBody : ParallelLoopBody -{ - void operator() (const Range& range) const CV_OVERRIDE; - - Mat_ u1x; - Mat_ u1y; - Mat_ u2x; - Mat_ u2y; - Mat_ u3x; - Mat_ u3y; - mutable Mat_ p11; - mutable Mat_ p12; - mutable Mat_ p21; - mutable Mat_ p22; - mutable Mat_ p31; - mutable Mat_ p32; - float taut; - bool use_gamma; -}; - -void EstimateDualVariablesBody::operator() (const Range& range) const -{ - for (int y = range.start; y < range.end; ++y) - { - const float* u1xRow = u1x[y]; - const float* u1yRow = u1y[y]; - const float* u2xRow = u2x[y]; - const float* u2yRow = u2y[y]; - const float* u3xRow = u3x[y]; - const float* u3yRow = u3y[y]; - - float* p11Row = p11[y]; - float* p12Row = p12[y]; - float* p21Row = p21[y]; - float* p22Row = p22[y]; - float* p31Row = p31[y]; - float* p32Row = p32[y]; - - for (int x = 0; x < u1x.cols; ++x) - { - const float g1 = static_cast(hypot(u1xRow[x], u1yRow[x])); - const float g2 = static_cast(hypot(u2xRow[x], u2yRow[x])); - - const float ng1 = 1.0f + taut * g1; - const float ng2 = 1.0f + taut * g2; - - p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1; - p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1; - p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2; - p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2; - - if (use_gamma) - { - const float g3 = static_cast(hypot(u3xRow[x], u3yRow[x])); - const float ng3 = 1.0f + taut * g3; - p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3; - p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3; - } - } - } -} - -void estimateDualVariables(const Mat_& u1x, const Mat_& u1y, - const Mat_& u2x, const Mat_& u2y, - const Mat_& u3x, const Mat_& u3y, - Mat_& p11, Mat_& p12, - Mat_& p21, Mat_& p22, - Mat_& p31, Mat_& p32, - float taut, bool use_gamma) -{ - CV_DbgAssert( u1y.size() == u1x.size() ); - CV_DbgAssert( u2x.size() == u1x.size() ); - CV_DbgAssert( u3x.size() == u1x.size() ); - CV_DbgAssert( u2y.size() == u1x.size() ); - CV_DbgAssert( u3y.size() == u1x.size() ); - CV_DbgAssert( p11.size() == u1x.size() ); - CV_DbgAssert( p12.size() == u1x.size() ); - CV_DbgAssert( p21.size() == u1x.size() ); - CV_DbgAssert( p22.size() == u1x.size() ); - CV_DbgAssert( p31.size() == u1x.size() ); - CV_DbgAssert( p32.size() == u1x.size() ); - - EstimateDualVariablesBody body; - - body.u1x = u1x; - body.u1y = u1y; - body.u2x = u2x; - body.u2y = u2y; - body.u3x = u3x; - body.u3y = u3y; - 
body.p11 = p11; - body.p12 = p12; - body.p21 = p21; - body.p22 = p22; - body.p31 = p31; - body.p32 = p32; - body.taut = taut; - body.use_gamma = use_gamma; - - parallel_for_(Range(0, u1x.rows), body); -} - -#ifdef HAVE_OPENCL -bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2) -{ - using namespace cv_ocl_tvl1flow; - - const double scaledEpsilon = epsilon * epsilon * I0.size().area(); - - CV_DbgAssert(I1.size() == I0.size()); - CV_DbgAssert(I1.type() == I0.type()); - CV_DbgAssert(u1.empty() || u1.size() == I0.size()); - CV_DbgAssert(u2.size() == u1.size()); - - if (u1.empty()) - { - u1.create(I0.size(), CV_32FC1); - u1.setTo(Scalar::all(0)); - - u2.create(I0.size(), CV_32FC1); - u2.setTo(Scalar::all(0)); - } - - UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); - - if (!centeredGradient(I1, I1x, I1y)) - return false; - - UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); - - UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); - - UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows)); - UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows)); - p11.setTo(Scalar::all(0)); - p12.setTo(Scalar::all(0)); - p21.setTo(Scalar::all(0)); - p22.setTo(Scalar::all(0)); - - UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows)); - - const float l_t = static_cast(lambda * theta); - const float taut = static_cast(tau / theta); - int n; - - for (int warpings = 0; warpings < warps; ++warpings) - { - if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c)) - return false; - - double error = std::numeric_limits::max(); - double prev_error = 0; - - for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer) - { - if (medianFiltering > 1) { - cv::medianBlur(u1, u1, medianFiltering); - cv::medianBlur(u2, u2, medianFiltering); - } - for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner) - { - // some tweaks to make sum operation less frequently - n = n_inner + n_outer*innerIterations; - char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); - if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, - u1, u2, diff, l_t, static_cast(theta), calc_error)) - return false; - if (calc_error) - { - error = cv::sum(diff)[0]; - prev_error = error; - } - else - { - error = std::numeric_limits::max(); - prev_error -= scaledEpsilon; - } - if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut)) - return false; - } - } - } - return true; -} -#endif - -void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2, Mat_& u3) -{ - const float scaledEpsilon = static_cast(epsilon * epsilon * I0.size().area()); - - CV_DbgAssert( I1.size() == I0.size() ); - CV_DbgAssert( I1.type() == I0.type() ); - CV_DbgAssert( u1.size() == I0.size() ); - CV_DbgAssert( u2.size() == u1.size() ); - - Mat_ I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); - centeredGradient(I1, I1x, I1y); - - Mat_ flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); - - Mat_ I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1wx = 
dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); - - Mat_ grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); - - Mat_ v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows)); - - Mat_ p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows)); - p11.setTo(Scalar::all(0)); - p12.setTo(Scalar::all(0)); - p21.setTo(Scalar::all(0)); - p22.setTo(Scalar::all(0)); - bool use_gamma = gamma != 0.; - if (use_gamma) p31.setTo(Scalar::all(0)); - if (use_gamma) p32.setTo(Scalar::all(0)); - - Mat_ div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows)); - - Mat_ u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows)); - - const float l_t = static_cast(lambda * theta); - const float taut = static_cast(tau / theta); - - for (int warpings = 0; warpings < warps; ++warpings) - { - // compute the warping of the target image and its derivatives - buildFlowMap(u1, u2, flowMap1, flowMap2); - remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC); - remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC); - remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC); - //calculate I1(x+u0) and its gradient - calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c); - - float error = std::numeric_limits::max(); - for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer) - { - if (medianFiltering > 1) { - cv::medianBlur(u1, u1, medianFiltering); - cv::medianBlur(u2, u2, medianFiltering); - } - for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner) - { - // estimate the values of the variable (v1, v2) (thresholding operator TH) - estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast(gamma)); - - // compute the divergence of the dual variable (p1, p2, p3) - divergence(p11, p12, div_p1); - divergence(p21, p22, div_p2); - if (use_gamma) divergence(p31, p32, div_p3); - - // estimate the values of the optical flow (u1, u2) - error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast(theta), static_cast(gamma)); - - // compute the gradient of the optical flow (Du1, Du2) - forwardGradient(u1, u1x, u1y); - forwardGradient(u2, u2x, u2y); - if (use_gamma) forwardGradient(u3, u3x, u3y); - - // estimate the values of the dual variable (p1, p2, p3) - estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma); - } - } - } -} - -void OpticalFlowDual_TVL1::collectGarbage() -{ - //dataMat structure dm - dm.I0s.clear(); - dm.I1s.clear(); - dm.u1s.clear(); - dm.u2s.clear(); - - dm.I1x_buf.release(); - dm.I1y_buf.release(); - - dm.flowMap1_buf.release(); - dm.flowMap2_buf.release(); - - dm.I1w_buf.release(); - dm.I1wx_buf.release(); - dm.I1wy_buf.release(); - - dm.grad_buf.release(); - 
dm.rho_c_buf.release(); - - dm.v1_buf.release(); - dm.v2_buf.release(); - - dm.p11_buf.release(); - dm.p12_buf.release(); - dm.p21_buf.release(); - dm.p22_buf.release(); - - dm.div_p1_buf.release(); - dm.div_p2_buf.release(); - - dm.u1x_buf.release(); - dm.u1y_buf.release(); - dm.u2x_buf.release(); - dm.u2y_buf.release(); - -#ifdef HAVE_OPENCL - //dataUMat structure dum - dum.I0s.clear(); - dum.I1s.clear(); - dum.u1s.clear(); - dum.u2s.clear(); - - dum.I1x_buf.release(); - dum.I1y_buf.release(); - - dum.I1w_buf.release(); - dum.I1wx_buf.release(); - dum.I1wy_buf.release(); - - dum.grad_buf.release(); - dum.rho_c_buf.release(); - - dum.p11_buf.release(); - dum.p12_buf.release(); - dum.p21_buf.release(); - dum.p22_buf.release(); - - dum.diff_buf.release(); - dum.norm_buf.release(); -#endif -} - -} // namespace - -Ptr cv::createOptFlow_DualTVL1() -{ - return makePtr(); -} - -Ptr cv::DualTVL1OpticalFlow::create( - double tau, double lambda, double theta, int nscales, int warps, - double epsilon, int innerIterations, int outerIterations, double scaleStep, - double gamma, int medianFilter, bool useInitialFlow) -{ - return makePtr(tau, lambda, theta, nscales, warps, - epsilon, innerIterations, outerIterations, - scaleStep, gamma, medianFilter, useInitialFlow); -} diff --git a/modules/video/src/variational_refinement.cpp b/modules/video/src/variational_refinement.cpp new file mode 100644 index 0000000000..82d8c7d5fb --- /dev/null +++ b/modules/video/src/variational_refinement.cpp @@ -0,0 +1,1191 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/hal/intrin.hpp" + +using namespace std; + +namespace cv +{ + +class VariationalRefinementImpl CV_FINAL : public VariationalRefinement +{ + public: + VariationalRefinementImpl(); + + void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; + void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) CV_OVERRIDE; + void collectGarbage() CV_OVERRIDE; + + protected: //!< algorithm parameters + int fixedPointIterations, sorIterations; + float omega; + float alpha, delta, gamma; + float zeta, epsilon; + + public: + int getFixedPointIterations() const CV_OVERRIDE { return fixedPointIterations; } + void setFixedPointIterations(int val) CV_OVERRIDE { fixedPointIterations = val; } + int getSorIterations() const CV_OVERRIDE { return sorIterations; } + void setSorIterations(int val) CV_OVERRIDE { sorIterations = val; } + float getOmega() const CV_OVERRIDE { return omega; } + void setOmega(float val) CV_OVERRIDE { omega = val; } + float getAlpha() const CV_OVERRIDE { return alpha; } + void setAlpha(float val) CV_OVERRIDE { alpha = val; } + float getDelta() const CV_OVERRIDE { return delta; } + void setDelta(float val) CV_OVERRIDE { delta = val; } + float getGamma() const CV_OVERRIDE { return gamma; } + void setGamma(float val) CV_OVERRIDE { gamma = val; } + + protected: //!< internal buffers + /* This struct defines a special data layout for Mat_. Original buffer is split into two: one for "red" + * elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It + * allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization + * (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and + * black buffers. 
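For illustration, take a hypothetical 4x4 single-channel image with elements indexed (i, j); the split described above distributes them as

    red   buffer ((i + j) even): (0,0) (0,2) | (1,1) (1,3) | (2,0) (2,2) | (3,1) (3,3)
    black buffer ((i + j) odd) : (0,1) (0,3) | (1,0) (1,2) | (2,1) (2,3) | (3,0) (3,2)

so each buffer row keeps roughly half of an image row (the exact count differs by one between even and odd rows when the width is odd), and both buffers receive one repeated column on each side plus one repeated row at the top and bottom so that neighbour accesses in the SOR sweeps need no special border cases.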
+ */ + struct RedBlackBuffer + { + Mat_ red; //!< (i+j)%2==0 + Mat_ black; //!< (i+j)%2==1 + + /* Width of even and odd rows may be different */ + int red_even_len, red_odd_len; + int black_even_len, black_odd_len; + + void create(Size s); + void release(); + }; + + Mat_ Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz; //!< image derivative buffers + RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers + + RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients + RedBlackBuffer weights; //!< smoothness term weights in the current fixed point iteration + + Mat_ mapX, mapY; //!< auxiliary buffers for remapping + + RedBlackBuffer tempW_u, tempW_v; //!< flow buffers that are modified in each fixed point iteration + RedBlackBuffer dW_u, dW_v; //!< optical flow increment + RedBlackBuffer W_u_rb, W_v_rb; //!< red-black-buffer version of the input flow + + private: //!< private methods and parallel sections + void splitCheckerboard(RedBlackBuffer &dst, Mat &src); + void mergeCheckerboard(Mat &dst, RedBlackBuffer &src); + void updateRepeatedBorders(RedBlackBuffer &dst); + void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v); + void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v); + + /* Parallelizing arbitrary operations with 3 input/output arguments */ + typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3); + struct ParallelOp_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + vector ops; + vector op1s; + vector op2s; + vector op3s; + + ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, vector &_op1s, + vector &_op2s, vector &_op3s); + void operator()(const Range &range) const CV_OVERRIDE; + }; + void gradHorizAndSplitOp(void *src, void *dst, void *dst_split) + { + Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE); + splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); + } + void gradVertAndSplitOp(void *src, void *dst, void *dst_split) + { + Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE); + splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); + } + void averageOp(void *src1, void *src2, void *dst) + { + addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F); + } + void subtractOp(void *src1, void *src2, void *dst) + { + subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F); + } + + struct ComputeDataTerm_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *dW_u, *dW_v; + bool red_pass; + + ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass); + void operator()(const Range &range) const CV_OVERRIDE; + }; + + struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *W_u, *W_v, *curW_u, *curW_v; + bool red_pass; + + ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, + RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u, + RedBlackBuffer &_tempW_v, bool _red_pass); + void operator()(const Range &range) const CV_OVERRIDE; + }; + + struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *W_u, *W_v; + bool red_pass; + + 
ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, + RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass); + void operator()(const Range &range) const CV_OVERRIDE; + }; + + struct RedBlackSOR_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *dW_u, *dW_v; + bool red_pass; + + RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass); + void operator()(const Range &range) const CV_OVERRIDE; + }; +}; + +VariationalRefinementImpl::VariationalRefinementImpl() +{ + fixedPointIterations = 5; + sorIterations = 5; + alpha = 20.0f; + delta = 5.0f; + gamma = 10.0f; + omega = 1.6f; + zeta = 0.1f; + epsilon = 0.001f; +} + +/* This function converts an input Mat into the RedBlackBuffer format, which involves + * splitting the input buffer into two and adding repeated borders. Assumes that enough + * memory in dst is already allocated. + */ +void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) +{ + int buf_j, j; + int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders + + /* Rows of red and black buffers can have different actual width, some extra repeated values are + * added for padding in such cases. + */ + for (int i = 0; i < src.rows; i++) + { + float *src_buf = src.ptr(i); + float *r_buf = dst.red.ptr(i + 1); + float *b_buf = dst.black.ptr(i + 1); + r_buf[0] = b_buf[0] = src_buf[0]; + buf_j = 1; + if (i % 2 == 0) + { + for (j = 0; j < src.cols - 1; j += 2) + { + r_buf[buf_j] = src_buf[j]; + b_buf[buf_j] = src_buf[j + 1]; + buf_j++; + } + if (j < src.cols) + r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; + else + j--; + } + else + { + for (j = 0; j < src.cols - 1; j += 2) + { + b_buf[buf_j] = src_buf[j]; + r_buf[buf_j] = src_buf[j + 1]; + buf_j++; + } + if (j < src.cols) + r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; + else + j--; + } + r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j]; + } + + /* Fill top and bottom borders: */ + { + float *r_buf_border = dst.red.ptr(dst.red.rows - 1); + float *b_buf_border = dst.black.ptr(dst.black.rows - 1); + float *r_buf = dst.red.ptr(dst.red.rows - 2); + float *b_buf = dst.black.ptr(dst.black.rows - 2); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } + { + float *r_buf_border = dst.red.ptr(0); + float *b_buf_border = dst.black.ptr(0); + float *r_buf = dst.red.ptr(1); + float *b_buf = dst.black.ptr(1); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } +} + +/* The inverse of splitCheckerboard, i.e. converting the RedBlackBuffer back into Mat. + * Assumes that enough memory in dst is already allocated. 
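As a standalone sketch of the round trip these two helpers implement (the real methods are private to this class and also maintain the padded borders, which are omitted here), the checkerboard split and merge on a plain row-major buffer are lossless:

#include <vector>
#include <cassert>

int main()
{
    const int rows = 3, cols = 5;
    std::vector<float> img(rows * cols), back(rows * cols);
    for (int k = 0; k < rows * cols; k++) img[k] = (float)k;

    // "red" holds elements with (i + j) even, "black" those with (i + j) odd.
    std::vector<float> red, black;
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++)
            ((i + j) % 2 == 0 ? red : black).push_back(img[i * cols + j]);

    // Merging walks the same pattern and restores the original row-major order.
    size_t ri = 0, bi = 0;
    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++)
            back[i * cols + j] = ((i + j) % 2 == 0) ? red[ri++] : black[bi++];

    assert(back == img); // split followed by merge loses nothing
    return 0;
}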
+ */ +void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) +{ + int buf_j, j; + for (int i = 0; i < dst.rows; i++) + { + float *src_r_buf = src.red.ptr(i + 1); + float *src_b_buf = src.black.ptr(i + 1); + float *dst_buf = dst.ptr(i); + buf_j = 1; + + if (i % 2 == 0) + { + for (j = 0; j < dst.cols - 1; j += 2) + { + dst_buf[j] = src_r_buf[buf_j]; + dst_buf[j + 1] = src_b_buf[buf_j]; + buf_j++; + } + if (j < dst.cols) + dst_buf[j] = src_r_buf[buf_j]; + } + else + { + for (j = 0; j < dst.cols - 1; j += 2) + { + dst_buf[j] = src_b_buf[buf_j]; + dst_buf[j + 1] = src_r_buf[buf_j]; + buf_j++; + } + if (j < dst.cols) + dst_buf[j] = src_b_buf[buf_j]; + } + } +} + +/* An auxiliary function that updates the borders. Used to enforce that border values repeat + * the ones adjacent to the border. + */ +void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) +{ + int buf_w = dst.red.cols; + for (int i = 0; i < dst.red.rows - 2; i++) + { + float *r_buf = dst.red.ptr(i + 1); + float *b_buf = dst.black.ptr(i + 1); + + if (i % 2 == 0) + { + b_buf[0] = r_buf[1]; + if (dst.red_even_len > dst.black_even_len) + b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len]; + else + r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len]; + } + else + { + r_buf[0] = b_buf[1]; + if (dst.red_odd_len < dst.black_odd_len) + r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len]; + else + b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len]; + } + } + { + float *r_buf_border = dst.red.ptr(dst.red.rows - 1); + float *b_buf_border = dst.black.ptr(dst.black.rows - 1); + float *r_buf = dst.red.ptr(dst.red.rows - 2); + float *b_buf = dst.black.ptr(dst.black.rows - 2); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } + { + float *r_buf_border = dst.red.ptr(0); + float *b_buf_border = dst.black.ptr(0); + float *r_buf = dst.red.ptr(1); + float *b_buf = dst.black.ptr(1); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } +} + +void VariationalRefinementImpl::RedBlackBuffer::create(Size s) +{ + /* Allocate enough memory to include borders */ + int w = (int)ceil(s.width / 2.0) + 2; + red.create(s.height + 2, w); + black.create(s.height + 2, w); + + if (s.width % 2 == 0) + red_even_len = red_odd_len = black_even_len = black_odd_len = w - 2; + else + { + red_even_len = black_odd_len = w - 2; + red_odd_len = black_even_len = w - 3; + } +} + +void VariationalRefinementImpl::RedBlackBuffer::release() +{ + red.release(); + black.release(); +} + +VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, + vector &_op1s, vector &_op2s, + vector &_op3s) + : var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s) +{ +} + +void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const +{ + for (int i = range.start; i < range.end; i++) + (var->*ops[i])(op1s[i], op2s[i], op3s[i]); +} + +void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v) +{ + for (int i = 0; i < flow_u.rows; i++) + { + float *pFlowU = flow_u.ptr(i); + float *pFlowV = flow_v.ptr(i); + float *pMapX = mapX.ptr(i); + float *pMapY = mapY.ptr(i); + for (int j = 0; j < flow_u.cols; j++) + { + pMapX[j] = j + pFlowU[j]; + pMapY[j] = i + pFlowV[j]; + } + } + remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE); +} + +void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat 
&W_v) +{ + Size s = I0.size(); + A11.create(s); + A12.create(s); + A22.create(s); + b1.create(s); + b2.create(s); + weights.create(s); + weights.red.setTo(0.0f); + weights.black.setTo(0.0f); + tempW_u.create(s); + tempW_v.create(s); + dW_u.create(s); + dW_v.create(s); + W_u_rb.create(s); + W_v_rb.create(s); + + Ix.create(s); + Iy.create(s); + Iz.create(s); + Ixx.create(s); + Ixy.create(s); + Iyy.create(s); + Ixz.create(s); + Iyz.create(s); + + Ix_rb.create(s); + Iy_rb.create(s); + Iz_rb.create(s); + Ixx_rb.create(s); + Ixy_rb.create(s); + Iyy_rb.create(s); + Ixz_rb.create(s); + Iyz_rb.create(s); + + mapX.create(s); + mapY.create(s); + + /* Floating point warps work significantly better than fixed-point */ + Mat I1flt, warpedI; + I1.convertTo(I1flt, CV_32F); + warpImage(warpedI, I1flt, W_u, W_v); + + /* Computing an average of the current and warped next frames (to compute the derivatives on) and + * temporal derivative Iz + */ + Mat averagedI; + { + vector op1s; + op1s.push_back((void *)&I0); + op1s.push_back((void *)&warpedI); + vector op2s; + op2s.push_back((void *)&warpedI); + op2s.push_back((void *)&I0); + vector op3s; + op3s.push_back((void *)&averagedI); + op3s.push_back((void *)&Iz); + vector ops; + ops.push_back(&VariationalRefinementImpl::averageOp); + ops.push_back(&VariationalRefinementImpl::subtractOp); + parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } + splitCheckerboard(Iz_rb, Iz); + + /* Computing first-order derivatives */ + { + vector op1s; + op1s.push_back((void *)&averagedI); + op1s.push_back((void *)&averagedI); + op1s.push_back((void *)&Iz); + op1s.push_back((void *)&Iz); + vector op2s; + op2s.push_back((void *)&Ix); + op2s.push_back((void *)&Iy); + op2s.push_back((void *)&Ixz); + op2s.push_back((void *)&Iyz); + vector op3s; + op3s.push_back((void *)&Ix_rb); + op3s.push_back((void *)&Iy_rb); + op3s.push_back((void *)&Ixz_rb); + op3s.push_back((void *)&Iyz_rb); + vector ops; + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } + + /* Computing second-order derivatives */ + { + vector op1s; + op1s.push_back((void *)&Ix); + op1s.push_back((void *)&Ix); + op1s.push_back((void *)&Iy); + vector op2s; + op2s.push_back((void *)&Ixx); + op2s.push_back((void *)&Ixy); + op2s.push_back((void *)&Iyy); + vector op3s; + op3s.push_back((void *)&Ixx_rb); + op3s.push_back((void *)&Ixy_rb); + op3s.push_back((void *)&Iyy_rb); + vector ops; + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } +} + +VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, + int _nstripes, int _h, + RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b1 + * that correspond to the data term, which includes color and gradient constancy assumptions. 
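+ *
+ * Per pixel, the color constancy part uses the linearized residual
+ *   Ik1z = Iz + Ix*dU + Iy*dV
+ * and the normalization factor derivNorm = Ix^2 + Iy^2 + zeta^2 to form the robust weight
+ *   w = (delta/2) / sqrt(Ik1z^2/derivNorm + epsilon^2) / derivNorm,
+ * which initializes A11 = w*Ix^2 + zeta^2, A12 = w*Ix*Iy, A22 = w*Iy^2 + zeta^2,
+ * b1 = -w*Iz*Ix and b2 = -w*Iz*Iy. The gradient constancy part is then added on top in the
+ * same fashion, using the second-order derivatives and the weight gamma/2.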
+ */ +void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float zeta_squared = var->zeta * var->zeta; + float epsilon_squared = var->epsilon * var->epsilon; + float gamma2 = var->gamma / 2; + float delta2 = var->delta / 2; + + float *pIx, *pIy, *pIz; + float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz; + float *pdU, *pdV; + float *pa11, *pa12, *pa22, *pb1, *pb2; + + float derivNorm, derivNorm2; + float Ik1z, Ik1zx, Ik1zy; + float weight; + int len; + for (int i = start_i; i < end_i; i++) + { +#define INIT_ROW_POINTERS(color) \ + pIx = var->Ix_rb.color.ptr(i + 1) + 1; \ + pIy = var->Iy_rb.color.ptr(i + 1) + 1; \ + pIz = var->Iz_rb.color.ptr(i + 1) + 1; \ + pIxx = var->Ixx_rb.color.ptr(i + 1) + 1; \ + pIxy = var->Ixy_rb.color.ptr(i + 1) + 1; \ + pIyy = var->Iyy_rb.color.ptr(i + 1) + 1; \ + pIxz = var->Ixz_rb.color.ptr(i + 1) + 1; \ + pIyz = var->Iyz_rb.color.ptr(i + 1) + 1; \ + pa11 = var->A11.color.ptr(i + 1) + 1; \ + pa12 = var->A12.color.ptr(i + 1) + 1; \ + pa22 = var->A22.color.ptr(i + 1) + 1; \ + pb1 = var->b1.color.ptr(i + 1) + 1; \ + pb2 = var->b2.color.ptr(i + 1) + 1; \ + pdU = dW_u->color.ptr(i + 1) + 1; \ + pdV = dW_v->color.ptr(i + 1) + 1; \ + if (i % 2 == 0) \ + len = var->Ix_rb.color##_even_len; \ + else \ + len = var->Ix_rb.color##_odd_len; + + if (red_pass) + { + INIT_ROW_POINTERS(red); + } + else + { + INIT_ROW_POINTERS(black); + } +#undef INIT_ROW_POINTERS + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 zeta_vec = v_setall_f32(zeta_squared); + v_float32x4 eps_vec = v_setall_f32(epsilon_squared); + v_float32x4 delta_vec = v_setall_f32(delta2); + v_float32x4 gamma_vec = v_setall_f32(gamma2); + v_float32x4 zero_vec = v_setall_f32(0.0f); + v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec; + v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec; + v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec; + v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec; + v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec; + + for (; j < len - 3; j += 4) + { + pIx_vec = v_load(pIx + j); + pIy_vec = v_load(pIy + j); + pIz_vec = v_load(pIz + j); + pdU_vec = v_load(pdU + j); + pdV_vec = v_load(pdV + j); + + derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec; + Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec; + weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec; + + pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec; + pa12_vec = weight_vec * (pIx_vec * pIy_vec); + pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec; + pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec); + pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec); + + pIxx_vec = v_load(pIxx + j); + pIxy_vec = v_load(pIxy + j); + pIyy_vec = v_load(pIyy + j); + pIxz_vec = v_load(pIxz + j); + pIyz_vec = v_load(pIyz + j); + + derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec; + derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec; + Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec; + Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec; + weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec + + Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec); + + pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec); + pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / 
derivNorm2_vec); + pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec); + pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec); + pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec); + + v_store(pa11 + j, pa11_vec); + v_store(pa12 + j, pa12_vec); + v_store(pa22 + j, pa22_vec); + v_store(pb1 + j, pb1_vec); + v_store(pb2 + j, pb2_vec); + } +#endif + for (; j < len; j++) + { + /* Step 1: Compute color constancy terms */ + /* Normalization factor:*/ + derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared; + /* Color constancy penalty (computed by Taylor expansion):*/ + Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j]; + /* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */ + weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm; + /* Add respective color constancy terms to the linear system coefficients: */ + pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared; + pa12[j] = weight * (pIx[j] * pIy[j]); + pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared; + pb1[j] = -weight * (pIz[j] * pIx[j]); + pb2[j] = -weight * (pIz[j] * pIy[j]); + + /* Step 2: Compute gradient constancy terms */ + /* Normalization factor for x gradient: */ + derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared; + /* Normalization factor for y gradient: */ + derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared; + /* Gradient constancy penalties (computed by Taylor expansion): */ + Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j]; + Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j]; + /* Weight of the gradient constancy term in the current fixed-point iteration: */ + weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared); + /* Add respective gradient constancy components to the linear system coefficients: */ + pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2); + pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2); + pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2); + pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2); + pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2); + } + } +} + +VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody( + VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, + RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v), + red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function updates the linear system coefficients A11,A22,b1,b1 according to the + * flow smoothness term and computes corresponding weights for the current fixed point iteration. + * A11,A22,b1,b1 are updated only partially (horizontal pass). Doing both horizontal and vertical + * passes in one loop complicates parallelization (different threads write to the same elements). 
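+ *
+ * The smoothness weight at each pixel is
+ *   w = (alpha/2) / sqrt(ux^2 + vx^2 + uy^2 + vy^2 + epsilon^2),
+ * where ux, uy, vx, vy are forward differences of the current flow estimate. The horizontal
+ * pass stores w in 'weights' and adds w and the weighted horizontal difference of the initial
+ * flow to the coefficients of both the pixel and its right neighbor.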
+ */ +void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float epsilon_squared = var->epsilon * var->epsilon; + float alpha2 = var->alpha / 2; + float *pWeight; + float *pA_u, *pA_u_next, *pA_v, *pA_v_next; + float *pB_u, *pB_u_next, *pB_v, *pB_v_next; + float *cW_u, *cW_u_next, *cW_u_next_row; + float *cW_v, *cW_v_next, *cW_v_next_row; + float *pW_u, *pW_u_next; + float *pW_v, *pW_v_next; + float ux, uy, vx, vy; + int len; + bool touches_right_border = true; + +#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default) \ + pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ + pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ + pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ + cW_u = curW_u->cur_color.ptr(i + 1) + 1; \ + pW_u = W_u->cur_color.ptr(i + 1) + 1; \ + pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ + pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ + cW_v = curW_v->cur_color.ptr(i + 1) + 1; \ + pW_v = W_v->cur_color.ptr(i + 1) + 1; \ + \ + cW_u_next_row = curW_u->next_color.ptr(i + 2) + 1; \ + cW_v_next_row = curW_v->next_color.ptr(i + 2) + 1; \ + \ + if (i % 2 == 0) \ + { \ + pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_even; \ + pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_even; \ + cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_even; \ + pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_even; \ + pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_even; \ + pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_even; \ + cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_even; \ + pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_even; \ + len = var->A11.cur_color##_even_len; \ + if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ + touches_right_border = bool_default; \ + else \ + touches_right_border = !bool_default; \ + } \ + else \ + { \ + pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_odd; \ + pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_odd; \ + cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_odd; \ + pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_odd; \ + pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_odd; \ + pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_odd; \ + cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_odd; \ + pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_odd; \ + len = var->A11.cur_color##_odd_len; \ + if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ + touches_right_border = !bool_default; \ + else \ + touches_right_border = bool_default; \ + } + + for (int i = start_i; i < end_i; i++) + { + if (red_pass) + { + INIT_ROW_POINTERS(red, black, 1, 2, true); + } + else + { + INIT_ROW_POINTERS(black, red, 2, 1, false); + } +#undef INIT_ROW_POINTERS + +#define COMPUTE \ + /* Gradients for the flow on the current fixed-point iteration: */ \ + ux = cW_u_next[j] - cW_u[j]; \ + vx = cW_v_next[j] - cW_v[j]; \ + uy = cW_u_next_row[j] - cW_u[j]; \ + vy = cW_v_next_row[j] - cW_v[j]; \ + /* Weight of the smoothness term in the current fixed-point iteration: */ \ + pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared); \ + /* Gradients for initial raw flow multiplied by weight:*/ \ + ux = pWeight[j] * (pW_u_next[j] - pW_u[j]); \ + vx = pWeight[j] * (pW_v_next[j] - pW_v[j]); + +#define UPDATE \ + pB_u[j] += ux; \ + pA_u[j] += pWeight[j]; \ + pB_v[j] += vx; \ 
+ pA_v[j] += pWeight[j]; \ + pB_u_next[j] -= ux; \ + pA_u_next[j] += pWeight[j]; \ + pB_v_next[j] -= vx; \ + pA_v_next[j] += pWeight[j]; + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 alpha2_vec = v_setall_f32(alpha2); + v_float32x4 eps_vec = v_setall_f32(epsilon_squared); + v_float32x4 cW_u_vec, cW_v_vec; + v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec; + + for (; j < len - 4; j += 4) + { + cW_u_vec = v_load(cW_u + j); + cW_v_vec = v_load(cW_v + j); + + ux_vec = v_load(cW_u_next + j) - cW_u_vec; + vx_vec = v_load(cW_v_next + j) - cW_v_vec; + uy_vec = v_load(cW_u_next_row + j) - cW_u_vec; + vy_vec = v_load(cW_v_next_row + j) - cW_v_vec; + pWeight_vec = + alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec); + v_store(pWeight + j, pWeight_vec); + + ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j)); + vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j)); + + v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); + v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); + v_store(pB_u + j, v_load(pB_u + j) + ux_vec); + v_store(pB_v + j, v_load(pB_v + j) + vx_vec); + + v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec); + v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec); + v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec); + v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec); + } +#endif + for (; j < len - 1; j++) + { + COMPUTE; + UPDATE; + } + + /* Omit the update on the rightmost elements */ + if (touches_right_border) + { + COMPUTE; + } + else + { + COMPUTE; + UPDATE; + } + } +#undef COMPUTE +#undef UPDATE +} + +VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody( + VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass) +{ + /* Omit the last row in the vertical pass */ + h = _h - 1; + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b1. 
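+ * It is the vertical counterpart of the horizontal pass: it reuses the weights computed there
+ * and adds the weighted vertical flow differences to b1, b2 and the weights to A11, A22 of
+ * both the pixel and the pixel below it (the last row is skipped).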
*/ +void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float *pWeight; + float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row; + float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row; + float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row; + float vy, uy; + int len; + + for (int i = start_i; i < end_i; i++) + { +#define INIT_ROW_POINTERS(cur_color, next_color) \ + pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ + pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ + pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ + pW_u = W_u->cur_color.ptr(i + 1) + 1; \ + pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ + pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ + pW_v = W_v->cur_color.ptr(i + 1) + 1; \ + \ + pA_u_next_row = var->A11.next_color.ptr(i + 2) + 1; \ + pB_u_next_row = var->b1.next_color.ptr(i + 2) + 1; \ + pW_u_next_row = W_u->next_color.ptr(i + 2) + 1; \ + pA_v_next_row = var->A22.next_color.ptr(i + 2) + 1; \ + pB_v_next_row = var->b2.next_color.ptr(i + 2) + 1; \ + pW_v_next_row = W_v->next_color.ptr(i + 2) + 1; \ + \ + if (i % 2 == 0) \ + len = var->A11.cur_color##_even_len; \ + else \ + len = var->A11.cur_color##_odd_len; + + if (red_pass) + { + INIT_ROW_POINTERS(red, black); + } + else + { + INIT_ROW_POINTERS(black, red); + } +#undef INIT_ROW_POINTERS + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 pWeight_vec, uy_vec, vy_vec; + for (; j < len - 3; j += 4) + { + pWeight_vec = v_load(pWeight + j); + uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j)); + vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j)); + + v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); + v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); + v_store(pB_u + j, v_load(pB_u + j) + uy_vec); + v_store(pB_v + j, v_load(pB_v + j) + vy_vec); + + v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec); + v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec); + v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec); + v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec); + } +#endif + for (; j < len; j++) + { + uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]); + vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]); + pB_u[j] += uy; + pA_u[j] += pWeight[j]; + pB_v[j] += vy; + pA_v[j] += pWeight[j]; + pB_u_next_row[j] -= uy; + pA_u_next_row[j] += pWeight[j]; + pB_v_next_row[j] -= vy; + pA_v_next_row[j] += pWeight[j]; + } + } +} + +VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, + int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main + * linear system in the current fixed-point iteration. 
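+ *
+ * Each pass updates only the red or only the black cells, so all neighboring values read
+ * within a pass belong to the other color and the update parallelizes safely. The relaxation
+ * step per pixel is
+ *   dU += omega * ((sigmaU + b1 - A12*dV) / A11 - dU)
+ *   dV += omega * ((sigmaV + b2 - A12*dU) / A22 - dV),
+ * where sigmaU and sigmaV accumulate the smoothness-weighted flow increments of the four
+ * neighbors.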
+ */ +void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const +{ + int start = min(range.start * stripe_sz, h); + int end = min(range.end * stripe_sz, h); + + float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv; + float *pW_next, *pdu_next, *pdv_next; + float *pW_prev_row, *pdu_prev_row, *pdv_prev_row; + float *pdu_next_row, *pdv_next_row; + + float sigmaU, sigmaV; + int j, len; + for (int i = start; i < end; i++) + { +#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd) \ + pW = var->weights.cur_color.ptr(i + 1) + 1; \ + pa11 = var->A11.cur_color.ptr(i + 1) + 1; \ + pa12 = var->A12.cur_color.ptr(i + 1) + 1; \ + pa22 = var->A22.cur_color.ptr(i + 1) + 1; \ + pb1 = var->b1.cur_color.ptr(i + 1) + 1; \ + pb2 = var->b2.cur_color.ptr(i + 1) + 1; \ + pdu = dW_u->cur_color.ptr(i + 1) + 1; \ + pdv = dW_v->cur_color.ptr(i + 1) + 1; \ + \ + pdu_next_row = dW_u->next_color.ptr(i + 2) + 1; \ + pdv_next_row = dW_v->next_color.ptr(i + 2) + 1; \ + \ + pW_prev_row = var->weights.next_color.ptr(i) + 1; \ + pdu_prev_row = dW_u->next_color.ptr(i) + 1; \ + pdv_prev_row = dW_v->next_color.ptr(i) + 1; \ + \ + if (i % 2 == 0) \ + { \ + pW_next = var->weights.next_color.ptr(i + 1) + next_offs_even; \ + pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_even; \ + pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_even; \ + len = var->A11.cur_color##_even_len; \ + } \ + else \ + { \ + pW_next = var->weights.next_color.ptr(i + 1) + next_offs_odd; \ + pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_odd; \ + pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_odd; \ + len = var->A11.cur_color##_odd_len; \ + } + if (red_pass) + { + INIT_ROW_POINTERS(red, black, 1, 2); + } + else + { + INIT_ROW_POINTERS(black, red, 2, 1); + } +#undef INIT_ROW_POINTERS + + j = 0; +#ifdef CV_SIMD128 + v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]); + v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]); + v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]); + v_float32x4 omega_vec = v_setall_f32(var->omega); + v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec; + v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec; + v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec; + v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec; + v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec; + for (; j < len - 3; j += 4) + { + pW_vec = v_load(pW + j); + pW_next_vec = v_load(pW_next + j); + pW_prev_row_vec = v_load(pW_prev_row + j); + pdu_next_vec = v_load(pdu_next + j); + pdu_prev_row_vec = v_load(pdu_prev_row + j); + pdu_next_row_vec = v_load(pdu_next_row + j); + pdv_next_vec = v_load(pdv_next + j); + pdv_prev_row_vec = v_load(pdv_prev_row + j); + pdv_next_row_vec = v_load(pdv_next_row + j); + pa12_vec = v_load(pa12 + j); + pW_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec))); + pdu_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec))); + pdv_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec))); + + sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec + + pW_vec * pdu_next_row_vec; + sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec + + pW_vec * pdv_next_row_vec; + + pdu_vec = v_load(pdu + j); + pdv_vec = v_load(pdv + j); + 
pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec); + pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec); + v_store(pdu + j, pdu_vec); + v_store(pdv + j, pdv_vec); + + pW_prev_vec = pW_next_vec; + pdu_prev_vec = pdu_next_vec; + pdv_prev_vec = pdv_next_vec; + } +#endif + for (; j < len; j++) + { + sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] + + pW[j] * pdu_next_row[j]; + sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] + + pW[j] * pdv_next_row[j]; + pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]); + pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]); + } + } +} + +void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) +{ + CV_Assert(!I0.empty() && I0.channels() == 1); + CV_Assert(!I1.empty() && I1.channels() == 1); + CV_Assert(I0.sameSize(I1)); + CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); + CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2); + CV_Assert(I0.sameSize(flow)); + + Mat uv[2]; + Mat &flowMat = flow.getMatRef(); + split(flowMat, uv); + calcUV(I0, I1, uv[0], uv[1]); + merge(uv, 2, flowMat); +} + +void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) +{ + CV_Assert(!I0.empty() && I0.channels() == 1); + CV_Assert(!I1.empty() && I1.channels() == 1); + CV_Assert(I0.sameSize(I1)); + CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); + CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1); + CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1); + CV_Assert(I0.sameSize(flow_u)); + CV_Assert(flow_u.sameSize(flow_v)); + + int num_stripes = getNumThreads(); + Mat I0Mat = I0.getMat(); + Mat I1Mat = I1.getMat(); + Mat &W_u = flow_u.getMatRef(); + Mat &W_v = flow_v.getMatRef(); + prepareBuffers(I0Mat, I1Mat, W_u, W_v); + + splitCheckerboard(W_u_rb, W_u); + splitCheckerboard(W_v_rb, W_v); + W_u_rb.red.copyTo(tempW_u.red); + W_u_rb.black.copyTo(tempW_u.black); + W_v_rb.red.copyTo(tempW_v.red); + W_v_rb.black.copyTo(tempW_v.black); + dW_u.red.setTo(0.0f); + dW_u.black.setTo(0.0f); + dW_v.red.setTo(0.0f); + dW_v.black.setTo(0.0f); + + for (int i = 0; i < fixedPointIterations; i++) + { + parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); + parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); + + parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( + *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true)); + parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( + *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false)); + + parallel_for_(Range(0, num_stripes), + ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true)); + parallel_for_(Range(0, num_stripes), + ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false)); + + for (int j = 0; j < sorIterations; j++) + { + parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, 
true)); + parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); + } + + tempW_u.red = W_u_rb.red + dW_u.red; + tempW_u.black = W_u_rb.black + dW_u.black; + updateRepeatedBorders(tempW_u); + tempW_v.red = W_v_rb.red + dW_v.red; + tempW_v.black = W_v_rb.black + dW_v.black; + updateRepeatedBorders(tempW_v); + } + mergeCheckerboard(W_u, tempW_u); + mergeCheckerboard(W_v, tempW_v); +} +void VariationalRefinementImpl::collectGarbage() +{ + Ix.release(); + Iy.release(); + Iz.release(); + Ixx.release(); + Ixy.release(); + Iyy.release(); + Ixz.release(); + Iyz.release(); + + Ix_rb.release(); + Iy_rb.release(); + Iz_rb.release(); + Ixx_rb.release(); + Ixy_rb.release(); + Iyy_rb.release(); + Ixz_rb.release(); + Iyz_rb.release(); + + A11.release(); + A12.release(); + A22.release(); + b1.release(); + b2.release(); + weights.release(); + + mapX.release(); + mapY.release(); + + tempW_u.release(); + tempW_v.release(); + dW_u.release(); + dW_v.release(); + W_u_rb.release(); + W_v_rb.release(); +} + +Ptr VariationalRefinement::create() +{ return makePtr(); } + +} diff --git a/modules/video/test/ocl/test_dis.cpp b/modules/video/test/ocl/test_dis.cpp new file mode 100644 index 0000000000..582e13c567 --- /dev/null +++ b/modules/video/test/ocl/test_dis.cpp @@ -0,0 +1,96 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "../test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace opencv_test { namespace { + +PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int) +{ + int preset; + + virtual void SetUp() + { + preset = GET_PARAM(0); + } +}; + +OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) +{ + Mat frame1, frame2, GT; + + frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"); + frame2 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"); + + CV_Assert(!frame1.empty() && !frame2.empty()); + + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr algo; + + // iterate over presets: + for (int i = 0; i < cvtest::ocl::test_loop_times; i++) + { + Mat flow; + UMat ocl_flow; + + algo = DISOpticalFlow::create(preset); + OCL_OFF(algo->calc(frame1, frame2, flow)); + OCL_ON(algo->calc(frame1, frame2, ocl_flow)); + ASSERT_EQ(flow.rows, ocl_flow.rows); + ASSERT_EQ(flow.cols, ocl_flow.cols); + + EXPECT_MAT_SIMILAR(flow, ocl_flow, 6e-3); + } +} + +OCL_INSTANTIATE_TEST_CASE_P(Video, OCL_DenseOpticalFlow_DIS, + Values(DISOpticalFlow::PRESET_ULTRAFAST, + DISOpticalFlow::PRESET_FAST, + DISOpticalFlow::PRESET_MEDIUM)); + +}} // namespace + +#endif // HAVE_OPENCL diff --git a/modules/video/test/ocl/test_optflow_tvl1flow.cpp b/modules/video/test/ocl/test_optflow_tvl1flow.cpp deleted file mode 100644 index 1e2088ac33..0000000000 --- a/modules/video/test/ocl/test_optflow_tvl1flow.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "../test_precomp.hpp" -#include "opencv2/ts/ocl_test.hpp" - -#ifdef HAVE_OPENCL - -namespace opencv_test { -namespace ocl { - -///////////////////////////////////////////////////////////////////////////////////////////////// -// Optical_flow_tvl1 -namespace -{ - IMPLEMENT_PARAM_CLASS(UseInitFlow, bool) - IMPLEMENT_PARAM_CLASS(MedianFiltering, int) - IMPLEMENT_PARAM_CLASS(ScaleStep, double) -} - -PARAM_TEST_CASE(OpticalFlowTVL1, UseInitFlow, MedianFiltering, ScaleStep) -{ - bool useInitFlow; - int medianFiltering; - double scaleStep; - virtual void SetUp() - { - useInitFlow = GET_PARAM(0); - medianFiltering = GET_PARAM(1); - scaleStep = GET_PARAM(2); - } -}; - -OCL_TEST_P(OpticalFlowTVL1, Mat) -{ - cv::Mat frame0 = readImage("optflow/RubberWhale1.png", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - cv::Mat frame1 = readImage("optflow/RubberWhale2.png", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - cv::Mat flow; cv::UMat uflow; - - //create algorithm - cv::Ptr alg = cv::createOptFlow_DualTVL1(); - - //set parameters - alg->setScaleStep(scaleStep); - alg->setMedianFiltering(medianFiltering); - - //create initial flow as result of algorithm calculation - if (useInitFlow) - { - OCL_ON(alg->calc(frame0, frame1, uflow)); - uflow.copyTo(flow); - } - - //set flag to use initial flow as it is ready to use - alg->setUseInitialFlow(useInitFlow); - - OCL_OFF(alg->calc(frame0, frame1, flow)); - OCL_ON(alg->calc(frame0, frame1, uflow)); - - EXPECT_MAT_SIMILAR(flow, uflow, 1e-2); -} - -OCL_INSTANTIATE_TEST_CASE_P(Video, OpticalFlowTVL1, - Combine( - Values(UseInitFlow(false), UseInitFlow(true)), - Values(MedianFiltering(3), MedianFiltering(-1)), - Values(ScaleStep(0.3),ScaleStep(0.5)) - ) - ); - -} } // namespace opencv_test::ocl - -#endif // HAVE_OPENCL diff --git a/modules/video/test/test_OF_accuracy.cpp b/modules/video/test/test_OF_accuracy.cpp new file mode 100644 index 0000000000..affbab6586 --- /dev/null +++ b/modules/video/test/test_OF_accuracy.cpp @@ -0,0 +1,147 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +static string getDataDir() { return TS::ptr()->get_data_path(); } + +static string getRubberWhaleFrame1() { return getDataDir() + "optflow/RubberWhale1.png"; } + +static string getRubberWhaleFrame2() { return getDataDir() + "optflow/RubberWhale2.png"; } + +static string getRubberWhaleGroundTruth() { return getDataDir() + "optflow/RubberWhale.flo"; } + +static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); } + +static float calcRMSE(Mat flow1, Mat flow2) +{ + float sum = 0; + int counter = 0; + const int rows = flow1.rows; + const int cols = flow1.cols; + + for (int y = 0; y < rows; ++y) + { + for (int x = 0; x < cols; ++x) + { + Vec2f flow1_at_point = flow1.at(y, x); + Vec2f flow2_at_point = flow2.at(y, x); + + float u1 = flow1_at_point[0]; + float v1 = flow1_at_point[1]; + float u2 = flow2_at_point[0]; + float v2 = flow2_at_point[1]; + + if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) + { + sum += (u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2); + counter++; + } + } + } + return (float)sqrt(sum / (1e-9 + counter)); +} + +bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT) +{ + const string frame1_path = getRubberWhaleFrame1(); + const string frame2_path = getRubberWhaleFrame2(); + const string gt_flow_path = getRubberWhaleGroundTruth(); + + dst_frame_1 = imread(frame1_path); + dst_frame_2 = imread(frame2_path); + dst_GT = readOpticalFlow(gt_flow_path); + + if (dst_frame_1.empty() || dst_frame_2.empty() || dst_GT.empty()) + return false; + else + return true; +} + +TEST(DenseOpticalFlow_DIS, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + int presets[] = {DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST, DISOpticalFlow::PRESET_MEDIUM}; + float target_RMSE[] = {0.86f, 0.74f, 0.49f}; + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr algo; + + // iterate over presets: + for (int i = 0; i < 3; i++) + { + Mat flow; + algo = DISOpticalFlow::create(presets[i]); + algo->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE[i]); + } +} + +TEST(DenseOpticalFlow_VariationalRefinement, 
ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + float target_RMSE = 0.86f; + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr var_ref; + var_ref = VariationalRefinement::create(); + var_ref->setAlpha(20.0f); + var_ref->setDelta(5.0f); + var_ref->setGamma(10.0f); + var_ref->setSorIterations(25); + var_ref->setFixedPointIterations(25); + Mat flow(frame1.size(), CV_32FC2); + flow.setTo(0.0f); + var_ref->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE); +} + +}} // namespace diff --git a/modules/video/test/test_OF_reproducibility.cpp b/modules/video/test/test_OF_reproducibility.cpp new file mode 100644 index 0000000000..2fd40abe2b --- /dev/null +++ b/modules/video/test/test_OF_reproducibility.cpp @@ -0,0 +1,160 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +typedef tuple OFParams; +typedef TestWithParam DenseOpticalFlow_DIS; +typedef TestWithParam DenseOpticalFlow_VariationalRefinement; + +TEST_P(DenseOpticalFlow_DIS, MultithreadReproducibility) +{ + double MAX_DIF = 0.01; + double MAX_MEAN_DIF = 0.001; + int loopsCount = 2; + RNG rng(0); + + OFParams params = GetParam(); + Size size = get<0>(params); + + int nThreads = cv::getNumThreads(); + if (nThreads == 1) + throw SkipTestException("Single thread environment"); + for (int iter = 0; iter <= loopsCount; iter++) + { + Mat frame1(size, CV_8U); + randu(frame1, 0, 255); + Mat frame2(size, CV_8U); + randu(frame2, 0, 255); + + Ptr algo = DISOpticalFlow::create(); + int psz = rng.uniform(4, 16); + int pstr = rng.uniform(1, psz - 1); + int grad_iter = rng.uniform(1, 64); + int var_iter = rng.uniform(0, 10); + bool use_mean_normalization = !!rng.uniform(0, 2); + bool use_spatial_propagation = !!rng.uniform(0, 2); + algo->setFinestScale(0); + algo->setPatchSize(psz); + algo->setPatchStride(pstr); + algo->setGradientDescentIterations(grad_iter); + algo->setVariationalRefinementIterations(var_iter); + algo->setUseMeanNormalization(use_mean_normalization); + algo->setUseSpatialPropagation(use_spatial_propagation); + + cv::setNumThreads(nThreads); + Mat resMultiThread; + algo->calc(frame1, frame2, resMultiThread); + + cv::setNumThreads(1); + Mat resSingleThread; + algo->calc(frame1, frame2, resSingleThread); + + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); + + // resulting flow should be within the frame bounds: + double min_val, max_val; + minMaxLoc(resMultiThread, &min_val, &max_val); + EXPECT_LE(abs(min_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); + EXPECT_LE(abs(max_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); + } +} + +INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_DIS, Values(szODD, szQVGA)); + +TEST_P(DenseOpticalFlow_VariationalRefinement, MultithreadReproducibility) +{ + double MAX_DIF = 0.01; + double MAX_MEAN_DIF = 0.001; + float input_flow_rad = 5.0; + int loopsCount = 2; + RNG rng(0); + + OFParams params = GetParam(); + Size size = get<0>(params); + + int nThreads = cv::getNumThreads(); + if (nThreads == 1) + throw SkipTestException("Single thread environment"); + for (int iter = 0; iter <= loopsCount; iter++) + { + Mat frame1(size, CV_8U); + randu(frame1, 0, 255); + Mat frame2(size, CV_8U); + randu(frame2, 0, 255); + Mat flow(size, CV_32FC2); + randu(flow, -input_flow_rad, input_flow_rad); + + Ptr var = VariationalRefinement::create(); + var->setAlpha(rng.uniform(1.0f, 100.0f)); + var->setGamma(rng.uniform(0.1f, 10.0f)); + var->setDelta(rng.uniform(0.1f, 10.0f)); + var->setSorIterations(rng.uniform(1, 20)); + var->setFixedPointIterations(rng.uniform(1, 20)); + var->setOmega(rng.uniform(1.01f, 1.99f)); + + cv::setNumThreads(nThreads); + Mat resMultiThread; + flow.copyTo(resMultiThread); + var->calc(frame1, frame2, resMultiThread); + + cv::setNumThreads(1); + Mat resSingleThread; + flow.copyTo(resSingleThread); + var->calc(frame1, frame2, resSingleThread); + + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); + + // resulting flow should be within the frame bounds: + double min_val, max_val; + 
minMaxLoc(resMultiThread, &min_val, &max_val); + EXPECT_LE(abs(min_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); + EXPECT_LE(abs(max_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); + } +} + +INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_VariationalRefinement, Values(szODD, szQVGA)); + +}} // namespace diff --git a/modules/video/test/test_precomp.hpp b/modules/video/test/test_precomp.hpp index 19c21b0b9e..ba1f10998f 100644 --- a/modules/video/test/test_precomp.hpp +++ b/modules/video/test/test_precomp.hpp @@ -6,5 +6,10 @@ #include "opencv2/ts.hpp" #include "opencv2/video.hpp" +#include + +namespace opencv_test { +using namespace perf; +} #endif diff --git a/modules/video/test/test_tvl1optflow.cpp b/modules/video/test/test_tvl1optflow.cpp deleted file mode 100644 index 1c4a02fa84..0000000000 --- a/modules/video/test/test_tvl1optflow.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "test_precomp.hpp" - -namespace opencv_test { namespace { - -//#define DUMP - - // first four bytes, should be the same in little endian - const float FLO_TAG_FLOAT = 202021.25f; // check for this when READING the file - -#ifdef DUMP - // binary file format for flow data specified here: - // http://vision.middlebury.edu/flow/data/ - void writeOpticalFlowToFile(const Mat_& flow, const string& fileName) - { - const char FLO_TAG_STRING[] = "PIEH"; // use this when WRITING the file - ofstream file(fileName.c_str(), ios_base::binary); - - file << FLO_TAG_STRING; - - file.write((const char*) &flow.cols, sizeof(int)); - file.write((const char*) &flow.rows, sizeof(int)); - - for (int i = 0; i < flow.rows; ++i) - { - for (int j = 0; j < flow.cols; ++j) - { - const Point2f u = flow(i, j); - - file.write((const char*) &u.x, sizeof(float)); - file.write((const char*) &u.y, sizeof(float)); - } - } - } -#endif - - // binary file format for flow data specified here: - // http://vision.middlebury.edu/flow/data/ - void readOpticalFlowFromFile(Mat_& flow, const string& fileName) - { - std::ifstream file(fileName.c_str(), std::ios_base::binary); - - float tag; - file.read((char*) &tag, sizeof(float)); - CV_Assert( tag == FLO_TAG_FLOAT ); - - Size size; - - file.read((char*) &size.width, sizeof(int)); - file.read((char*) &size.height, sizeof(int)); - - flow.create(size); - - for (int i = 0; i < flow.rows; ++i) - { - for (int j = 0; j < flow.cols; ++j) - { - Point2f u; - - file.read((char*) &u.x, sizeof(float)); - file.read((char*) &u.y, sizeof(float)); - - flow(i, j) = u; - } - } - file.close(); - } - - bool isFlowCorrect(Point2f u) - { - return !cvIsNaN(u.x) && !cvIsNaN(u.y) && (fabs(u.x) < 1e9) && (fabs(u.y) < 1e9); - } - - void check(const Mat_& gold, const Mat_& flow, double threshold = 0.1, double expectedAccuracy = 0.95) - { - threshold = threshold*threshold; - - size_t gold_counter = 0; - size_t valid_counter = 0; - - for (int i = 0; i < gold.rows; ++i) - { - for (int j = 0; j < gold.cols; ++j) - { - const Point2f u1 = gold(i, j); - const Point2f u2 = flow(i, j); - - if (isFlowCorrect(u1)) - { - gold_counter++; - if (isFlowCorrect(u2)) - { - const Point2f diff = u1 - u2; - double err = diff.ddot(diff); - if (err <= threshold) - valid_counter++; - } - } - } - } - EXPECT_GE(valid_counter, expectedAccuracy * gold_counter); - } - -TEST(Video_calcOpticalFlowDual_TVL1, Regression) -{ - const string frame1_path = TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"; - const string frame2_path = TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"; - const string gold_flow_path = TS::ptr()->get_data_path() + "optflow/tvl1_flow.flo"; - - Mat frame1 = imread(frame1_path, IMREAD_GRAYSCALE); - Mat frame2 = imread(frame2_path, IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - ASSERT_FALSE(frame2.empty()); - - Mat_ flow; - Ptr tvl1 = cv::DualTVL1OpticalFlow::create(); - - tvl1->calc(frame1, frame2, flow); - -#ifdef DUMP - writeOpticalFlowToFile(flow, gold_flow_path); -#else - Mat_ gold; - readOpticalFlowFromFile(gold, gold_flow_path); - - ASSERT_EQ(gold.rows, flow.rows); - ASSERT_EQ(gold.cols, flow.cols); - - check(gold, flow); -#endif -} - -}} // namespace diff --git a/samples/cpp/dis_opticalflow.cpp b/samples/cpp/dis_opticalflow.cpp new file mode 100644 index 0000000000..e1f905dc82 --- /dev/null +++ b/samples/cpp/dis_opticalflow.cpp @@ -0,0 +1,73 @@ + +#include "opencv2/core/utility.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/video.hpp" + 
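+// This sample runs DISOpticalFlow on consecutive frames of a video file and visualizes the
+// resulting flow in HSV space: hue encodes the flow direction, saturation the normalized
+// magnitude.
+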
+using namespace std; +using namespace cv; + +static void help() +{ + printf("Usage: dis_optflow \n"); +} + +int main(int argc, char **argv) +{ + VideoCapture cap; + + if (argc < 2) + { + help(); + exit(1); + } + + cap.open(argv[1]); + if(!cap.isOpened()) + { + printf("ERROR: Cannot open file %s\n", argv[1]); + return -1; + } + + Mat prevgray, gray, rgb, frame; + Mat flow, flow_uv[2]; + Mat mag, ang; + Mat hsv_split[3], hsv; + char ret; + + namedWindow("flow", 1); + namedWindow("orig", 1); + + Ptr algorithm = DISOpticalFlow::create(DISOpticalFlow::PRESET_MEDIUM); + + while(true) + { + cap >> frame; + if (frame.empty()) + break; + + cvtColor(frame, gray, COLOR_BGR2GRAY); + + if (!prevgray.empty()) + { + algorithm->calc(prevgray, gray, flow); + split(flow, flow_uv); + multiply(flow_uv[1], -1, flow_uv[1]); + cartToPolar(flow_uv[0], flow_uv[1], mag, ang, true); + normalize(mag, mag, 0, 1, NORM_MINMAX); + hsv_split[0] = ang; + hsv_split[1] = mag; + hsv_split[2] = Mat::ones(ang.size(), ang.type()); + merge(hsv_split, 3, hsv); + cvtColor(hsv, rgb, COLOR_HSV2BGR); + imshow("flow", rgb); + imshow("orig", frame); + } + + if ((ret = (char)waitKey(20)) > 0) + break; + std::swap(prevgray, gray); + } + + return 0; +} diff --git a/samples/cpp/tvl1_optical_flow.cpp b/samples/cpp/tvl1_optical_flow.cpp deleted file mode 100644 index 95871c7298..0000000000 --- a/samples/cpp/tvl1_optical_flow.cpp +++ /dev/null @@ -1,204 +0,0 @@ -#include -#include - -#include -#include "opencv2/video.hpp" -#include "opencv2/imgcodecs.hpp" -#include "opencv2/highgui.hpp" - -using namespace cv; -using namespace std; - -inline bool isFlowCorrect(Point2f u) -{ - return !cvIsNaN(u.x) && !cvIsNaN(u.y) && fabs(u.x) < 1e9 && fabs(u.y) < 1e9; -} - -static Vec3b computeColor(float fx, float fy) -{ - static bool first = true; - - // relative lengths of color transitions: - // these are chosen based on perceptual similarity - // (e.g. 
one can distinguish more shades between red and yellow - // than between yellow and green) - const int RY = 15; - const int YG = 6; - const int GC = 4; - const int CB = 11; - const int BM = 13; - const int MR = 6; - const int NCOLS = RY + YG + GC + CB + BM + MR; - static Vec3i colorWheel[NCOLS]; - - if (first) - { - int k = 0; - - for (int i = 0; i < RY; ++i, ++k) - colorWheel[k] = Vec3i(255, 255 * i / RY, 0); - - for (int i = 0; i < YG; ++i, ++k) - colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0); - - for (int i = 0; i < GC; ++i, ++k) - colorWheel[k] = Vec3i(0, 255, 255 * i / GC); - - for (int i = 0; i < CB; ++i, ++k) - colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255); - - for (int i = 0; i < BM; ++i, ++k) - colorWheel[k] = Vec3i(255 * i / BM, 0, 255); - - for (int i = 0; i < MR; ++i, ++k) - colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR); - - first = false; - } - - const float rad = sqrt(fx * fx + fy * fy); - const float a = atan2(-fy, -fx) / (float)CV_PI; - - const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1); - const int k0 = static_cast(fk); - const int k1 = (k0 + 1) % NCOLS; - const float f = fk - k0; - - Vec3b pix; - - for (int b = 0; b < 3; b++) - { - const float col0 = colorWheel[k0][b] / 255.f; - const float col1 = colorWheel[k1][b] / 255.f; - - float col = (1 - f) * col0 + f * col1; - - if (rad <= 1) - col = 1 - rad * (1 - col); // increase saturation with radius - else - col *= .75; // out of range - - pix[2 - b] = static_cast(255.f * col); - } - - return pix; -} - -static void drawOpticalFlow(const Mat_& flow, Mat& dst, float maxmotion = -1) -{ - dst.create(flow.size(), CV_8UC3); - dst.setTo(Scalar::all(0)); - - // determine motion range: - float maxrad = maxmotion; - - if (maxmotion <= 0) - { - maxrad = 1; - for (int y = 0; y < flow.rows; ++y) - { - for (int x = 0; x < flow.cols; ++x) - { - Point2f u = flow(y, x); - - if (!isFlowCorrect(u)) - continue; - - maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y)); - } - } - } - - for (int y = 0; y < flow.rows; ++y) - { - for (int x = 0; x < flow.cols; ++x) - { - Point2f u = flow(y, x); - - if (isFlowCorrect(u)) - dst.at(y, x) = computeColor(u.x / maxrad, u.y / maxrad); - } - } -} - -// binary file format for flow data specified here: -// http://vision.middlebury.edu/flow/data/ -static void writeOpticalFlowToFile(const Mat_& flow, const string& fileName) -{ - static const char FLO_TAG_STRING[] = "PIEH"; - - ofstream file(fileName.c_str(), ios_base::binary); - - file << FLO_TAG_STRING; - - file.write((const char*) &flow.cols, sizeof(int)); - file.write((const char*) &flow.rows, sizeof(int)); - - for (int i = 0; i < flow.rows; ++i) - { - for (int j = 0; j < flow.cols; ++j) - { - const Point2f u = flow(i, j); - - file.write((const char*) &u.x, sizeof(float)); - file.write((const char*) &u.y, sizeof(float)); - } - } -} - -int main(int argc, const char* argv[]) -{ - cv::CommandLineParser parser(argc, argv, "{help h || show help message}" - "{ @frame0 | | frame 0}{ @frame1 | | frame 1}{ @output | | output flow}"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - string frame0_name = parser.get("@frame0"); - string frame1_name = parser.get("@frame1"); - string file = parser.get("@output"); - if (frame0_name.empty() || frame1_name.empty() || file.empty()) - { - cerr << "Usage : " << argv[0] << " [] [] []" << endl; - return -1; - } - - Mat frame0 = imread(frame0_name, IMREAD_GRAYSCALE); - Mat frame1 = imread(frame1_name, IMREAD_GRAYSCALE); - - if (frame0.empty()) - { - cerr << "Can't open image [" << 
parser.get("frame0") << "]" << endl; - return -1; - } - if (frame1.empty()) - { - cerr << "Can't open image [" << parser.get("frame1") << "]" << endl; - return -1; - } - - if (frame1.size() != frame0.size()) - { - cerr << "Images should be of equal sizes" << endl; - return -1; - } - - Mat_ flow; - Ptr tvl1 = cv::DualTVL1OpticalFlow::create(); - - const double start = (double)getTickCount(); - tvl1->calc(frame0, frame1, flow); - const double timeSec = (getTickCount() - start) / getTickFrequency(); - cout << "calcOpticalFlowDual_TVL1 : " << timeSec << " sec" << endl; - - Mat out; - drawOpticalFlow(flow, out); - if (!file.empty()) - writeOpticalFlowToFile(flow, file); - - imshow("Flow", out); - waitKey(); - - return 0; -} diff --git a/samples/python/dis_opt_flow.py b/samples/python/dis_opt_flow.py new file mode 100755 index 0000000000..037bf3a23a --- /dev/null +++ b/samples/python/dis_opt_flow.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +''' +example to show optical flow estimation using DISOpticalFlow + +USAGE: dis_opt_flow.py [] + +Keys: + 1 - toggle HSV flow visualization + 2 - toggle glitch + 3 - toggle spatial propagation of flow vectors + 4 - toggle temporal propagation of flow vectors +ESC - exit +''' + +# Python 2/3 compatibility +from __future__ import print_function + +import numpy as np +import cv2 as cv +import video + + +def draw_flow(img, flow, step=16): + h, w = img.shape[:2] + y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int) + fx, fy = flow[y,x].T + lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2) + lines = np.int32(lines + 0.5) + vis = cv.cvtColor(img, cv.COLOR_GRAY2BGR) + cv.polylines(vis, lines, 0, (0, 255, 0)) + for (x1, y1), (x2, y2) in lines: + cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1) + return vis + + +def draw_hsv(flow): + h, w = flow.shape[:2] + fx, fy = flow[:,:,0], flow[:,:,1] + ang = np.arctan2(fy, fx) + np.pi + v = np.sqrt(fx*fx+fy*fy) + hsv = np.zeros((h, w, 3), np.uint8) + hsv[...,0] = ang*(180/np.pi/2) + hsv[...,1] = 255 + hsv[...,2] = np.minimum(v*4, 255) + bgr = cv.cvtColor(hsv, cv.COLOR_HSV2BGR) + return bgr + + +def warp_flow(img, flow): + h, w = flow.shape[:2] + flow = -flow + flow[:,:,0] += np.arange(w) + flow[:,:,1] += np.arange(h)[:,np.newaxis] + res = cv.remap(img, flow, None, cv.INTER_LINEAR) + return res + + +if __name__ == '__main__': + import sys + print(__doc__) + try: + fn = sys.argv[1] + except IndexError: + fn = 0 + + cam = video.create_capture(fn) + ret, prev = cam.read() + prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY) + show_hsv = False + show_glitch = False + use_spatial_propagation = False + use_temporal_propagation = True + cur_glitch = prev.copy() + inst = cv.DISOpticalFlow.create(cv.DISOPTICAL_FLOW_PRESET_MEDIUM) + inst.setUseSpatialPropagation(use_spatial_propagation) + + flow = None + while True: + ret, img = cam.read() + gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) + if flow is not None and use_temporal_propagation: + #warp previous flow to get an initial approximation for the current flow: + flow = inst.calc(prevgray, gray, warp_flow(flow,flow)) + else: + flow = inst.calc(prevgray, gray, None) + prevgray = gray + + cv.imshow('flow', draw_flow(gray, flow)) + if show_hsv: + cv.imshow('flow HSV', draw_hsv(flow)) + if show_glitch: + cur_glitch = warp_flow(cur_glitch, flow) + cv.imshow('glitch', cur_glitch) + + ch = 0xFF & cv.waitKey(5) + if ch == 27: + break + if ch == ord('1'): + show_hsv = not show_hsv + print('HSV flow visualization is', ['off', 'on'][show_hsv]) + if ch == ord('2'): + show_glitch = 
+            if show_glitch:
+                cur_glitch = img.copy()
+            print('glitch is', ['off', 'on'][show_glitch])
+        if ch == ord('3'):
+            use_spatial_propagation = not use_spatial_propagation
+            inst.setUseSpatialPropagation(use_spatial_propagation)
+            print('spatial propagation is', ['off', 'on'][use_spatial_propagation])
+        if ch == ord('4'):
+            use_temporal_propagation = not use_temporal_propagation
+            print('temporal propagation is', ['off', 'on'][use_temporal_propagation])
+    cv.destroyAllWindows()
diff --git a/samples/tapi/dense_optical_flow.cpp b/samples/tapi/dense_optical_flow.cpp
index aad083af8f..f90e63b804 100644
--- a/samples/tapi/dense_optical_flow.cpp
+++ b/samples/tapi/dense_optical_flow.cpp
@@ -50,7 +50,7 @@ int main(int argc, const char* argv[])
     const char* keys =
         "{ h help | | print help message }"
         "{ c camera | 0 | capture video from camera (device index starting from 0) }"
-        "{ a algorithm | fb | algorithm (supported: 'fb', 'tvl')}"
+        "{ a algorithm | fb | algorithm (supported: 'fb', 'dis')}"
         "{ m cpu | | run without OpenCL }"
         "{ v video | | use video as input }"
         "{ o original | | use original frame size (do not resize to 640x480)}"
@@ -84,11 +84,11 @@ int main(int argc, const char* argv[])
         return 2;
     }

-    cv::Ptr<cv::DenseOpticalFlow> alg;
+    Ptr<DenseOpticalFlow> alg;
     if (algorithm == "fb")
-        alg = cv::FarnebackOpticalFlow::create();
-    else if (algorithm == "tvl")
-        alg = cv::DualTVL1OpticalFlow::create();
+        alg = FarnebackOpticalFlow::create();
+    else if (algorithm == "dis")
+        alg = DISOpticalFlow::create(DISOpticalFlow::PRESET_FAST);
     else
     {
         cout << "Invalid algorithm: " << algorithm << endl;