From d4af89781b81cdc2ffa93af2a04a26370575d230 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Thu, 11 Jun 2020 23:06:18 +0900 Subject: [PATCH 01/15] fix corner case of libnvcuvid * detect header automatically and not based on version number --- cmake/OpenCVDetectCUDA.cmake | 21 +++++++++++++++++++- cmake/templates/cvconfig.h.in | 2 ++ modules/cudacodec/src/cuvid_video_source.hpp | 4 ++-- modules/cudacodec/src/frame_queue.hpp | 4 ++-- modules/cudacodec/src/precomp.hpp | 4 ++-- modules/cudacodec/src/video_decoder.hpp | 4 ++-- modules/cudacodec/src/video_parser.hpp | 4 ++-- 7 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 98a00fdd87..37a77cd660 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -38,11 +38,30 @@ if(CUDA_FOUND) endif() if(WITH_NVCUVID) + macro(SEARCH_NVCUVID_HEADER _filename _result) + # place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR + find_path(_header_result + ${_filename} + PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}" + ENV CUDA_PATH + ENV CUDA_INC_PATH + PATH_SUFFIXES include + NO_DEFAULT_PATH + ) + if("x${_header_result}" STREQUAL "x_header_result-NOTFOUND") + set(${_result} 0) + else() + set(${_result} 1) + endif() + endmacro() + SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER) + SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER) find_cuda_helper_libs(nvcuvid) if(WIN32) find_cuda_helper_libs(nvcuvenc) endif() - if(CUDA_nvcuvid_LIBRARY) + if(CUDA_nvcuvid_LIBRARY AND (${HAVE_NVCUVID_HEADER} OR ${HAVE_DYNLINK_NVCUVID_HEADER})) + # make sure to have both header and library before enabling set(HAVE_NVCUVID 1) endif() if(CUDA_nvcuvenc_LIBRARY) diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index f7656e9d71..9a42bfa3d1 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -127,6 +127,8 @@ /* NVIDIA Video Decoding API*/ #cmakedefine HAVE_NVCUVID +#cmakedefine HAVE_NVCUVID_HEADER +#cmakedefine HAVE_DYNLINK_NVCUVID_HEADER /* NVIDIA Video Encoding API*/ #cmakedefine HAVE_NVCUVENC diff --git a/modules/cudacodec/src/cuvid_video_source.hpp b/modules/cudacodec/src/cuvid_video_source.hpp index 802e65a92c..4dd7761620 100644 --- a/modules/cudacodec/src/cuvid_video_source.hpp +++ b/modules/cudacodec/src/cuvid_video_source.hpp @@ -44,9 +44,9 @@ #ifndef __CUVID_VIDEO_SOURCE_HPP__ #define __CUVID_VIDEO_SOURCE_HPP__ -#if CUDA_VERSION >= 9000 && CUDA_VERSION < 10000 +#if defined(HAVE_DYNLINK_NVCUVID_HEADER) #include -#else +#elif defined(HAVE_NVCUVID_HEADER) #include #endif #include "opencv2/core/private.cuda.hpp" diff --git a/modules/cudacodec/src/frame_queue.hpp b/modules/cudacodec/src/frame_queue.hpp index 3ff06a67ed..f7fe7ad57d 100644 --- a/modules/cudacodec/src/frame_queue.hpp +++ b/modules/cudacodec/src/frame_queue.hpp @@ -47,9 +47,9 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.cuda.hpp" -#if CUDA_VERSION >= 9000 && CUDA_VERSION < 10000 +#if defined(HAVE_DYNLINK_NVCUVID_HEADER) #include -#else +#elif defined(HAVE_NVCUVID_HEADER) #include #endif diff --git a/modules/cudacodec/src/precomp.hpp b/modules/cudacodec/src/precomp.hpp index 728924fa61..e453d90197 100644 --- a/modules/cudacodec/src/precomp.hpp +++ b/modules/cudacodec/src/precomp.hpp @@ -56,9 +56,9 @@ #include "opencv2/core/private.cuda.hpp" #ifdef HAVE_NVCUVID - #if CUDA_VERSION >= 9000 && CUDA_VERSION < 10000 + #if defined(HAVE_DYNLINK_NVCUVID_HEADER) #include - #else + #elif defined(HAVE_NVCUVID_HEADER) #include #endif diff --git a/modules/cudacodec/src/video_decoder.hpp b/modules/cudacodec/src/video_decoder.hpp index 30878caa7b..46caccaf83 100644 --- a/modules/cudacodec/src/video_decoder.hpp +++ b/modules/cudacodec/src/video_decoder.hpp @@ -44,9 +44,9 @@ #ifndef __VIDEO_DECODER_HPP__ #define __VIDEO_DECODER_HPP__ -#if CUDA_VERSION >= 9000 && CUDA_VERSION < 10000 +#if defined(HAVE_DYNLINK_NVCUVID_HEADER) #include -#else +#elif defined(HAVE_NVCUVID_HEADER) #include #endif diff --git a/modules/cudacodec/src/video_parser.hpp b/modules/cudacodec/src/video_parser.hpp index 5bd0f96562..03fff8e96a 100644 --- a/modules/cudacodec/src/video_parser.hpp +++ b/modules/cudacodec/src/video_parser.hpp @@ -44,9 +44,9 @@ #ifndef __VIDEO_PARSER_HPP__ #define __VIDEO_PARSER_HPP__ -#if CUDA_VERSION >= 9000 && CUDA_VERSION < 10000 +#if defined(HAVE_DYNLINK_NVCUVID_HEADER) #include -#else +#elif defined(HAVE_NVCUVID_HEADER) #include #endif From 442999dcdb0e51fe48661fe92803a568a52eeec9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 12 Jun 2020 10:07:58 +0000 Subject: [PATCH 02/15] core: fix handling of ND-arrays in dumpInputArray() helpers --- modules/core/src/bindings_utils.cpp | 94 ++++++++++++++++++++++++----- modules/python/test/test_misc.py | 21 +++++-- 2 files changed, 97 insertions(+), 18 deletions(-) diff --git a/modules/core/src/bindings_utils.cpp b/modules/core/src/bindings_utils.cpp index 432f280e52..050b7247f8 100644 --- a/modules/core/src/bindings_utils.cpp +++ b/modules/core/src/bindings_utils.cpp @@ -25,9 +25,26 @@ String dumpInputArray(InputArray argument) break; // done } ss << cv::format(" total(-1)=%lld", (long long int)argument.total(-1)); - ss << cv::format(" dims(-1)=%d", argument.dims(-1)); - Size size = argument.size(-1); - ss << cv::format(" size(-1)=%dx%d", size.width, size.height); + int dims = argument.dims(-1); + ss << cv::format(" dims(-1)=%d", dims); + if (dims <= 2) + { + Size size = argument.size(-1); + ss << cv::format(" size(-1)=%dx%d", size.width, size.height); + } + else + { + int sz[CV_MAX_DIM] = {0}; + argument.sizend(sz, -1); + ss << " size(-1)=["; + for (int i = 0; i < dims; i++) + { + if (i > 0) + ss << ' '; + ss << sz[i]; + } + ss << "]"; + } ss << " type(-1)=" << cv::typeToString(argument.type(-1)); } while (0); } @@ -61,10 +78,26 @@ CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument) if (argument.total(-1) > 0) { ss << " type(0)=" << cv::typeToString(argument.type(0)); - ss << cv::format(" dims(0)=%d", argument.dims(0)); - size = argument.size(0); - ss << cv::format(" size(0)=%dx%d", size.width, size.height); - ss << " type(0)=" << cv::typeToString(argument.type(0)); + int dims = argument.dims(0); + ss << cv::format(" dims(0)=%d", dims); + if (dims <= 2) + { + Size size0 = argument.size(0); + ss << cv::format(" size(0)=%dx%d", size0.width, size0.height); + } + else + { + int sz[CV_MAX_DIM] = {0}; + argument.sizend(sz, 0); + ss << " size(0)=["; + for (int i = 0; i < dims; i++) + { + if (i > 0) + ss << ' '; + ss << sz[i]; + } + ss << "]"; + } } } while (0); } @@ -92,9 +125,26 @@ CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument) break; // done } ss << cv::format(" total(-1)=%lld", (long long int)argument.total(-1)); - ss << cv::format(" dims(-1)=%d", argument.dims(-1)); - Size size = argument.size(-1); - ss << cv::format(" size(-1)=%dx%d", size.width, size.height); + int dims = argument.dims(-1); + ss << cv::format(" dims(-1)=%d", dims); + if (dims <= 2) + { + Size size = argument.size(-1); + ss << cv::format(" size(-1)=%dx%d", size.width, size.height); + } + else + { + int sz[CV_MAX_DIM] = {0}; + argument.sizend(sz, -1); + ss << " size(-1)=["; + for (int i = 0; i < dims; i++) + { + if (i > 0) + ss << ' '; + ss << sz[i]; + } + ss << "]"; + } ss << " type(-1)=" << cv::typeToString(argument.type(-1)); } while (0); } @@ -128,10 +178,26 @@ CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argume if (argument.total(-1) > 0) { ss << " type(0)=" << cv::typeToString(argument.type(0)); - ss << cv::format(" dims(0)=%d", argument.dims(0)); - size = argument.size(0); - ss << cv::format(" size(0)=%dx%d", size.width, size.height); - ss << " type(0)=" << cv::typeToString(argument.type(0)); + int dims = argument.dims(0); + ss << cv::format(" dims(0)=%d", dims); + if (dims <= 2) + { + Size size0 = argument.size(0); + ss << cv::format(" size(0)=%dx%d", size0.width, size0.height); + } + else + { + int sz[CV_MAX_DIM] = {0}; + argument.sizend(sz, 0); + ss << " size(0)=["; + for (int i = 0; i < dims; i++) + { + if (i > 0) + ss << ' '; + ss << sz[i]; + } + ss << "]"; + } } } while (0); } diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index b25ef7efbb..f9a350d160 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -105,22 +105,35 @@ class Arguments(NewOpenCVTests): a = np.array([[[1, 2]], [[3, 4]], [[5, 6]]], dtype=float) res5 = cv.utils.dumpInputArray(a) # 64FC2 self.assertEqual(res5, "InputArray: empty()=false kind=0x00010000 flags=0x01010000 total(-1)=3 dims(-1)=2 size(-1)=1x3 type(-1)=CV_64FC2") + a = np.zeros((2,3,4), dtype='f') + res6 = cv.utils.dumpInputArray(a) + self.assertEqual(res6, "InputArray: empty()=false kind=0x00010000 flags=0x01010000 total(-1)=6 dims(-1)=2 size(-1)=3x2 type(-1)=CV_32FC4") + a = np.zeros((2,3,4,5), dtype='f') + res7 = cv.utils.dumpInputArray(a) + self.assertEqual(res7, "InputArray: empty()=false kind=0x00010000 flags=0x01010000 total(-1)=120 dims(-1)=4 size(-1)=[2 3 4 5] type(-1)=CV_32FC1") def test_InputArrayOfArrays(self): res1 = cv.utils.dumpInputArrayOfArrays(None) # self.assertEqual(res1, "InputArray: noArray()") # not supported self.assertEqual(res1, "InputArrayOfArrays: empty()=true kind=0x00050000 flags=0x01050000 total(-1)=0 dims(-1)=1 size(-1)=0x0") res2_1 = cv.utils.dumpInputArrayOfArrays((1, 2)) # { Scalar:all(1), Scalar::all(2) } - self.assertEqual(res2_1, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_64FC1 dims(0)=2 size(0)=1x4 type(0)=CV_64FC1") + self.assertEqual(res2_1, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_64FC1 dims(0)=2 size(0)=1x4") res2_2 = cv.utils.dumpInputArrayOfArrays([1.5]) - self.assertEqual(res2_2, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=1 dims(-1)=1 size(-1)=1x1 type(0)=CV_64FC1 dims(0)=2 size(0)=1x4 type(0)=CV_64FC1") + self.assertEqual(res2_2, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=1 dims(-1)=1 size(-1)=1x1 type(0)=CV_64FC1 dims(0)=2 size(0)=1x4") a = np.array([[1, 2], [3, 4], [5, 6]]) b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) res3 = cv.utils.dumpInputArrayOfArrays([a, b]) - self.assertEqual(res3, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_32SC1 dims(0)=2 size(0)=2x3 type(0)=CV_32SC1") + self.assertEqual(res3, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_32SC1 dims(0)=2 size(0)=2x3") c = np.array([[[1, 2], [3, 4], [5, 6]]], dtype='f') res4 = cv.utils.dumpInputArrayOfArrays([c, a, b]) - self.assertEqual(res4, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=3 dims(-1)=1 size(-1)=3x1 type(0)=CV_32FC2 dims(0)=2 size(0)=3x1 type(0)=CV_32FC2") + self.assertEqual(res4, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=3 dims(-1)=1 size(-1)=3x1 type(0)=CV_32FC2 dims(0)=2 size(0)=3x1") + a = np.zeros((2,3,4), dtype='f') + res5 = cv.utils.dumpInputArrayOfArrays([a, b]) + self.assertEqual(res5, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_32FC4 dims(0)=2 size(0)=3x2") + # TODO: fix conversion error + #a = np.zeros((2,3,4,5), dtype='f') + #res6 = cv.utils.dumpInputArray([a, b]) + #self.assertEqual(res6, "InputArrayOfArrays: empty()=false kind=0x00050000 flags=0x01050000 total(-1)=2 dims(-1)=1 size(-1)=2x1 type(0)=CV_32FC1 dims(0)=4 size(0)=[2 3 4 5]") def test_parse_to_bool_convertible(self): try_to_convert = partial(self._try_to_convert, cv.utils.dumpBool) From b0def9617fed66a2c3b9e56a4ef7b435d611921c Mon Sep 17 00:00:00 2001 From: Mehdi Zakaria Benadel Date: Fri, 12 Jun 2020 20:59:43 +0200 Subject: [PATCH 03/15] Fix typo This typo just made me lose my mind on the conan package update. please merge. --- cmake/OpenCVFindOpenEXR.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVFindOpenEXR.cmake b/cmake/OpenCVFindOpenEXR.cmake index 9103f1f5a7..ef633e853a 100644 --- a/cmake/OpenCVFindOpenEXR.cmake +++ b/cmake/OpenCVFindOpenEXR.cmake @@ -88,7 +88,7 @@ FOREACH(SEARCH_PATH ${SEARCH_PATHS}) ocv_find_openexr("-${OPENEXR_VERSION}") ocv_find_openexr("-${OPENEXR_VERSION}_s") ocv_find_openexr("-${OPENEXR_VERSION}_d") - ocv_find_openexr("-${OPEXEXR_VERSION}_s_d") + ocv_find_openexr("-${OPENEXR_VERSION}_s_d") ocv_find_openexr("") ocv_find_openexr("_s") ocv_find_openexr("_d") From 411ce04f543de50f626313457902345874fee423 Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Tue, 16 Jun 2020 01:43:51 +0900 Subject: [PATCH 04/15] CUDA_OptFlow/OpticalFlowDual_TVL1 Asynchronous test --- modules/cudaoptflow/test/test_optflow.cpp | 63 ++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index 37ffe9e5c4..92f9466af6 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -405,10 +405,71 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy) EXPECT_MAT_SIMILAR(flow, d_flow, 4e-3); } +class TVL1AsyncParallelLoopBody : public cv::ParallelLoopBody +{ +public: + TVL1AsyncParallelLoopBody(const cv::cuda::GpuMat& d_img1_, const cv::cuda::GpuMat& d_img2_, cv::cuda::GpuMat* d_flow_, int iterations_, double gamma_) + : d_img1(d_img1_), d_img2(d_img2_), d_flow(d_flow_), iterations(iterations_), gamma(gamma_) {} + ~TVL1AsyncParallelLoopBody() {} + void operator()(const cv::Range& r) const + { + for (int i = r.start; i < r.end; i++) { + cv::cuda::Stream stream; + cv::Ptr d_alg = cv::cuda::OpticalFlowDual_TVL1::create(); + d_alg->setNumIterations(iterations); + d_alg->setGamma(gamma); + d_alg->calc(d_img1, d_img2, d_flow[i], stream); + stream.waitForCompletion(); + } + } +protected: + const cv::cuda::GpuMat& d_img1; + const cv::cuda::GpuMat& d_img2; + cv::cuda::GpuMat* d_flow; + int iterations; + double gamma; +}; + +#define NUM_STREAMS 16 + +CUDA_TEST_P(OpticalFlowDual_TVL1, Async) +{ + if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_30)) + { + throw SkipTestException("CUDA device doesn't support texture objects"); + } + else + { + cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + const int iterations = 10; + + // Synchronous call + cv::Ptr d_alg = + cv::cuda::OpticalFlowDual_TVL1::create(); + d_alg->setNumIterations(iterations); + d_alg->setGamma(gamma); + + cv::cuda::GpuMat d_flow_gold; + d_alg->calc(loadMat(frame0), loadMat(frame1), d_flow_gold); + + // Asynchronous call + cv::cuda::GpuMat d_flow[NUM_STREAMS]; + cv::parallel_for_(cv::Range(0, NUM_STREAMS), TVL1AsyncParallelLoopBody(loadMat(frame0), loadMat(frame1), d_flow, iterations, gamma)); + + // Compare the results of synchronous call and asynchronous call + for (int i = 0; i < NUM_STREAMS; i++) + EXPECT_MAT_NEAR(d_flow_gold, d_flow[i], 0.0); + } +} + INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( ALL_DEVICES, testing::Values(Gamma(0.0), Gamma(1.0)))); - }} // namespace #endif // HAVE_CUDA From 676b818d6aefeb445e552a33081e34221cf5f8bc Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 15 Jun 2020 16:02:52 +0300 Subject: [PATCH 05/15] Removed plugin dispatcher backport of commit 74113737f3710069e5ae2daff09e77c824dd5a2b --- modules/dnn/src/ie_ngraph.cpp | 1 - modules/dnn/src/op_inf_engine.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index e1bf738313..7cac0c3593 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -15,7 +15,6 @@ #ifdef HAVE_DNN_NGRAPH #include -#include #endif // HAVE_DNN_NGRAPH #include diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 5f3af4658e..7020e3b0a9 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -11,7 +11,6 @@ #ifdef HAVE_INF_ENGINE #include -#include #endif // HAVE_INF_ENGINE #include From 2043e06102fadd5df4e52853d8f08f0510763aff Mon Sep 17 00:00:00 2001 From: Namgoo Lee Date: Wed, 17 Jun 2020 01:04:22 +0900 Subject: [PATCH 06/15] cuda optflow tvl1 : async safety also modify cuda canny to use createTextureObjectPitch2D, etc. --- .../core/include/opencv2/core/cuda/common.hpp | 14 +++ modules/cudaimgproc/src/cuda/canny.cu | 69 +++++------ modules/cudaimgproc/test/test_canny.cpp | 2 +- modules/cudaoptflow/src/cuda/tvl1flow.cu | 107 ++++++++++++++++-- 4 files changed, 138 insertions(+), 54 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda/common.hpp b/modules/core/include/opencv2/core/cuda/common.hpp index 14b1f3f5ae..80b2ff08b1 100644 --- a/modules/core/include/opencv2/core/cuda/common.hpp +++ b/modules/core/include/opencv2/core/cuda/common.hpp @@ -101,6 +101,20 @@ namespace cv { namespace cuda cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) ); } + + template inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz& img, const cudaTextureDesc& texDesc) + { + cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = static_cast(img.ptr()); + resDesc.res.pitch2D.height = img.rows; + resDesc.res.pitch2D.width = img.cols; + resDesc.res.pitch2D.pitchInBytes = img.step; + resDesc.res.pitch2D.desc = cudaCreateChannelDesc(); + + cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) ); + } } }} diff --git a/modules/cudaimgproc/src/cuda/canny.cu b/modules/cudaimgproc/src/cuda/canny.cu index 4418b8e5eb..253287ca31 100644 --- a/modules/cudaimgproc/src/cuda/canny.cu +++ b/modules/cudaimgproc/src/cuda/canny.cu @@ -90,53 +90,47 @@ namespace cv { namespace cuda { namespace device namespace canny { - texture tex_src(false, cudaFilterModePoint, cudaAddressModeClamp); struct SrcTex { - int xoff; - int yoff; + virtual ~SrcTex() {} + __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {} - __device__ __forceinline__ int operator ()(int y, int x) const + __device__ __forceinline__ virtual int operator ()(int y, int x) const = 0; + + int xoff; + int yoff; + }; + + texture tex_src(false, cudaFilterModePoint, cudaAddressModeClamp); + struct SrcTexRef : SrcTex + { + __host__ SrcTexRef(int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {} + + __device__ __forceinline__ int operator ()(int y, int x) const override { return tex2D(tex_src, x + xoff, y + yoff); } }; - struct SrcTexObject + struct SrcTexObj : SrcTex { - int xoff; - int yoff; - cudaTextureObject_t tex_src_object; - __host__ SrcTexObject(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : xoff(_xoff), yoff(_yoff), tex_src_object(_tex_src_object) { } + __host__ SrcTexObj(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { } - __device__ __forceinline__ int operator ()(int y, int x) const + __device__ __forceinline__ int operator ()(int y, int x) const override { return tex2D(tex_src_object, x + xoff, y + yoff); } + cudaTextureObject_t tex_src_object; }; - template __global__ - void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y >= mag.rows || x >= mag.cols) - return; - - int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1)); - int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1)); - - dx(y, x) = dxVal; - dy(y, x) = dyVal; - - mag(y, x) = norm(dxVal, dyVal); - } - - template __global__ - void calcMagnitudeKernel(const SrcTexObject src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) + template < + class T, + class Norm, + typename = std::enable_if_t::value> + > + __global__ void calcMagnitudeKernel(const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -162,15 +156,6 @@ namespace canny if (cc30) { - cudaResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = cudaResourceTypePitch2D; - resDesc.res.pitch2D.devPtr = srcWhole.ptr(); - resDesc.res.pitch2D.height = srcWhole.rows; - resDesc.res.pitch2D.width = srcWhole.cols; - resDesc.res.pitch2D.pitchInBytes = srcWhole.step; - resDesc.res.pitch2D.desc = cudaCreateChannelDesc(); - cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; @@ -178,9 +163,9 @@ namespace canny texDesc.addressMode[2] = cudaAddressModeClamp; cudaTextureObject_t tex = 0; - cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL); + createTextureObjectPitch2D(&tex, srcWhole, texDesc); - SrcTexObject src(xoff, yoff, tex); + SrcTexObj src(xoff, yoff, tex); if (L2Grad) { @@ -205,7 +190,7 @@ namespace canny else { bindTexture(&tex_src, srcWhole); - SrcTex src(xoff, yoff); + SrcTexRef src(xoff, yoff); if (L2Grad) { diff --git a/modules/cudaimgproc/test/test_canny.cpp b/modules/cudaimgproc/test/test_canny.cpp index a782a87b3b..1b48e7d321 100644 --- a/modules/cudaimgproc/test/test_canny.cpp +++ b/modules/cudaimgproc/test/test_canny.cpp @@ -116,7 +116,7 @@ protected: bool useL2gradient; }; -#define NUM_STREAMS 64 +#define NUM_STREAMS 128 CUDA_TEST_P(Canny, Async) { diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index 66f0d664a0..2688e05c61 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -45,6 +45,7 @@ #include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/border_interpolate.hpp" #include "opencv2/core/cuda/limits.hpp" +#include "opencv2/core/cuda.hpp" using namespace cv::cuda; using namespace cv::cuda::device; @@ -101,11 +102,64 @@ namespace tvl1flow } } + struct SrcTex + { + virtual ~SrcTex() {} + + __device__ __forceinline__ virtual float I1(float x, float y) const = 0; + __device__ __forceinline__ virtual float I1x(float x, float y) const = 0; + __device__ __forceinline__ virtual float I1y(float x, float y) const = 0; + }; + texture tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp); texture tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp); texture tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); + struct SrcTexRef : SrcTex + { + __device__ __forceinline__ float I1(float x, float y) const override + { + return tex2D(tex_I1, x, y); + } + __device__ __forceinline__ float I1x(float x, float y) const override + { + return tex2D(tex_I1x, x, y); + } + __device__ __forceinline__ float I1y(float x, float y) const override + { + return tex2D(tex_I1y, x, y); + } + }; - __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) + struct SrcTexObj : SrcTex + { + __host__ SrcTexObj(cudaTextureObject_t tex_obj_I1_, cudaTextureObject_t tex_obj_I1x_, cudaTextureObject_t tex_obj_I1y_) + : tex_obj_I1(tex_obj_I1_), tex_obj_I1x(tex_obj_I1x_), tex_obj_I1y(tex_obj_I1y_) {} + + __device__ __forceinline__ float I1(float x, float y) const override + { + return tex2D(tex_obj_I1, x, y); + } + __device__ __forceinline__ float I1x(float x, float y) const override + { + return tex2D(tex_obj_I1x, x, y); + } + __device__ __forceinline__ float I1y(float x, float y) const override + { + return tex2D(tex_obj_I1y, x, y); + } + + cudaTextureObject_t tex_obj_I1; + cudaTextureObject_t tex_obj_I1x; + cudaTextureObject_t tex_obj_I1y; + }; + + template < + typename T, + typename = std::enable_if_t::value> + > + __global__ void warpBackwardKernel( + const PtrStepSzf I0, const T src, const PtrStepf u1, const PtrStepf u2, + PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -136,9 +190,9 @@ namespace tvl1flow { const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); - sum += w * tex2D(tex_I1 , cx, cy); - sumx += w * tex2D(tex_I1x, cx, cy); - sumy += w * tex2D(tex_I1y, cx, cy); + sum += w * src.I1(cx, cy); + sumx += w * src.I1x(cx, cy); + sumy += w * src.I1y(cx, cy); wsum += w; } @@ -173,15 +227,46 @@ namespace tvl1flow const dim3 block(32, 8); const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y)); - bindTexture(&tex_I1 , I1); - bindTexture(&tex_I1x, I1x); - bindTexture(&tex_I1y, I1y); + bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30); - warpBackwardKernel<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); - cudaSafeCall( cudaGetLastError() ); + if (cc30) + { + cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = cudaAddressModeClamp; + texDesc.addressMode[1] = cudaAddressModeClamp; + texDesc.addressMode[2] = cudaAddressModeClamp; - if (!stream) - cudaSafeCall( cudaDeviceSynchronize() ); + cudaTextureObject_t texObj_I1 = 0, texObj_I1x = 0, texObj_I1y = 0; + + createTextureObjectPitch2D(&texObj_I1, I1, texDesc); + createTextureObjectPitch2D(&texObj_I1x, I1x, texDesc); + createTextureObjectPitch2D(&texObj_I1y, I1y, texDesc); + + warpBackwardKernel << > > (I0, SrcTexObj(texObj_I1, texObj_I1x, texObj_I1y), u1, u2, I1w, I1wx, I1wy, grad, rho); + cudaSafeCall(cudaGetLastError()); + + if (!stream) + cudaSafeCall(cudaDeviceSynchronize()); + else + cudaSafeCall(cudaStreamSynchronize(stream)); + + cudaSafeCall(cudaDestroyTextureObject(texObj_I1)); + cudaSafeCall(cudaDestroyTextureObject(texObj_I1x)); + cudaSafeCall(cudaDestroyTextureObject(texObj_I1y)); + } + else + { + bindTexture(&tex_I1, I1); + bindTexture(&tex_I1x, I1x); + bindTexture(&tex_I1y, I1y); + + warpBackwardKernel << > > (I0, SrcTexRef(), u1, u2, I1w, I1wx, I1wy, grad, rho); + cudaSafeCall(cudaGetLastError()); + + if (!stream) + cudaSafeCall(cudaDeviceSynchronize()); + } } } From d93b6be3cc13d20644b694e0d1efb46c139af70b Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 16 Jun 2020 20:09:25 +0300 Subject: [PATCH 07/15] Changed StridedSlice to VariadicSplit in Region layer --- modules/dnn/src/layers/region_layer.cpp | 48 +++++++++---------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index d229369abb..c0ba4b2ccf 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -412,12 +412,12 @@ public: auto scale_x_y_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y); auto shift_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, std::vector{0.5}); + auto axis = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + auto splits = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{5}, {1, 1, 1, 1, rows - 4}); + auto split = std::make_shared(input2d, axis, splits); std::shared_ptr box_x; { - auto lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{0, 0}); - auto upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{1, cols}); - box_x = std::make_shared(input2d, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - box_x = std::make_shared(box_x); + box_x = std::make_shared(split->output(0)); box_x = std::make_shared(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY); box_x = std::make_shared(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY); box_x = std::make_shared(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY); @@ -443,10 +443,7 @@ public: std::shared_ptr box_y; { - auto lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{1, 0}); - auto upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{2, cols}); - box_y = std::make_shared(input2d, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - box_y = std::make_shared(box_y); + box_y = std::make_shared(split->output(1)); box_y = std::make_shared(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY); box_y = std::make_shared(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY); box_y = std::make_shared(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY); @@ -499,45 +496,32 @@ public: std::copy(bias_h.begin(), bias_h.begin() + h * anchors, bias_h.begin() + i * h * anchors); } - auto lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{2, 0}); - auto upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{3, cols}); - box_w = std::make_shared(input2d, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - box_w = std::make_shared(box_w); + box_w = std::make_shared(split->output(2)); box_w = std::make_shared(box_w, shape_3d, true); auto anchor_w_node = std::make_shared(ngraph::element::f32, box_broad_shape, bias_w.data()); box_w = std::make_shared(box_w, anchor_w_node, ngraph::op::AutoBroadcastType::NUMPY); - lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{3, 0}); - upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{4, cols}); - box_h = std::make_shared(input2d, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - box_h = std::make_shared(box_h); + box_h = std::make_shared(split->output(3)); box_h = std::make_shared(box_h, shape_3d, true); auto anchor_h_node = std::make_shared(ngraph::element::f32, box_broad_shape, bias_h.data()); box_h = std::make_shared(box_h, anchor_h_node, ngraph::op::AutoBroadcastType::NUMPY); } + auto region_splits = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {4, 1, rows - 5}); + auto region_split = std::make_shared(region, axis, region_splits); + std::shared_ptr scale; { - auto lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{4, 0}); - auto upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{5, cols}); - scale = std::make_shared(region, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - - if (classfix == -1) - { - auto thresh_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, std::vector{0.5}); - auto mask = std::make_shared(scale, thresh_node); - auto zero_node = std::make_shared(ngraph::element::f32, mask->get_shape(), std::vector(b * cols, 0)); - scale = std::make_shared(mask, scale, zero_node); - } + float thr = classfix == -1 ? 0.5 : 0; + auto thresh_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, std::vector{thr}); + auto mask = std::make_shared(region_split->output(1), thresh_node); + auto zero_node = std::make_shared(ngraph::element::f32, mask->get_shape(), std::vector(cols, 0)); + scale = std::make_shared(mask, zero_node, region_split->output(1)); } std::shared_ptr probs; { - auto lower_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{5, 0}); - auto upper_bounds = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{rows, cols}); - auto classes = std::make_shared(region, lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); - probs = std::make_shared(classes, scale, ngraph::op::AutoBroadcastType::NUMPY); - + probs = std::make_shared(region_split->output(2), scale, ngraph::op::AutoBroadcastType::NUMPY); auto thresh_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &thresh); auto mask = std::make_shared(probs, thresh_node); auto zero_node = std::make_shared(ngraph::element::f32, mask->get_shape(), std::vector((rows - 5) * cols, 0)); From b10ab7974322f991e8ff49ec59ca9612db7d0b08 Mon Sep 17 00:00:00 2001 From: NesQl <32612899+liqi-c@users.noreply.github.com> Date: Wed, 17 Jun 2020 17:05:04 +0800 Subject: [PATCH 08/15] Merge pull request #17468 from liqi-c:sharedlib_build_problem TEngine installation rules fix for static build * Modify cmake config error for -DBUILD_SHARED_LIBS=OFF * Modify for not install tengine include directory * Update compile error. * move install command to tengine/CMakeLists.txt * rm include dir when make install,only build static lib will install libtengine.a --- 3rdparty/libtengine/tengine.cmake | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake index 4085afcf31..10da845a86 100644 --- a/3rdparty/libtengine/tengine.cmake +++ b/3rdparty/libtengine/tengine.cmake @@ -22,7 +22,7 @@ # sqfu@openailab.com # -SET(TENGINE_COMMIT_VERSION "2f3cd86217f3530c8e4a82f3ed5af14c7a4e3943") +SET(TENGINE_COMMIT_VERSION "8a4c58e0e05cd850f4bb0936a330edc86dc0e28c") SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine") SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}") @@ -34,7 +34,7 @@ IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}") ELSE() SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name2 SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url2 - SET(tengine_md5sum 9124324b6e2b350012e46ae1db4bad7d) #md5sum2 + SET(tengine_md5sum f51ca8f3963faeeff3f019a6f6edc206) #md5sum2 #MESSAGE(STATUS "**** TENGINE DOWNLOAD BEGIN ****") ocv_download(FILENAME ${OCV_TENGINE_FILENAME} @@ -69,7 +69,6 @@ if(BUILD_TENGINE) elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") SET(CONFIG_ARCH_ARM64 ON) endif() - SET(Tengine_LIB "tengine" CACHE INTERNAL "") else() # linux system if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm) @@ -77,7 +76,6 @@ if(BUILD_TENGINE) elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64 SET(CONFIG_ARCH_ARM64 ON) endif() - SET(Tengine_LIB "tengine" CACHE INTERNAL "") endif() SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern . @@ -86,6 +84,6 @@ if(BUILD_TENGINE) add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build") else() message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}") - SET(HAVE_TENGINE 1) endif() + SET(Tengine_LIB "tengine" CACHE INTERNAL "") endif() From d01cbe93208335204cfe314e713013853a4dc641 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 16 Jun 2020 02:14:04 +0000 Subject: [PATCH 09/15] cudacodec(build): fix detection in CMake, cleanup duplicate includes --- cmake/OpenCVDetectCUDA.cmake | 1 + modules/cudacodec/src/cuvid_video_source.hpp | 10 +--------- modules/cudacodec/src/frame_queue.hpp | 10 +--------- modules/cudacodec/src/video_decoder.hpp | 12 +----------- modules/cudacodec/src/video_parser.hpp | 11 +---------- modules/cudacodec/src/video_source.hpp | 5 +---- 6 files changed, 6 insertions(+), 43 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 37a77cd660..22879f3152 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -53,6 +53,7 @@ if(CUDA_FOUND) else() set(${_result} 1) endif() + unset(_header_result CACHE) endmacro() SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER) SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER) diff --git a/modules/cudacodec/src/cuvid_video_source.hpp b/modules/cudacodec/src/cuvid_video_source.hpp index 4dd7761620..d36d856611 100644 --- a/modules/cudacodec/src/cuvid_video_source.hpp +++ b/modules/cudacodec/src/cuvid_video_source.hpp @@ -44,17 +44,9 @@ #ifndef __CUVID_VIDEO_SOURCE_HPP__ #define __CUVID_VIDEO_SOURCE_HPP__ -#if defined(HAVE_DYNLINK_NVCUVID_HEADER) - #include -#elif defined(HAVE_NVCUVID_HEADER) - #include -#endif -#include "opencv2/core/private.cuda.hpp" -#include "opencv2/cudacodec.hpp" #include "video_source.hpp" -namespace cv { namespace cudacodec { namespace detail -{ +namespace cv { namespace cudacodec { namespace detail { class CuvidVideoSource : public VideoSource { diff --git a/modules/cudacodec/src/frame_queue.hpp b/modules/cudacodec/src/frame_queue.hpp index f7fe7ad57d..3051a1e450 100644 --- a/modules/cudacodec/src/frame_queue.hpp +++ b/modules/cudacodec/src/frame_queue.hpp @@ -45,16 +45,8 @@ #define __FRAME_QUEUE_HPP__ #include "opencv2/core/utility.hpp" -#include "opencv2/core/private.cuda.hpp" -#if defined(HAVE_DYNLINK_NVCUVID_HEADER) - #include -#elif defined(HAVE_NVCUVID_HEADER) - #include -#endif - -namespace cv { namespace cudacodec { namespace detail -{ +namespace cv { namespace cudacodec { namespace detail { class FrameQueue { diff --git a/modules/cudacodec/src/video_decoder.hpp b/modules/cudacodec/src/video_decoder.hpp index 46caccaf83..e41b1a655f 100644 --- a/modules/cudacodec/src/video_decoder.hpp +++ b/modules/cudacodec/src/video_decoder.hpp @@ -44,17 +44,7 @@ #ifndef __VIDEO_DECODER_HPP__ #define __VIDEO_DECODER_HPP__ -#if defined(HAVE_DYNLINK_NVCUVID_HEADER) - #include -#elif defined(HAVE_NVCUVID_HEADER) - #include -#endif - -#include "opencv2/core/private.cuda.hpp" -#include "opencv2/cudacodec.hpp" - -namespace cv { namespace cudacodec { namespace detail -{ +namespace cv { namespace cudacodec { namespace detail { class VideoDecoder { diff --git a/modules/cudacodec/src/video_parser.hpp b/modules/cudacodec/src/video_parser.hpp index 03fff8e96a..91e50b3e2b 100644 --- a/modules/cudacodec/src/video_parser.hpp +++ b/modules/cudacodec/src/video_parser.hpp @@ -44,19 +44,10 @@ #ifndef __VIDEO_PARSER_HPP__ #define __VIDEO_PARSER_HPP__ -#if defined(HAVE_DYNLINK_NVCUVID_HEADER) - #include -#elif defined(HAVE_NVCUVID_HEADER) - #include -#endif - -#include "opencv2/core/private.cuda.hpp" -#include "opencv2/cudacodec.hpp" #include "frame_queue.hpp" #include "video_decoder.hpp" -namespace cv { namespace cudacodec { namespace detail -{ +namespace cv { namespace cudacodec { namespace detail { class VideoParser { diff --git a/modules/cudacodec/src/video_source.hpp b/modules/cudacodec/src/video_source.hpp index 9f2ed29d58..887ac04679 100644 --- a/modules/cudacodec/src/video_source.hpp +++ b/modules/cudacodec/src/video_source.hpp @@ -44,12 +44,9 @@ #ifndef __CUDACODEC_VIDEO_SOURCE_H__ #define __CUDACODEC_VIDEO_SOURCE_H__ -#include "opencv2/core/private.cuda.hpp" -#include "opencv2/cudacodec.hpp" #include "thread.hpp" -namespace cv { namespace cudacodec { namespace detail -{ +namespace cv { namespace cudacodec { namespace detail { class VideoParser; From 1cba763189a365a6bfd3f613c506797e4e199c21 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Wed, 17 Jun 2020 22:02:51 +0900 Subject: [PATCH 10/15] fix build error of automatic CC detection --- cmake/OpenCVDetectCUDA.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 37a77cd660..ecaec3f190 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -101,7 +101,7 @@ if(CUDA_FOUND) else() set(CC_LIST ${ARGN}) foreach(target_arch ${CC_LIST}) - string(REPLACE "." "" target_arch_short ${target_arch}) + string(REPLACE "." "" target_arch_short "${target_arch}") set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}") execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" @@ -111,7 +111,7 @@ if(CUDA_FOUND) set(${result_list} "${${result_list}} ${target_arch}") endif() endforeach() - string(STRIP ${${result_list}} ${result_list}) + string(STRIP "${${result_list}}" ${result_list}) set(CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "List of supported compute capability") endif() endmacro() @@ -124,7 +124,7 @@ if(CUDA_FOUND) endmacro() macro(ocv_wipeout_deprecated _arch_bin_list) - string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} ${${_arch_bin_list}}) + string(REPLACE "2.1" "2.1(2.0)" ${_arch_bin_list} "${${_arch_bin_list}}") endmacro() set(__cuda_arch_ptx "") From a7cc1159cdb9334041b0093393b3e49144dd0b26 Mon Sep 17 00:00:00 2001 From: Alex Cohn Date: Thu, 18 Jun 2020 10:40:43 +0300 Subject: [PATCH 11/15] Merge pull request #17573 from alexcohn:fix/android_windows_build * fixing #17572 https://github.com/opencv/opencv/issues/17572 Build for Android failed: "can't concat str to bytes" on Windows 10 64bit with python 3.6.6 * similar to changes in platforms/winpack_dldt/build_package.py --- platforms/android/build_sdk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/platforms/android/build_sdk.py b/platforms/android/build_sdk.py index 7cd945d60e..14fd8f664f 100755 --- a/platforms/android/build_sdk.py +++ b/platforms/android/build_sdk.py @@ -59,6 +59,8 @@ def check_executable(cmd): try: log.debug("Executing: %s" % cmd) result = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + if not isinstance(result, str): + result = result.decode("utf-8") log.debug("Result: %s" % (result+'\n').split('\n')[0]) return True except Exception as e: From 456e88a8a424d66e839158babcda2b8380d1cc79 Mon Sep 17 00:00:00 2001 From: Yuriy Obukh Date: Thu, 18 Jun 2020 14:31:11 +0300 Subject: [PATCH 12/15] fix VS Windows build with eigen. https://github.com/opencv/opencv/issues/17548 --- modules/core/include/opencv2/core/eigen.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/core/include/opencv2/core/eigen.hpp b/modules/core/include/opencv2/core/eigen.hpp index 3d7ba8fa14..3f4be931e6 100644 --- a/modules/core/include/opencv2/core/eigen.hpp +++ b/modules/core/include/opencv2/core/eigen.hpp @@ -51,18 +51,19 @@ #include "opencv2/core.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548 +#pragma warning( disable: 4714 ) //__forceinline is not inlined +#pragma warning( disable: 4127 ) //conditional expression is constant +#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data +#endif + #if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \ && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY) #include #define OPENCV_EIGEN_TENSOR_SUPPORT #endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 -#if defined _MSC_VER && _MSC_VER >= 1200 -#pragma warning( disable: 4714 ) //__forceinline is not inlined -#pragma warning( disable: 4127 ) //conditional expression is constant -#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data -#endif - namespace cv { From c07af090f517a81f7bc823709c7e759d993e9062 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Thu, 18 Jun 2020 21:25:15 +0900 Subject: [PATCH 13/15] fix build error on Jetson TX1 and TX2 * enable_if_t and is_base_of is c++14 feature * override is c++11 feature --- modules/cudaimgproc/src/cuda/canny.cu | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/canny.cu b/modules/cudaimgproc/src/cuda/canny.cu index 253287ca31..0bc3e2a671 100644 --- a/modules/cudaimgproc/src/cuda/canny.cu +++ b/modules/cudaimgproc/src/cuda/canny.cu @@ -107,7 +107,7 @@ namespace canny { __host__ SrcTexRef(int _xoff, int _yoff) : SrcTex(_xoff, _yoff) {} - __device__ __forceinline__ int operator ()(int y, int x) const override + __device__ __forceinline__ int operator ()(int y, int x) const { return tex2D(tex_src, x + xoff, y + yoff); } @@ -117,7 +117,7 @@ namespace canny { __host__ SrcTexObj(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : SrcTex(_xoff, _yoff), tex_src_object(_tex_src_object) { } - __device__ __forceinline__ int operator ()(int y, int x) const override + __device__ __forceinline__ int operator ()(int y, int x) const { return tex2D(tex_src_object, x + xoff, y + yoff); } @@ -127,8 +127,7 @@ namespace canny template < class T, - class Norm, - typename = std::enable_if_t::value> + class Norm > __global__ void calcMagnitudeKernel(const T src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) { From d25293b721f4e12988f439ffe5cf4fd46072b9f8 Mon Sep 17 00:00:00 2001 From: Philippe FOUBERT Date: Thu, 18 Jun 2020 11:48:43 +0200 Subject: [PATCH 14/15] Fix the build of imgproc using MinGW (variables with the same name as symbols defined in MinGW headers) --- modules/imgproc/src/color_yuv.simd.hpp | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/modules/imgproc/src/color_yuv.simd.hpp b/modules/imgproc/src/color_yuv.simd.hpp index 8bbd78b244..076d1a4bd5 100644 --- a/modules/imgproc/src/color_yuv.simd.hpp +++ b/modules/imgproc/src/color_yuv.simd.hpp @@ -347,16 +347,16 @@ struct RGB2YCrCb_i sr0 = sr0 - sy0; sr1 = sr1 - sy1; sb0 = sb0 - sy0; sb1 = sb1 - sy1; - v_int32 scr0, scr1, scb0, scb1; + v_int32 v_scr0, v_scr1, v_scb0, v_scb1; - scr0 = (sr0*vc3 + vdd) >> shift; - scr1 = (sr1*vc3 + vdd) >> shift; - scb0 = (sb0*vc4 + vdd) >> shift; - scb1 = (sb1*vc4 + vdd) >> shift; + v_scr0 = (sr0*vc3 + vdd) >> shift; + v_scr1 = (sr1*vc3 + vdd) >> shift; + v_scb0 = (sb0*vc4 + vdd) >> shift; + v_scb1 = (sb1*vc4 + vdd) >> shift; // saturate and pack - cr = v_pack_u(scr0, scr1); - cb = v_pack_u(scb0, scb1); + cr = v_pack_u(v_scr0, v_scr1); + cb = v_pack_u(v_scb0, v_scb1); if(yuvOrder) { @@ -781,36 +781,36 @@ struct YCrCb2RGB_i v_int8 scr = v_reinterpret_as_s8(cr); v_int8 scb = v_reinterpret_as_s8(cb); - v_int16 scr0, scr1, scb0, scb1; - v_expand(scr, scr0, scr1); - v_expand(scb, scb0, scb1); + v_int16 v_scr0, v_scr1, v_scb0, v_scb1; + v_expand(scr, v_scr0, v_scr1); + v_expand(scb, v_scb0, v_scb1); v_int32 b00, b01, b10, b11; v_int32 g00, g01, g10, g11; v_int32 r00, r01, r10, r11; - v_mul_expand(scb0, vc3, b00, b01); - v_mul_expand(scb1, vc3, b10, b11); + v_mul_expand(v_scb0, vc3, b00, b01); + v_mul_expand(v_scb1, vc3, b10, b11); if(yuvOrder) { // if YUV then C3 > 2^15 // so we fix the multiplication v_int32 cb00, cb01, cb10, cb11; - v_expand(scb0, cb00, cb01); - v_expand(scb1, cb10, cb11); + v_expand(v_scb0, cb00, cb01); + v_expand(v_scb1, cb10, cb11); b00 += cb00 << 15; b01 += cb01 << 15; b10 += cb10 << 15; b11 += cb11 << 15; } v_int32 t00, t01, t10, t11; - v_mul_expand(scb0, vc2, t00, t01); - v_mul_expand(scb1, vc2, t10, t11); - v_mul_expand(scr0, vc1, g00, g01); - v_mul_expand(scr1, vc1, g10, g11); + v_mul_expand(v_scb0, vc2, t00, t01); + v_mul_expand(v_scb1, vc2, t10, t11); + v_mul_expand(v_scr0, vc1, g00, g01); + v_mul_expand(v_scr1, vc1, g10, g11); g00 += t00; g01 += t01; g10 += t10; g11 += t11; - v_mul_expand(scr0, vc0, r00, r01); - v_mul_expand(scr1, vc0, r10, r11); + v_mul_expand(v_scr0, vc0, r00, r01); + v_mul_expand(v_scr1, vc0, r10, r11); b00 = (b00 + vdescale) >> shift; b01 = (b01 + vdescale) >> shift; b10 = (b10 + vdescale) >> shift; b11 = (b11 + vdescale) >> shift; From 77fa1a20bf2fc305a7959f78c66a70b0fbc15a02 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Fri, 19 Jun 2020 07:56:37 +0900 Subject: [PATCH 15/15] fix build on Jetson TX1 and TX2 * enable_if_t is a c++14 feature --- modules/cudaoptflow/src/cuda/tvl1flow.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index 2688e05c61..a84b9a3520 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -154,8 +154,7 @@ namespace tvl1flow }; template < - typename T, - typename = std::enable_if_t::value> + typename T > __global__ void warpBackwardKernel( const PtrStepSzf I0, const T src, const PtrStepf u1, const PtrStepf u2,