From 9cd5a0a1e675b3a95c796b692cbcb041481e91c6 Mon Sep 17 00:00:00 2001
From: rogday
Date: Tue, 19 Apr 2022 19:40:25 +0300
Subject: [PATCH] Merge pull request #21884 from rogday:cuda_cleanup

Fix CUDA compilation issues and adjust thresholds.

* Fix CUDA compilation issues and adjust thresholds.

* add conformance tests to denylist
---
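Notes (kept below the "---" separator, so `git am` drops them from the commit
message): the two functors.hpp constructor hunks appear to be the core
compilation fix. Before this patch, SignFunctor and ReciprocalFunctor declared
only a default constructor delegating to a Params-taking constructor that was
never defined, which cannot compile once that constructor is used. The sketch
below reproduces the failure mode and the fixed shape in plain C++; it is an
illustration only, with the CUDA4DNN_DEVICE/CUDA4DNN_HOST_DEVICE qualifiers
dropped, and Broken/Fixed are hypothetical names, not OpenCV types:

    #include <iostream>

    template <class T>
    struct Broken {
        struct Params { Params() {} };
        // Delegates to Broken(Params), which is never declared, so this
        // constructor fails to compile as soon as it is instantiated:
        //Broken() : Broken(Params{}) { }
    };

    template <class T>
    struct Fixed {
        struct Params { Params() {} };

        Fixed() { }              // stateless functor: both ctors are no-ops
        Fixed(const Params&) { }

        T operator()(T value) const {  // same expression as SignFunctor
            return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
        }
    };

    int main() {
        Fixed<float> sign;
        std::cout << sign(-3.5f) << ' ' << sign(0.f) << ' ' << sign(2.f) << '\n'; // -1 0 1
        return 0;
    }

ShrinkFunctor goes the other way (its default constructor now delegates to
Params{}) because it does carry state, and the matching elementwise_layers.cpp
hunk forwards bias and lambd through make_cuda_node so the configured shrink
parameters actually reach the CUDA kernel. The activations.cl hunk balances
the parentheses in SignForward; since OpenCL kernels are compiled at runtime,
that syntax error would only surface when the kernel was first used.
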
 modules/dnn/src/cuda/activations.cu             |  2 +-
 modules/dnn/src/cuda/functors.hpp               | 10 ++++++----
 modules/dnn/src/layers/convolution_layer.cpp    |  1 +
 modules/dnn/src/layers/elementwise_layers.cpp   |  8 ++++----
 modules/dnn/src/opencl/activations.cl           |  2 +-
 modules/dnn/test/test_backends.cpp              |  2 +-
 modules/dnn/test/test_misc.cpp                  |  5 +++--
 modules/dnn/test/test_model.cpp                 |  5 +++--
 modules/dnn/test/test_onnx_conformance.cpp      |  2 +-
 ...formance_layer_filter__cuda_denylist.inl.hpp |  9 +++++++++
 modules/dnn/test/test_onnx_importer.cpp         | 17 +++++++++++------
 11 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu
index ed34d57e0b..e12457a164 100644
--- a/modules/dnn/src/cuda/activations.cu
+++ b/modules/dnn/src/cuda/activations.cu
@@ -260,7 +260,7 @@ void shrink(const Stream& stream, Span<T> output, View<T> input, T bias, T lambd
 
 template <class T>
 void reciprocal(const Stream& stream, Span<T> output, View<T> input) {
-    generic_op<T, SignFunctor<T>>(stream, output, input);
+    generic_op<T, ReciprocalFunctor<T>>(stream, output, input);
 }
 
 template <class T>
diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp
index 378df82f26..83a949f8e7 100644
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@@ -732,7 +732,8 @@ struct SignFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
 
-    CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { }
+    CUDA4DNN_DEVICE SignFunctor() { }
+    CUDA4DNN_DEVICE SignFunctor(const Params& params) { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
         return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
@@ -747,7 +748,7 @@ struct ShrinkFunctor {
         T bias, lambd;
     };
 
-    CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { }
+    CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { }
     CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
@@ -763,10 +764,11 @@ struct ReciprocalFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
 
-    CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { }
+    CUDA4DNN_DEVICE ReciprocalFunctor() { }
+    CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { }
 
     CUDA4DNN_DEVICE T operator()(T value) {
-        return T(1.0f)/value;
+        return T(1.f)/value;
     }
 };
 
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 4e377b9f7e..0bf39f93b3 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -2080,6 +2080,7 @@ public:
     {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);
 
+        // TODO: extract bias from inputs and pass it
         CV_Assert(inputs.size() == 1 || inputs.size() == 2);
         auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
         auto input_shape = input_wrapper->getShape();
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index eb2ecce3ce..353ce8c0b4 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -2282,7 +2282,7 @@ struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
 
     inline float calculate(float x) const
     {
-        return x > 0 ? 1 : (x < 0 ? -1 : 0);
+        return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f);
     }
 
 #ifdef HAVE_CUDA
@@ -2315,13 +2315,13 @@ struct ShrinkFunctor : public BaseDefaultFunctor<ShrinkFunctor>
 
     inline float calculate(float x) const
     {
-        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0);
+        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
     }
 
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream);
+        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream, bias, lambd);
     }
 #endif
 
@@ -2343,7 +2343,7 @@ struct ReciprocalFunctor : public BaseDefaultFunctor<ReciprocalFunctor>
 
     inline float calculate(float x) const
     {
-        return 1.0/x;
+        return 1.f/x;
     }
 
 #ifdef HAVE_CUDA
diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl
index d105623403..0624f48e19 100644
--- a/modules/dnn/src/opencl/activations.cl
+++ b/modules/dnn/src/opencl/activations.cl
@@ -320,7 +320,7 @@ __kernel void SignForward(const int n, __global T* in, __global T* out)
 {
     int index = get_global_id(0);
     if(index < n)
-        out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? -1.0f : 0.0f);
+        out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f);
 }
 
 __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 8a673ba0d8..f2c6f1e5a0 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -516,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121)
     else if (target == DNN_TARGET_CUDA_FP16)
     {
         l1 = 0.008;
-        lInf = 0.05;
+        lInf = 0.06;
     }
     processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
     if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp
index 108131bd9d..0fab7551a5 100644
--- a/modules/dnn/test/test_misc.cpp
+++ b/modules/dnn/test/test_misc.cpp
@@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic)
     Mat ref;
     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
 
-    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
-    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5;
+
     normAssert(out, ref, "", l1, lInf);
 
     if (cvtest::debugLevel > 0 || HasFailure())
diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp
index c5a0f2fa7f..25d1a18d52 100644
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@@ -512,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
     }
     else if (target == DNN_TARGET_CUDA_FP16)
     {
-        scoreDiff = 0.002;
+        scoreDiff = 0.0021;
         iouDiff = 1e-2;
     }
     float confThreshold = FLT_MIN;
@@ -661,7 +661,8 @@ TEST_P(Test_Model, Segmentation)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
     {
         norm = 2.0f; // l1 = 0.01 lInf = 2
     }
diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp
index 0e912ede54..e9bc0e4187 100644
--- a/modules/dnn/test/test_onnx_conformance.cpp
+++ b/modules/dnn/test/test_onnx_conformance.cpp
@@ -954,7 +954,7 @@ public:
 
         if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
         {
-            default_l1 = 4e-3;
+            default_l1 = 7e-3;
             default_lInf = 2e-2;
         }
         else
diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
index 0f5f387132..c18ced0c59 100644
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp
@@ -66,6 +66,15 @@
 "test_maxunpool_export_with_output_shape",
 "test_mul_bcast",
 "test_mul_uint8",
+"test_reduce_prod_default_axes_keepdims_example", // FP16 only
+"test_reduce_prod_default_axes_keepdims_random", // FP16 only
+"test_reduce_prod_do_not_keepdims_random", // FP16 only
+"test_reduce_prod_keepdims_random", // FP16 only
+"test_reduce_prod_negative_axes_keepdims_random", // FP16 only
+"test_reduce_sum_square_default_axes_keepdims_random", // FP16 only
+"test_reduce_sum_square_do_not_keepdims_random", // FP16 only
+"test_reduce_sum_square_keepdims_random", // FP16 only
+"test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only
 "test_softmax_default_axis",
 "test_softmax_large_number", // FP16 only
 "test_softmax_large_number_expanded", // FP16 only
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 11bf91b868..8503f55c25 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -169,16 +169,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
          backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
-    if (backend == DNN_BACKEND_CUDA)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
-    if (backend == DNN_BACKEND_VKCOM)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
-
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU &&
         getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
 
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs
+
+    if (backend == DNN_BACKEND_VKCOM)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
+
     String basename = "conv_variable_wb";
     Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
     ASSERT_FALSE(net.empty());
@@ -464,11 +465,15 @@ TEST_P(Test_ONNX_layers, Scale)
 
 TEST_P(Test_ONNX_layers, Scale_broadcast)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3);
 }
 
 TEST_P(Test_ONNX_layers, Scale_broadcast_mid)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2);
 }
 
@@ -2131,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
     double lInf = default_lInf;
 
     // Output values are in range [-2.011, 2.111]
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16))
         l1 = 0.007;
     else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
     {