Merge pull request #21884 from rogday:cuda_cleanup

Fix CUDA compilation issues and adjust thresholds.

* Fix CUDA compilation issues and adjust thresholds.

* add conformance tests to denylist
Authored by rogday on 2022-04-19 19:40:25 +03:00; committed by GitHub.
parent 27c15bed60
commit 9cd5a0a1e6
11 changed files with 41 additions and 22 deletions

View File

@@ -260,7 +260,7 @@ void shrink(const Stream& stream, Span<T> output, View<T> input, T bias, T lambd
 template <class T>
 void reciprocal(const Stream& stream, Span<T> output, View<T> input) {
-    generic_op<T, SignFunctor<T>>(stream, output, input);
+    generic_op<T, ReciprocalFunctor<T>>(stream, output, input);
 }
 template <class T>
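
This one-token fix is the core correctness bug of the PR: `reciprocal()` dispatched `SignFunctor` instead of `ReciprocalFunctor`, so the CUDA backend computed sign(x) wherever 1/x was requested, and nothing failed at compile time. A minimal host-side sketch of the dispatch pattern (simplified stand-ins, not the actual cuda4dnn kernels):

```cpp
#include <cstdio>

// Illustrative stand-ins for the cuda4dnn functors; the real ones run on
// the GPU via generic_op and carry CUDA4DNN_DEVICE qualifiers.
template <class T>
struct SignFunctor {
    T operator()(T v) const { return v > T(0) ? T(1) : (v < T(0) ? T(-1) : T(0)); }
};

template <class T>
struct ReciprocalFunctor {
    T operator()(T v) const { return T(1.f) / v; }
};

// Simplified analogue of generic_op: apply Functor elementwise. A mistyped
// functor template argument still compiles cleanly, which is why the old
// reciprocal() silently computed sign(x) instead of 1/x.
template <class T, class Functor>
void generic_op(T* out, const T* in, int n) {
    for (int i = 0; i < n; ++i)
        out[i] = Functor{}(in[i]);
}

int main() {
    float in[4] = {1.f, 2.f, 4.f, 8.f}, out[4];
    generic_op<float, ReciprocalFunctor<float>>(out, in, 4);  // post-fix dispatch
    for (float v : out)
        std::printf("%g ", v);  // prints: 1 0.5 0.25 0.125
    std::printf("\n");
}
```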

View File

@@ -732,7 +732,8 @@ struct SignFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
-    CUDA4DNN_DEVICE SignFunctor() : SignFunctor(Params{}) { }
+    CUDA4DNN_DEVICE SignFunctor() { }
+    CUDA4DNN_DEVICE SignFunctor(const Params& params) { }
     CUDA4DNN_DEVICE T operator()(T value) {
         return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
@@ -747,7 +748,7 @@ struct ShrinkFunctor {
         T bias, lambd;
     };
-    CUDA4DNN_DEVICE ShrinkFunctor() : bias(0), lambd(0.5) { }
+    CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { }
     CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }
     CUDA4DNN_DEVICE T operator()(T value) {
@@ -763,10 +764,11 @@ struct ReciprocalFunctor {
         CUDA4DNN_HOST_DEVICE Params() {}
     };
-    CUDA4DNN_DEVICE ReciprocalFunctor() : ReciprocalFunctor(Params{}) { }
+    CUDA4DNN_DEVICE ReciprocalFunctor() { }
+    CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { }
     CUDA4DNN_DEVICE T operator()(T value) {
-        return T(1.0f)/value;
+        return T(1.f)/value;
     }
 };
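
These three hunks converge on one constructor convention for the cuda4dnn functors: every functor exposes both a default constructor and a `Params` constructor, so generic dispatch code can construct any functor uniformly. Stateful functors like `ShrinkFunctor` delegate their default constructor to `Params{}` so the defaults live in one place; stateless ones (`SignFunctor`, `ReciprocalFunctor`) accept and ignore `Params`. A simplified host-side sketch of the idiom (the `CUDA4DNN_*` qualifiers dropped for brevity):

```cpp
// Host-side sketch of the two-constructor convention, simplified from the
// functors.hpp hunks above.
template <class T>
struct ShrinkFunctor {
    struct Params {
        Params() : bias(0), lambd(0.5) { }  // defaults live only in Params
        T bias, lambd;
    };

    // The default constructor delegates to Params{}, matching the fix above.
    ShrinkFunctor() : ShrinkFunctor(Params{}) { }
    ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }

    T operator()(T value) const {
        return value > lambd ? value - bias
                             : (value < -lambd ? value + bias : T(0));
    }

    T bias, lambd;
};

int main() {
    ShrinkFunctor<float> f;          // bias = 0, lambd = 0.5 via Params{}
    return f(0.3f) == 0.f ? 0 : 1;   // 0.3 lies inside [-lambd, lambd] => 0
}
```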

View File

@@ -2080,6 +2080,7 @@ public:
     {
         auto context = reinterpret_cast<csl::CSLContext*>(context_);
+        // TODO: extract bias from inputs and pass it
         CV_Assert(inputs.size() == 1 || inputs.size() == 2);
         auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
         auto input_shape = input_wrapper->getShape();

View File

@@ -2282,7 +2282,7 @@ struct SignFunctor : public BaseDefaultFunctor<SignFunctor>
     inline float calculate(float x) const
     {
-        return x > 0 ? 1 : (x < 0 ? -1 : 0);
+        return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f);
     }
 #ifdef HAVE_CUDA
@@ -2315,13 +2315,13 @@ struct ShrinkFunctor : public BaseDefaultFunctor<ShrinkFunctor>
     inline float calculate(float x) const
     {
-        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0);
+        return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
     }
 #ifdef HAVE_CUDA
     Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
     {
-        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream);
+        return make_cuda_node<cuda4dnn::ShrinkOp>(target, stream, bias, lambd);
     }
 #endif
@@ -2343,7 +2343,7 @@ struct ReciprocalFunctor : public BaseDefaultFunctor<ReciprocalFunctor>
     inline float calculate(float x) const
    {
-        return 1.0/x;
+        return 1.f/x;
     }
 #ifdef HAVE_CUDA
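
The `ShrinkOp` change matters beyond float-literal cleanup: `make_cuda_node` forwards its trailing arguments to the backend op, so the old call without `bias, lambd` built the CUDA op with defaults rather than the layer's configured values. A generic sketch of that perfect-forwarding factory idiom (an illustration of the mechanism, not OpenCV's actual implementation):

```cpp
#include <memory>
#include <utility>

// Generic perfect-forwarding factory, illustrating (not reproducing) what
// make_cuda_node does: trailing arguments reach the op's constructor.
template <class Op, class... Args>
std::unique_ptr<Op> make_node(Args&&... args) {
    return std::make_unique<Op>(std::forward<Args>(args)...);
}

struct ShrinkLikeOp {
    // Hypothetical op: without explicit arguments it falls back to defaults,
    // which is how the un-forwarded bias/lambd went unnoticed pre-fix.
    explicit ShrinkLikeOp(float bias = 0.f, float lambd = 0.5f)
        : bias(bias), lambd(lambd) { }
    float bias, lambd;
};

int main() {
    auto before = make_node<ShrinkLikeOp>();            // defaults only
    auto after  = make_node<ShrinkLikeOp>(0.1f, 0.7f);  // layer's real parameters
    return (before->lambd != after->lambd) ? 0 : 1;     // 0: parameters differ
}
```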

View File

@@ -320,7 +320,7 @@ __kernel void SignForward(const int n, __global T* in, __global T* out)
 {
     int index = get_global_id(0);
     if(index < n)
-        out[index] = in[index] > 0.f ? 1.0f : (in[index] < 0.f) ? -1.0f : 0.0f);
+        out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f);
 }
 __kernel void ReciprocalForward(const int n, __global T* in, __global T* out)
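
The old OpenCL line had a stray closing parenthesis with no opening match, which broke the kernel at build time; the fix wraps the nested ternary. A quick host-side check of the repaired expression (illustrative only, outside the kernel):

```cpp
#include <cassert>

// Host-side check of the repaired SignForward expression: the same nested
// ternary as the kernel, minus the OpenCL indexing.
static float sign_fwd(float x) {
    return x > 0.f ? 1.0f : ((x < 0.f) ? -1.0f : 0.0f);
}

int main() {
    assert(sign_fwd(3.5f)  ==  1.0f);
    assert(sign_fwd(-2.0f) == -1.0f);
    assert(sign_fwd(0.0f)  ==  0.0f);
    return 0;
}
```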

View File

@@ -516,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121)
     else if (target == DNN_TARGET_CUDA_FP16)
     {
         l1 = 0.008;
-        lInf = 0.05;
+        lInf = 0.06;
     }
     processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
     if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)

View File

@@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic)
     Mat ref;
     addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
-    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6;
-    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5;
+    double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6;
+    double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5;
     normAssert(out, ref, "", l1, lInf);
     if (cvtest::debugLevel > 0 || HasFailure())
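
For context on the l1/lInf numbers adjusted throughout this PR: the dnn test helper `normAssert` bounds the mean and the maximum absolute difference between the network output and the reference, so adding `DNN_TARGET_CUDA_FP16` to the relaxed branch tolerates FP16 rounding. A sketch of the check (the exact normalization inside `normAssert` is an assumption here):

```cpp
#include <opencv2/core.hpp>

// Sketch of the comparison these thresholds feed, modelled on the dnn test
// helper normAssert; the exact normalization there is an assumption.
static void norm_assert_sketch(const cv::Mat& ref, const cv::Mat& out,
                               double l1, double lInf) {
    double meanAbs = cv::norm(ref, out, cv::NORM_L1) / ref.total();  // average |diff|
    double maxAbs  = cv::norm(ref, out, cv::NORM_INF);               // worst |diff|
    CV_Assert(meanAbs <= l1);   // l1 bounds the mean absolute error
    CV_Assert(maxAbs <= lInf);  // lInf bounds the single worst element
}

int main() {
    cv::Mat ref = (cv::Mat_<float>(1, 3) << 1.f, 2.f, 3.f);
    cv::Mat out = ref + 0.01f;                                // small FP16-like error
    norm_assert_sketch(ref, out, /*l1=*/0.06, /*lInf=*/0.3);  // passes
    return 0;
}
```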

View File

@@ -512,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
     }
     else if (target == DNN_TARGET_CUDA_FP16)
     {
-        scoreDiff = 0.002;
+        scoreDiff = 0.0021;
         iouDiff = 1e-2;
     }
     float confThreshold = FLT_MIN;
@@ -661,7 +661,8 @@ TEST_P(Test_Model, Segmentation)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16))
     {
         norm = 2.0f;  // l1 = 0.01 lInf = 2
     }

View File

@@ -954,7 +954,7 @@ public:
     if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
     {
-        default_l1 = 4e-3;
+        default_l1 = 7e-3;
         default_lInf = 2e-2;
     }
     else

View File

@@ -66,6 +66,15 @@
 "test_maxunpool_export_with_output_shape",
 "test_mul_bcast",
 "test_mul_uint8",
+"test_reduce_prod_default_axes_keepdims_example", // FP16 only
+"test_reduce_prod_default_axes_keepdims_random", // FP16 only
+"test_reduce_prod_do_not_keepdims_random", // FP16 only
+"test_reduce_prod_keepdims_random", // FP16 only
+"test_reduce_prod_negative_axes_keepdims_random", // FP16 only
+"test_reduce_sum_square_default_axes_keepdims_random", // FP16 only
+"test_reduce_sum_square_do_not_keepdims_random", // FP16 only
+"test_reduce_sum_square_keepdims_random", // FP16 only
+"test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only
 "test_softmax_default_axis",
 "test_softmax_large_number", // FP16 only
 "test_softmax_large_number_expanded", // FP16 only

View File

@@ -169,16 +169,17 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
-    if (backend == DNN_BACKEND_CUDA)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
-    if (backend == DNN_BACKEND_VKCOM)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU &&
         getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs
+    if (backend == DNN_BACKEND_VKCOM)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
     String basename = "conv_variable_wb";
     Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
     ASSERT_FALSE(net.empty());
@@ -464,11 +465,15 @@ TEST_P(Test_ONNX_layers, Scale)
 TEST_P(Test_ONNX_layers, Scale_broadcast)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3);
 }
 TEST_P(Test_ONNX_layers, Scale_broadcast_mid)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting
     testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2);
 }
@@ -2131,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
     double lInf = default_lInf;
     // Output values are in range [-2.011, 2.111]
-    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16))
         l1 = 0.007;
     else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
     {