diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 8e5f478b00..a3d1efbc8b 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -2518,7 +2518,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 
     if (outputBlobs.isUMat())
     {
-        outputBlobs.assign(impl->getBlob(layerName).getUMat(ACCESS_RW));
+        impl->getBlob(layerName).copyTo(outputBlobs);
     }
     else if (outputBlobs.isMat())
     {
@@ -2566,7 +2566,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
         {
             outputvec.resize(ld.outputBlobs.size());
             for (int i = 0; i < outputvec.size(); ++i)
-                outputvec[i] = ld.outputBlobs[i].getUMat(ACCESS_RW);
+                ld.outputBlobs[i].copyTo(outputvec[i]);
         }
     }
 }
diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index c0e8159532..f0fa5f21a5 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -172,8 +172,8 @@ public:
 
         if (umat_weight.empty())
         {
-            umat_weight = weights_.getUMat(ACCESS_READ);
-            umat_bias = bias_.getUMat(ACCESS_READ);
+            weights_.copyTo(umat_weight);
+            bias_.copyTo(umat_bias);
         }
 
         UMat &inpBlob = inputs[0];
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 38d56180c7..0f0d3d41e1 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1452,8 +1452,10 @@ public:
         if (umat_weights.empty())
         {
             transpose(blobs[0].reshape(1, inpCn), umat_weights);
-            umat_biases = hasBias() ? blobs[1].reshape(1, outCn).getUMat(ACCESS_READ) :
-                          UMat::zeros(outCn, 1, CV_32F);
+            if (hasBias())
+                blobs[1].reshape(1, outCn).copyTo(umat_biases);
+            else
+                umat_biases = UMat::zeros(outCn, 1, CV_32F);
         }
 
         String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index c042f5fc55..536c0ff50d 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -969,11 +969,12 @@ struct ChannelsPReLUFunctor
 {
     typedef ChannelsPReLULayer Layer;
     Mat scale;
+#ifdef HAVE_OPENCL
     UMat scale_umat;
+#endif
 
     explicit ChannelsPReLUFunctor(const Mat& scale_=Mat()) : scale(scale_)
     {
-        scale_umat = scale.getUMat(ACCESS_READ);
     }
 
     bool supportBackend(int backendId, int)
@@ -1021,6 +1022,9 @@ struct ChannelsPReLUFunctor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
+        if (scale_umat.empty())
+            scale.copyTo(scale_umat);
+
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
 
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 930ce2a4ce..f36813ff18 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -96,12 +96,6 @@ public:
             biasMat = blobs[1] = blobs[1].reshape(1, 1);
         else
             biasMat = Mat::zeros(1, numOutput, weightsMat.type());
-
-#ifdef HAVE_OPENCL
-        size_t n = blobs.size();
-        umat_blobs.resize(n);
-        for (int i = 0; i < n; i++) umat_blobs[i] = blobs[i].getUMat(ACCESS_READ);
-#endif
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -276,6 +270,8 @@ public:
     virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
     {
         innerProductOp.release();
+        umat_blobs.clear();
+        half_blobs.clear();
     }
 
     bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, InputArrayOfArrays internals)
@@ -288,13 +284,17 @@ public:
         outs.getUMatVector(outputs);
 
         int axisCan = clamp(axis, inputs[0].dims);
-        int numOutput = umat_blobs[0].size[0];
-        int innerSize = umat_blobs[0].size[1];
+        int numOutput = blobs[0].size[0];
+        int innerSize = blobs[0].size[1];
         int outerSize = total(shape(inputs[0]), 0, axisCan);
 
         bool ret = true;
         if (innerProductOp.empty())
         {
+            size_t n = blobs.size();
+            umat_blobs.resize(n);
+            for (int i = 0; i < n; i++) blobs[i].copyTo(umat_blobs[i]);
+
             OCL4DNNInnerProductConfig config;
             config.num_output = numOutput;
             config.bias_term = bias;
diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index 2edbffb90f..a32cbecee9 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -71,6 +71,9 @@ public:
     }
 
     Mat scale, shift;
+#ifdef HAVE_OPENCL
+    UMat umat_scale, umat_shift;
+#endif
     bool fuse_batch_norm;
 
     Ptr<ReLULayer> activ_relu;
@@ -105,6 +108,10 @@ public:
         for( i = 0; i < splitDim; i++ )
             newRows *= inputs[0].size[i];
         zeroDev = inputs[0].total() == newRows;
+#ifdef HAVE_OPENCL
+        umat_scale.release();
+        umat_shift.release();
+#endif
     }
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
@@ -118,8 +125,13 @@ public:
 #ifdef HAVE_OPENCL
     bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
     {
-        UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
-        UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
+        if (umat_scale.empty() && !scale.empty())
+            scale.copyTo(umat_scale);
+        if (umat_shift.empty() && !shift.empty())
+            shift.copyTo(umat_shift);
+        UMat& bnorm_weight = umat_scale;
+        UMat& bnorm_bias = umat_shift;
+
         bool use_half = (inputs[0].depth() == CV_16S);
         String opts = format(" -DT=%s -DT4=%s -Dconvert_T=%s", use_half ? "half" : "float",
                              use_half ? "half4" : "float4", use_half ? "convert_half4" : "convert_float4");
@@ -177,6 +189,13 @@ public:
 
     bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
     {
+        if (umat_scale.empty() && !scale.empty())
+            scale.copyTo(umat_scale);
+        if (umat_shift.empty() && !shift.empty())
+            shift.copyTo(umat_shift);
+        UMat& bnorm_weight = umat_scale;
+        UMat& bnorm_bias = umat_shift;
+
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
 
@@ -192,8 +211,6 @@ public:
         if (inputs[0].depth() == CV_16S)
             return false;
 
-        UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
-        UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
         String opts = format(" -DT=float -DT4=float4 -Dconvert_T=convert_float4");
 
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp
index f721d409a9..cfe61c1f65 100644
--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@@ -60,6 +60,9 @@ public:
     int coords, classes, anchors, classfix;
     float thresh, nmsThreshold;
     bool useSoftmax, useLogistic;
+#ifdef HAVE_OPENCL
+    UMat blob_umat;
+#endif
 
     RegionLayerImpl(const LayerParams& params)
     {
@@ -123,6 +126,9 @@ public:
 #ifdef HAVE_OPENCL
     bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
+        if (blob_umat.empty())
+            blobs[0].copyTo(blob_umat);
+
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
 
@@ -135,7 +141,6 @@ public:
         CV_Assert(inputs.size() >= 1);
         int const cell_size = classes + coords + 1;
 
-        UMat blob_umat = blobs[0].getUMat(ACCESS_READ);
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
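
---
Every hunk above applies the same fix: Mat::getUMat(ACCESS_READ/ACCESS_RW) returns a UMat
that merely aliases the Mat's host buffer (the alias must not outlive the Mat), so each
layer now deep-copies its blobs into a cached UMat member on the first OpenCL forward pass
and drops that cache when the layer is re-finalized. A minimal standalone sketch of the
caching pattern, using illustrative names (WeightsCache, deviceWeights, invalidate) that
are not part of this patch:

    #include <opencv2/core.hpp>

    using namespace cv;

    struct WeightsCache
    {
        Mat weights;        // host-side blob, owned by the layer
        UMat umat_weights;  // device-side copy, materialized lazily

        const UMat& deviceWeights()
        {
            if (umat_weights.empty())
                weights.copyTo(umat_weights);  // deep copy: independent lifetime,
                                               // unlike weights.getUMat(ACCESS_READ)
            return umat_weights;
        }

        void invalidate()  // analogue of the release()/clear() calls in finalize()
        {
            umat_weights.release();
        }
    };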