Merge pull request #16868 from YashasSamaga:cuda4dnn-scale-fix_and_improvements
commit 4dfa798e75
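Overview: the CUDA scale/shift primitive previously inferred its behaviour from whether the weights/bias Mats were empty. This commit introduces an explicit ScaleShiftConfiguration so that the scale and shift terms can each independently be absent (NONE), read from a pretrained blob (TRAINABLE), or taken from a second runtime input (UNTRAINABLE) — which in turn allows per-sample parameters for batched inputs.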
@@ -19,25 +19,51 @@
 namespace cv { namespace dnn { namespace cuda4dnn {
 
+    struct ScaleShiftConfiguration {
+        enum class OpMode {
+            NONE,
+            TRAINABLE, /* use a pretrained blob */
+            UNTRAINABLE /* use another input */
+        };
+
+        OpMode scaleMode;
+        OpMode shiftMode;
+
+        std::size_t axis;
+    };
+
     template <class T>
     class ScaleShiftOp final : public CUDABackendNode {
     public:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        ScaleShiftOp(csl::Stream stream_, std::size_t axis, const cv::Mat& weights, const cv::Mat& bias)
-            : stream(std::move(stream_)), axis{ axis }
+        ScaleShiftOp(csl::Stream stream_, const ScaleShiftConfiguration& config, const cv::Mat& weights, const cv::Mat& bias)
+            : stream(std::move(stream_)), axis{ config.axis }
         {
-            if (!weights.empty())
+            scaleMode = config.scaleMode;
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!weights.empty());
                 weightsTensor = csl::makeTensorHeader<T>(weights);
                 csl::copyMatToTensor<T>(weights, weightsTensor, stream);
             }
 
-            if (!bias.empty())
+            shiftMode = config.shiftMode;
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!bias.empty());
                 biasTensor = csl::makeTensorHeader<T>(bias);
                 csl::copyMatToTensor<T>(bias, biasTensor, stream);
             }
+
+            CV_Assert(scaleMode != ScaleShiftConfiguration::OpMode::NONE ||
+                      shiftMode != ScaleShiftConfiguration::OpMode::NONE);
+
+            if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE &&
+                shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Error(cv::Error::StsNotImplemented, "scale and shift both in untrainable mode is not supported");
+            }
         }
 
         void forward(
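Note: a minimal sketch of driving the new constructor; the axis value and the weights Mat below are illustrative, not taken from the commit:

    /* sketch: trainable per-channel scale, no shift (values are illustrative) */
    cuda4dnn::ScaleShiftConfiguration config;
    config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
    config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
    config.axis = 1; /* scale along the channel axis of an NCHW tensor */

    cv::Mat weights(1, 3, CV_32F, cv::Scalar(2.0f)); /* one factor per channel */
    /* ScaleShiftOp<float>(stream, config, weights, cv::Mat()) copies the blob
     * to the device and records the modes for later use in forward() */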
@@ -53,40 +79,60 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
 
+            /* number of batches in the weights/bias
+             * trainable mode: same for all batches
+             * untrainable mode: could be different for different batch samples
+             */
+            std::size_t parameter_batch_size = 1;
+
             csl::TensorView<T> weights;
-            if (weightsTensor.empty() && biasTensor.empty())
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
+            {
+                CV_Assert(!weightsTensor.empty());
+                weights = csl::TensorView<T>(weightsTensor);
+            }
+            else if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
             {
                 CV_Assert(inputs.size() == 2);
 
-                /* no explicit scale/shift values provided; use the second input as weights */
                 auto wrapper = inputs[1].dynamicCast<wrapper_type>();
                 weights = wrapper->getView();
-            }
-            else if (!weightsTensor.empty())
-            {
-                weights = csl::TensorSpan<T>(weightsTensor);
+
+                parameter_batch_size = weights.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
             }
 
             csl::TensorView<T> bias;
-            if (!biasTensor.empty())
-                bias = csl::TensorSpan<T>(biasTensor);
-
-            const auto numParams = !weights.empty() ? weights.size() : bias.size();
-            CV_Assert(numParams != 0);
-            if (!weightsTensor.empty() && !biasTensor.empty())
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
-                CV_CheckEQ(weights.size(), bias.size(), "weights and bias size are not equal");
+                CV_Assert(!biasTensor.empty());
+                bias = csl::TensorView<T>(biasTensor);
+            }
+            else if (shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Assert(inputs.size() == 2);
+                auto wrapper = inputs[1].dynamicCast<wrapper_type>();
+                bias = wrapper->getView();
+
+                parameter_batch_size = bias.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
             }
 
-            /* the weights/bias might require broadcasting to scale/shift */
+            CV_Assert(!weights.empty() || !bias.empty());
+            if (!weights.empty() && !bias.empty())
+            {
+                CV_CheckEQ(weights.size(), bias.size(), "different broadcasting options for weights and bias is not supported");
+            }
+
+            const auto num_parameters = !weights.empty() ? weights.size() : bias.size();
+            const auto mid_size = num_parameters / parameter_batch_size;
+
+            /* the scale shift operation might require broadcasting */
             const int end_axis = [&] {
-                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++)
-                {
-                    std::size_t size = input.size_range(axis, endAxis);
-
-                    if (size == numParams)
+                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++) {
+                    if (input.size_range(axis, endAxis) == mid_size)
                         return endAxis;
                 }
-                CV_Assert(0 /* invalid weights matrix */);
+                CV_Assert(0 /* failed to find a broadcast config */);
             }();
 
             std::size_t inner_size = input.size_range(end_axis, input.rank());
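Note: to make the broadcast search concrete, consider an NCHW input of [4, 3, 16, 16] with axis = 1 and untrainable weights shaped [4, 3]: parameter_batch_size is 4, mid_size is 12 / 4 = 3, so the loop stops at end_axis = 2 and each parameter scales inner_size = 16 * 16 = 256 elements. A self-contained sketch of that arithmetic follows; the shape vector and the size_range helper are illustrative stand-ins for the csl tensor API, not part of the commit:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    /* stand-in for TensorView::size_range: product of extents in [from, to) */
    static std::size_t size_range(const std::vector<std::size_t>& shape,
                                  std::size_t from, std::size_t to)
    {
        return std::accumulate(shape.begin() + from, shape.begin() + to,
                               std::size_t(1), std::multiplies<std::size_t>());
    }

    int main()
    {
        const std::vector<std::size_t> input = {4, 3, 16, 16}; /* NCHW */
        const std::size_t axis = 1;

        /* untrainable mode: weights shaped [4, 3], one factor per channel per sample */
        const std::size_t num_parameters = 4 * 3;
        const std::size_t parameter_batch_size = 4; /* weights.get_axis_size(0) */
        const std::size_t mid_size = num_parameters / parameter_batch_size; /* 3 */

        for (std::size_t endAxis = axis + 1; endAxis <= input.size(); endAxis++)
        {
            if (size_range(input, axis, endAxis) == mid_size)
            {
                std::cout << "end_axis = " << endAxis << ", inner_size = "
                          << size_range(input, endAxis, input.size()) << '\n';
                /* prints: end_axis = 2, inner_size = 256 */
                break;
            }
        }
    }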
@@ -103,6 +149,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         csl::Stream stream;
         csl::Tensor<T> weightsTensor, biasTensor;
         std::size_t axis;
+
+        ScaleShiftConfiguration::OpMode scaleMode, shiftMode;
     };
 
 }}} /* namespace cv::dnn::cuda4dnn */
@@ -159,14 +159,49 @@ public:
 
         CV_Assert(!blobs.empty() || inputs.size() == 2);
 
-        cv::Mat weightsMat = hasWeights ? blobs[0] : Mat();
-
-        /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
-         * in either case, it is at the end of the blobs vector => bias = blobs.back()
-         */
-        cv::Mat biasMat = hasBias ? blobs.back() : Mat();
+        auto weightsMat = Mat(), biasMat = Mat();
+
+        cuda4dnn::ScaleShiftConfiguration config;
+        if (hasWeights)
+        {
+            if (blobs.empty())
+            {
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                weightsMat = blobs[0];
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        if (hasBias)
+        {
+            if (blobs.empty())
+            {
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
+                 * in either case, it is at the end of the blobs vector => bias = blobs.back()
+                 */
+                biasMat = blobs.back();
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        config.axis = axis;
+
-        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
+        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), config, weightsMat, biasMat);
     }
 #endif
 
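Note: the branching above reduces to a simple mapping from the layer's flags to an OpMode. A hypothetical helper (not part of the commit, assuming the same translation unit as the code above) that mirrors it:

    /* hypothetical helper mirroring the mapping above; not part of the commit */
    static cuda4dnn::ScaleShiftConfiguration::OpMode
    opModeFor(bool hasParam, bool blobsEmpty)
    {
        using OpMode = cuda4dnn::ScaleShiftConfiguration::OpMode;
        if (!hasParam)
            return OpMode::NONE;                 /* this term is not applied at all */
        return blobsEmpty ? OpMode::UNTRAINABLE  /* parameters arrive as a second input */
                          : OpMode::TRAINABLE;   /* parameters come from a stored blob */
    }

    /* usage sketch:
     *   config.scaleMode = opModeFor(hasWeights, blobs.empty());
     *   config.shiftMode = opModeFor(hasBias, blobs.empty());
     */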
@@ -580,8 +580,8 @@ TEST_P(Test_Darknet_layers, convolutional)
 
 TEST_P(Test_Darknet_layers, scale_channels)
 {
-    // TODO: test fails for batches due to a bug/missing feature in ScaleLayer
-    testDarknetLayer("scale_channels", false, false);
+    bool testBatches = backend == DNN_BACKEND_CUDA;
+    testDarknetLayer("scale_channels", false, testBatches);
 }
 
 TEST_P(Test_Darknet_layers, connected)
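Note: with per-sample parameters now supported, the CUDA backend is the first to exercise scale_channels with batched inputs; the third argument to testDarknetLayer presumably toggles batch processing, and it remains disabled for the other backends, which still have the limitation the removed TODO referred to.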