dnn: fix gather layer implementation

- support FP16 data
commit 1102b7eff8
parent da43778c1f
Author: Alexander Alekhin
Date:   2022-12-20 06:09:34 +00:00
2 changed files with 47 additions and 9 deletions
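
For context, ONNX Gather selects slices of the input along a single axis according to an index tensor: output[..., j, ...] = input[..., indices[j], ...]. The layer implements this by viewing the tensor as outer blocks, each containing axis_size contiguous inner blocks, and copying one inner block per index. Below is a minimal standalone sketch of that decomposition, in plain C++ with illustrative names and no OpenCV dependency; it is an aid to reading the diff, not the layer's actual code.

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Gather slices of `src` (shape `dims`, row-major) along `axis`.
    std::vector<float> gatherSketch(const std::vector<float>& src,
                                    const std::vector<int>& dims, int axis,
                                    const std::vector<int>& indices)
    {
        size_t inner_size = 1;                 // elements per slice at `axis`
        for (size_t d = axis + 1; d < dims.size(); ++d)
            inner_size *= dims[d];
        size_t outer_dims = 1;                 // number of blocks before `axis`
        for (int d = 0; d < axis; ++d)
            outer_dims *= dims[d];
        const size_t outer_size = dims[axis] * inner_size;  // stride of one outer block

        std::vector<float> dst(outer_dims * indices.size() * inner_size);
        float* out = dst.data();
        for (size_t i = 0; i < outer_dims; ++i)
            for (int index : indices)          // assumes indices already >= 0
            {
                std::memcpy(out, src.data() + i * outer_size + index * inner_size,
                            inner_size * sizeof(float));
                out += inner_size;
            }
        return dst;
    }

For example, with dims = {2, 3}, axis = 0 and indices = {1, 0}, the sketch swaps the two rows. The outer_size/inner_size variables in the patch below correspond directly to this decomposition.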

View File

@@ -45,34 +45,70 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        // FP16 fallback is not needed as we handle FP16 below
+
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        CV_CheckEQ(inputs.size(), (size_t)2, "");
+        CV_CheckEQ(outputs.size(), (size_t)1, "");
+
         const Mat& inp = inputs[0];
-        const Mat& indices = inputs[1];
+        int indicesType = inputs[1].type();
+        CV_CheckType(indicesType, indicesType == CV_32FC1 || indicesType == CV_16SC1, "");
+        Mat indices32S;
+        if (indicesType == CV_16S/*FP16*/)
+        {
+            Mat indicesF32;
+            convertFp16(inputs[1], indicesF32);
+            indicesF32.convertTo(indices32S, CV_32S);
+        }
+        else
+        {
+            inputs[1].convertTo(indices32S, CV_32S);
+        }
+        const size_t indices_total = indices32S.total();
+        indices32S = indices32S.reshape(1, indices_total);
+
         Mat& out = outputs[0];
 
+        CV_CheckTypeEQ(inp.type(), out.type(), "");
+        CV_CheckTypeEQ(indices32S.type(), CV_32SC1, "");
+
         const int axis = normalize_axis(m_axis, shape(inp));
 
+        // FIXIT: why should we work with non-normalized input? it should be handled in importer or layer's output generator
+        const int axis_size = (int)inp.size[axis];
+        for (size_t j = 0; j < indices_total; ++j)
+        {
+            int& idx = indices32S.at<int>(j);
+            idx = normalize_axis(idx, axis_size);  // validate and normalize indices
+        }
+
         const size_t outer_size = axis == 0 ? inp.total() : inp.step1(axis - 1);
         const size_t outer_dims = inp.total() / outer_size;
         const size_t inner_size = inp.step1(axis);
 
-        const float* idx = indices.ptr<const float>();  // TODO: change type to integer in the future.
+        const int* idx = indices32S.ptr<int>();
         const char* src = inp.ptr<const char>();
         char* dst = out.ptr<char>();
 
+        CV_CheckEQ(out.total(), outer_dims * indices_total * inner_size, "");
+
         const size_t es = inp.elemSize1();
+        // TODO: optimize through switch (inner_size * es)
+        const size_t inner_bytes = inner_size * es;
         for (size_t i = 0; i < outer_dims; ++i)
         {
             const size_t src_offset = i * outer_size;
-            for (size_t j = 0; j < indices.total(); ++j)
+            for (size_t j = 0; j < indices_total; ++j)
             {
-                const size_t index = (static_cast<int>(idx[j]) + inp.size[axis]) % inp.size[axis];
-                const size_t new_offset = src_offset + index * inp.step1(axis);
-                std::memcpy(dst, src + new_offset * es, inner_size * es);
-                dst += inner_size * es;
+                const int index = idx[j];
+                CV_DbgCheck(index, index >= 0 && index < axis_size, "");
+                const size_t new_offset = src_offset + index * inner_size;
+                std::memcpy(dst, src + new_offset * es, inner_bytes);
+                dst += inner_bytes;
             }
         }
     }
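
Two details of the new implementation are worth spelling out. First, OpenCV's dnn module has traditionally carried FP16 blobs in CV_16S Mats, so FP16 indices are expanded to CV_32F with cv::convertFp16 and then widened to CV_32S. Second, ONNX permits negative indices that count from the end of the axis; normalize_axis resolves them once up front, which is what lets the inner loop replace the old "(idx + size) % size" arithmetic with a plain bounds check. A hedged sketch of both steps follows; the helper names are illustrative and not part of the layer.

    #include <opencv2/core.hpp>

    // Indices may arrive as CV_32F or as FP16 stored in a CV_16S container;
    // both are widened to CV_32S before use, mirroring the layer above.
    cv::Mat toInt32Indices(const cv::Mat& indices)
    {
        cv::Mat indices32S;
        if (indices.type() == CV_16SC1)  // FP16 payload in a CV_16S Mat
        {
            cv::Mat indicesF32;
            cv::convertFp16(indices, indicesF32);    // FP16 -> FP32
            indicesF32.convertTo(indices32S, CV_32S);
        }
        else
        {
            indices.convertTo(indices32S, CV_32S);   // e.g. CV_32F -> CV_32S
        }
        return indices32S;
    }

    // ONNX-style index normalization: -1 means the last element of the axis.
    int normalizeIndex(int idx, int axis_size)
    {
        CV_Assert(idx >= -axis_size && idx < axis_size);
        return idx < 0 ? idx + axis_size : idx;
    }

Normalizing once per tensor also means an out-of-range index fails loudly at the validation step instead of silently wrapping, which is what the old modulo-based code did.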

View File

@@ -199,9 +199,11 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
 }
 
 TEST_P(Test_ONNX_layers, Gather)
 {
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
     testONNXModels("gather", npy, 0, 0, false, false);
+}
+
+TEST_P(Test_ONNX_layers, Gather_Scalar)
+{
     testONNXModels("gather_scalar", npy, 0, 0, false, false);
 }
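
The test change splits the scalar-index case into its own Gather_Scalar test. The distinction matters because ONNX Gather's output shape is dims[:axis] + shape(indices) + dims[axis+1:], so a rank-0 (scalar) index removes the gathered axis entirely, while a one-element 1-D index keeps it with size 1. A small sketch of that shape rule, using a hypothetical helper rather than any OpenCV API:

    #include <vector>

    // ONNX Gather output shape: dims[:axis] ++ idxShape ++ dims[axis+1:].
    std::vector<int> gatherOutShape(const std::vector<int>& dims, int axis,
                                    const std::vector<int>& idxShape)
    {
        std::vector<int> out(dims.begin(), dims.begin() + axis);
        out.insert(out.end(), idxShape.begin(), idxShape.end());
        out.insert(out.end(), dims.begin() + axis + 1, dims.end());
        return out;
    }

    // dims = {4, 5, 6}, axis = 1:
    //   scalar index, idxShape = {}  -> output {4, 6}    (axis disappears)
    //   idxShape = {1}               -> output {4, 1, 6} (axis kept, size 1)

Keeping the two cases in separate tests makes a regression in the rank-0 path show up under its own name rather than inside the general Gather test.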