// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#include "../precomp.hpp"
|
|
#include "opencv2/core/hal/intrin.hpp"
|
|
#include "../op_cuda.hpp"
|
|
#include "../op_webnn.hpp"
|
|
|
|
#include <float.h>
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
using std::max;
|
|
using std::min;
|
|
|
|
#include <opencv2/core/utils/logger.hpp>
|
|
|
|
namespace cv
|
|
{
|
|
namespace dnn
|
|
{
|
|
|
|
class ReduceLayerImpl CV_FINAL : public ReduceLayer
|
|
{
|
|
public:
|
|
ReduceLayerImpl(const LayerParams& params)
|
|
{
|
|
setParamsFrom(params);
|
|
// set reduce type
|
|
CV_Assert(params.has("reduce"));
|
|
String typeString = toLowerCase(params.get<String>("reduce"));
|
|
if (typeString == "max")
|
|
reduceType= MAX;
|
|
else if (typeString == "min")
|
|
reduceType= MIN;
|
|
else if (typeString == "ave")
|
|
reduceType= AVE;
|
|
else if (typeString == "sum")
|
|
reduceType= SUM;
|
|
else if (typeString == "sum_square")
|
|
reduceType= SUM_SQUARE;
|
|
else if (typeString == "l1")
|
|
reduceType= L1;
|
|
else if (typeString == "l2")
|
|
reduceType= L2;
|
|
else if (typeString == "log_sum")
|
|
reduceType= LOG_SUM;
|
|
else if (typeString == "log_sum_exp")
|
|
reduceType= LOG_SUM_EXP;
|
|
else if (typeString == "prod")
|
|
reduceType= PROD;
|
|
else
|
|
CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\"");
|
|
|
|
// set deleted dims
|
|
CV_Assert(params.has("deleted_dims"));
|
|
DictValue tempDims = params.get("deleted_dims");
|
|
int i, n = tempDims.size();
|
|
reduceDims.resize(n);
|
|
for (i = 0; i < n; i++)
|
|
{
|
|
reduceDims[i] = tempDims.get<int>(i);
|
|
}
|
|
}
|
|
|
|
// Reports whether this layer can run on the given backend: true only for
// DNN_BACKEND_OPENCV.
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
    return backendId == DNN_BACKEND_OPENCV;
}
|
|
|
|
// reduceType == MIN
// Minimum of the elements in [first, last).
struct ReduceOpMIN
{
    // Returns FLT_MAX for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float lowest = FLT_MAX;
        for (const float* it = first; it != last; ++it)
        {
            lowest = std::min(lowest, *it);
        }
        return lowest;
    }
};
|
|
|
|
// reduceType == MAX
// Maximum of the elements in [first, last).
struct ReduceOpMAX
{
    // Returns -FLT_MAX for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float highest = -FLT_MAX;
        for (const float* it = first; it != last; ++it)
        {
            highest = std::max(highest, *it);
        }
        return highest;
    }
};
|
|
|
|
// reduceType == SUM
// Sum of the elements in [first, last), accumulated left to right in float.
struct ReduceOpSUM
{
    // Returns 0 for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float sum = 0.f;
        for (const float* it = first; it != last; ++it)
        {
            sum = sum + *it;
        }
        return sum;
    }
};
|
|
|
|
// reduceType == AVE
// Sum of the elements in [first, last) scaled by ikarea.
struct ReduceOpAVE
{
    // ikarea is the factor the sum is multiplied by; the caller is expected
    // to pass the reciprocal of the element count to obtain a mean.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float sum = 0.f;
        for (const float* it = first; it != last; ++it)
        {
            sum = sum + *it;
        }
        return sum * ikarea;
    }
};
|
|
|
|
// reduceType == SUM_SQUARE
// Sum of the squared elements in [first, last).
struct ReduceOpSUM_SQUARE
{
    // Returns 0 for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float sumSq = 0.f;
        for (const float* it = first; it != last; ++it)
        {
            const float v = *it;
            sumSq = sumSq + v * v;
        }
        return sumSq;
    }
};
|
|
|
|
// reduceType == L1
// Sum of the absolute values of the elements in [first, last).
struct ReduceOpL1
{
    // Returns 0 for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float norm = 0.f;
        for (const float* it = first; it != last; ++it)
        {
            norm = norm + std::abs(*it);
        }
        return norm;
    }
};
|
|
|
|
// reduceType == L2
// Square root of the sum of squared elements in [first, last).
struct ReduceOpL2
{
    // Returns 0 for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float sumSq = 0.f;
        for (const float* it = first; it != last; ++it)
        {
            const float v = *it;
            sumSq = sumSq + v * v;
        }
        return std::sqrt(sumSq);
    }
};
|
|
|
|
// reduceType == PROD
// Product of the elements in [first, last), multiplied left to right.
struct ReduceOpPROD
{
    // Returns 1 for an empty range. ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float product = 1.0f;
        for (const float* it = first; it != last; ++it)
        {
            product = product * *it;
        }
        return product;
    }
};
|
|
|
|
// reduceType == LOG_SUM
// Natural logarithm of the sum of the elements in [first, last).
struct ReduceOpLOG_SUM
{
    // An empty range yields std::log(0.0f). ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        float sum = 0.0f;
        for (const float* it = first; it != last; ++it)
        {
            sum = sum + *it;
        }
        return std::log(sum);
    }
};
|
|
|
|
// reduceType == LOG_SUM_EXP
// log(sum(exp(x))) over the elements in [first, last), evaluated in the
// numerically stable form  m + log(sum(exp(x - m)))  with m = max(x).
struct ReduceOpLOG_SUM_EXP
{
    // An empty range yields std::log(0.0f). ikarea is not used.
    float apply(const float* first, const float* last, const float ikarea = 1.0f)
    {
        if (first == last)
            return std::log(0.0f);

        // Shifting by the maximum keeps every exponent <= 0, so exp() can
        // neither overflow to +inf nor have all terms underflow to zero.
        // When the maximum is not finite (inf/NaN input) the shift would
        // produce NaNs of its own, so fall back to the unshifted formula.
        const float maxVal = *std::max_element(first, last);
        const float shift = std::isfinite(maxVal) ? maxVal : 0.0f;

        const float sum = std::accumulate(first, last, 0.0f,
                                          [shift](float acc, float v)
                                          {
                                              return acc + std::exp(v - shift);
                                          });
        return shift + std::log(sum);
    }
};
|
|
|
|
template<typename Func>
|
|
class ReduceInvoker : public ParallelLoopBody
|
|
{
|
|
public:
|
|
const Mat* src;
|
|
Mat *dst;
|
|
std::vector<size_t> reduceDims;
|
|
int nstripes;
|
|
int reduceType;
|
|
Ptr<Func> func;
|
|
|
|
ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr<Func>()) {}
|
|
|
|
static void run(const Mat& src, Mat& dst, std::vector<size_t> reduceDims, int reduceType, int nstripes)
|
|
{
|
|
CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type());
|
|
|
|
ReduceInvoker<Func> p;
|
|
|
|
p.src = &src;
|
|
p.dst = &dst;
|
|
|
|
p.reduceDims = reduceDims;
|
|
p.nstripes = nstripes;
|
|
p.reduceType = reduceType;
|
|
|
|
parallel_for_(Range(0, nstripes), p, nstripes);
|
|
}
|
|
|
|
void operator()(const Range& r) const CV_OVERRIDE
|
|
{
|
|
size_t total = dst->total();
|
|
size_t stripeSize = (total + nstripes - 1)/nstripes;
|
|
size_t stripeStart = r.start*stripeSize;
|
|
size_t stripeEnd = std::min(r.end*stripeSize, total);
|
|
size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies<size_t>());
|
|
|
|
float *dstData = (float *)dst->data;
|
|
float *srcData = (float *)src->data;
|
|
|
|
for (size_t ofs = stripeStart; ofs < stripeEnd;)
|
|
{
|
|
const float* first = srcData + ofs * stride_w;
|
|
const float* last = srcData + (ofs + 1) * stride_w;
|
|
|
|
if (ofs < stripeEnd)
|
|
{
|
|
dstData[ofs] = func->apply(first, last, 1.0 / stride_w);
|
|
ofs += 1;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// Runs the reduction selected by reduceType on inputs[0] and stores the
// result in outputs[0]. CV_16S inputs are delegated to forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (inputs_arr.depth() == CV_16S)
    {
        forward_fallback(inputs_arr, outputs_arr, internals_arr);
        return;
    }

    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);
    CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM));
    const int nstripes = getNumThreads();

    // Each case instantiates the invoker with the functor for that reduction.
    switch (reduceType)
    {
    case MAX:
        ReduceInvoker<ReduceOpMAX>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case MIN:
        ReduceInvoker<ReduceOpMIN>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case AVE:
        ReduceInvoker<ReduceOpAVE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case SUM:
        ReduceInvoker<ReduceOpSUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case L1:
        ReduceInvoker<ReduceOpL1>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case L2:
        ReduceInvoker<ReduceOpL2>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case PROD:
        ReduceInvoker<ReduceOpPROD>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case SUM_SQUARE:
        ReduceInvoker<ReduceOpSUM_SQUARE>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case LOG_SUM:
        ReduceInvoker<ReduceOpLOG_SUM>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    case LOG_SUM_EXP:
        ReduceInvoker<ReduceOpLOG_SUM_EXP>::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes);
        break;
    default:
        CV_Error(Error::StsNotImplemented, "Not implemented");
        break;
    }
}
|
|
|
|
// Computes the single output shape: the leading dimensions of inputs[0]
// that remain after dropping reduceDims.size() trailing ones, or {1} when
// every dimension is dropped. Always returns false.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const CV_OVERRIDE
{
    CV_Assert(inputs.size() > 0);
    CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size());

    // Non-negative thanks to the assertion above.
    const size_t keptDims = inputs[0].size() - reduceDims.size();

    std::vector<int> outShape;
    for (size_t d = 0; d < keptDims; d++)
    {
        outShape.push_back(inputs[0][d]);
    }
    if (outShape.empty())
    {
        outShape.push_back(1);
    }
    outputs.assign(1, outShape);

    return false;
}
|
|
|
|
virtual bool tryQuantize(const std::vector<std::vector<float> > &scales,
|
|
const std::vector<std::vector<int> > &zeropoints, LayerParams& params) CV_OVERRIDE
|
|
{
|
|
if (reduceType== MAX || reduceType== MIN)
|
|
{
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Estimates the operation count as, for every output, the number of output
// elements times the number of input elements folded into each of them
// (the product of reduceDims). The inputs argument is not used.
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                       const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
    CV_UNUSED(inputs); // suppress unused variable warning

    // Seed with size_t so the product is not evaluated in int.
    const size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), static_cast<size_t>(1), std::multiplies<size_t>());

    // Accumulate in int64 (the return type); `long` is only 32 bits on LLP64 platforms.
    int64 flops = 0;
    for (size_t i = 0; i < outputs.size(); i++)
    {
        flops += static_cast<int64>(total(outputs[i])) * static_cast<int64>(stride_w);
    }
    return flops;
}
|
|
private:
|
|
// Reduction kinds selectable through the "reduce" layer parameter.
enum ReduceType
{
    MAX = 0,     // largest element
    MIN,         // smallest element
    AVE,         // sum scaled by the reciprocal of the element count
    SUM,         // sum of elements
    L1,          // sum of absolute values
    L2,          // square root of the sum of squares
    PROD,        // product of elements
    SUM_SQUARE,  // sum of squares
    LOG_SUM,     // log of the sum
    LOG_SUM_EXP  // log of the sum of exponentials
};
|
|
};
|
|
|
|
// Factory: constructs the reduce layer implementation from params and
// returns it through the public ReduceLayer interface.
Ptr<ReduceLayer> ReduceLayer::create(const LayerParams& params)
{
    return makePtr<ReduceLayerImpl>(params);
}
|
|
|
|
}
|
|
}
|