// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #include "../precomp.hpp" #include "opencv2/core/hal/intrin.hpp" #include "../op_cuda.hpp" #include "../op_webnn.hpp" #include #include #include using std::max; using std::min; #include namespace cv { namespace dnn { class ReduceLayerImpl CV_FINAL : public ReduceLayer { public: ReduceLayerImpl(const LayerParams& params) { setParamsFrom(params); // set reduce type CV_Assert(params.has("reduce")); String typeString = toLowerCase(params.get("reduce")); if (typeString == "max") reduceType= MAX; else if (typeString == "min") reduceType= MIN; else if (typeString == "ave") reduceType= AVE; else if (typeString == "sum") reduceType= SUM; else if (typeString == "sum_square") reduceType= SUM_SQUARE; else if (typeString == "l1") reduceType= L1; else if (typeString == "l2") reduceType= L2; else if (typeString == "log_sum") reduceType= LOG_SUM; else if (typeString == "log_sum_exp") reduceType= LOG_SUM_EXP; else if (typeString == "prod") reduceType= PROD; else CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\""); // set deleted dims CV_Assert(params.has("deleted_dims")); DictValue tempDims = params.get("deleted_dims"); int i, n = tempDims.size(); reduceDims.resize(n); for (i = 0; i < n; i++) { reduceDims[i] = tempDims.get(i); } } virtual bool supportBackend(int backendId) CV_OVERRIDE { if (backendId == DNN_BACKEND_OPENCV) { return true; } return false; } // reduceType == MIN struct ReduceOpMIN { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, FLT_MAX, [](float a, float b) { return std::min(a, b); }); } }; // reduceType == MAX struct ReduceOpMAX { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, -FLT_MAX, [](float a, float b) { return std::max(a, b); }); } }; // reduceType == SUM struct ReduceOpSUM { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, 0.f); } }; // reduceType == AVE struct ReduceOpAVE { float apply(const float* first, const float* last, const float ikarea = 1.0f) { float output = std::accumulate(first, last, 0.f); return output * ikarea; } }; // reduceType == SUM_SQUARE struct ReduceOpSUM_SQUARE { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, 0.f, [](float a, float b) { return a + b * b; }); } }; // reduceType == L1 struct ReduceOpL1 { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, 0.f, [](float a, float b) { return a + std::abs(b); }); } }; // reduceType == L2 struct ReduceOpL2 { float apply(const float* first, const float* last, const float ikarea = 1.0f) { float output = std::accumulate(first, last, 0.f, [](float a, float b) { return a + b * b; }); return std::sqrt(output); } }; // reduceType == PROD struct ReduceOpPROD { float apply(const float* first, const float* last, const float ikarea = 1.0f) { return std::accumulate(first, last, 1.0f, std::multiplies()); } }; // reduceType == LOG_SUM struct ReduceOpLOG_SUM { float apply(const float* first, const float* last, const float ikarea = 1.0f) { float output = std::accumulate(first, last, 0.0f); return std::log(output); } }; // reduceType == LOG_SUM_EXP struct ReduceOpLOG_SUM_EXP { float apply(const float* first, const float* last, const float ikarea = 1.0f) { float output = std::accumulate(first, last, 0.0f, [](float a, float b) { return a + std::exp(b); }); return std::log(output); } }; template class ReduceInvoker : public ParallelLoopBody { public: const Mat* src; Mat *dst; std::vector reduceDims; int nstripes; int reduceType; Ptr func; ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr()) {} static void run(const Mat& src, Mat& dst, std::vector reduceDims, int reduceType, int nstripes) { CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type()); ReduceInvoker p; p.src = &src; p.dst = &dst; p.reduceDims = reduceDims; p.nstripes = nstripes; p.reduceType = reduceType; parallel_for_(Range(0, nstripes), p, nstripes); } void operator()(const Range& r) const CV_OVERRIDE { size_t total = dst->total(); size_t stripeSize = (total + nstripes - 1)/nstripes; size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(r.end*stripeSize, total); size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); float *dstData = (float *)dst->data; float *srcData = (float *)src->data; for (size_t ofs = stripeStart; ofs < stripeEnd;) { const float* first = srcData + ofs * stride_w; const float* last = srcData + (ofs + 1) * stride_w; if (ofs < stripeEnd) { dstData[ofs] = func->apply(first, last, 1.0 / stride_w); ofs += 1; } } } }; void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); if (inputs_arr.depth() == CV_16S) { forward_fallback(inputs_arr, outputs_arr, internals_arr); return; } std::vector inputs, outputs; inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM)); const int nstripes = getNumThreads(); switch (reduceType) { case MIN: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case MAX: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case AVE: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case SUM: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case L1: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case L2: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case SUM_SQUARE: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case PROD: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case LOG_SUM: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } case LOG_SUM_EXP: { ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); break; } default: CV_Error(Error::StsNotImplemented, "Not implemented"); break; } } bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, std::vector &internals) const CV_OVERRIDE { CV_Assert(inputs.size() > 0); CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size()); std::vector outShape; if (inputs[0].size() == reduceDims.size()) outShape.push_back(1); else { for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++) { outShape.push_back(inputs[0][i]); } } outputs.assign(1, outShape); return false; } virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { if (reduceType== MAX || reduceType== MIN) { return true; } return false; } virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { CV_UNUSED(inputs); // suppress unused variable warning long flops = 0; size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); for (int i = 0; i < outputs.size(); i++) { flops += total(outputs[i])*(stride_w); } return flops; } private: enum ReduceType { MAX, MIN, AVE, SUM, L1, L2, PROD, SUM_SQUARE, LOG_SUM, LOG_SUM_EXP }; }; Ptr ReduceLayer::create(const LayerParams& params) { return Ptr(new ReduceLayerImpl(params)); } } }