Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
665408e57f
@ -148,7 +148,13 @@ private:
|
||||
#else
|
||||
cv::dnn::Net net;
|
||||
cv::dnn::LayerParams lp;
|
||||
net.addLayerToPrev("testLayer", "Identity", lp);
|
||||
lp.set("kernel_size", 1);
|
||||
lp.set("num_output", 1);
|
||||
lp.set("bias_term", false);
|
||||
lp.type = "Convolution";
|
||||
lp.name = "testLayer";
|
||||
lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
|
||||
net.addLayerToPrev(lp.name, lp.type, lp);
|
||||
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
|
||||
net.setPreferableTarget(target);
|
||||
static int inpDims[] = {1, 2, 3, 4};
|
||||
@ -2676,7 +2682,7 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
|
||||
backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
|
||||
for (auto& it : ieNet.getOutputsInfo())
|
||||
{
|
||||
Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
|
||||
Ptr<Layer> cvLayer(new InfEngineBackendLayer(ieNet));
|
||||
InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
|
||||
CV_Assert(ieLayer);
|
||||
|
||||
@ -2871,8 +2877,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
|
||||
std::vector<LayerPin> pins;
|
||||
for (int i = 0; i < outBlobNames.size(); i++)
|
||||
{
|
||||
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
|
||||
pins.insert(pins.end(), lp.begin(), lp.end());
|
||||
pins.push_back(impl->getPinByAlias(outBlobNames[i]));
|
||||
}
|
||||
|
||||
impl->setUpNet(pins);
|
||||
@ -2885,9 +2890,10 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
|
||||
for (int i = 0; i < outBlobNames.size(); i++)
|
||||
{
|
||||
std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
|
||||
for (int i = 0; i < lp.size(); i++)
|
||||
outputBlobs[i].resize(lp.size());
|
||||
for (int j = 0; j < lp.size(); j++)
|
||||
{
|
||||
outputBlobs[i].push_back(impl->getBlob(lp[i]));
|
||||
outputBlobs[i][j] = impl->getBlob(lp[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -110,14 +110,25 @@ public:
|
||||
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
|
||||
{
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
InferenceEngine::Builder::SplitLayer ieLayer(name);
|
||||
ieLayer.setOutputPorts({InferenceEngine::Port()});
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#else
|
||||
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
|
||||
CV_Assert(!input->dims.empty());
|
||||
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
InferenceEngine::Builder::Layer ieLayer(name);
|
||||
ieLayer.setName(name);
|
||||
if (preferableTarget == DNN_TARGET_MYRIAD)
|
||||
{
|
||||
ieLayer.setType("Copy");
|
||||
}
|
||||
else
|
||||
{
|
||||
ieLayer.setType("Split");
|
||||
ieLayer.getParameters()["axis"] = input->dims.size() - 1;
|
||||
ieLayer.getParameters()["out_sizes"] = input->dims[0];
|
||||
}
|
||||
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
|
||||
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||
#else
|
||||
InferenceEngine::LayerParams lp;
|
||||
lp.name = name;
|
||||
lp.type = "Split";
|
||||
|
||||
@ -281,7 +281,7 @@ public:
|
||||
const int outCn = blobs[0].size[0];
|
||||
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
|
||||
// use vectorized (i.e. with intrinsics) loops without tail processing
|
||||
Mat wm = blobs[0].reshape(1, outCn).clone();
|
||||
Mat wm = blobs[0].reshape(1, outCn);
|
||||
if( wm.step1() % VEC_ALIGN != 0 )
|
||||
{
|
||||
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
|
||||
@ -374,6 +374,10 @@ public:
|
||||
|
||||
if (!w.empty())
|
||||
{
|
||||
// Keep origin weights unchanged.
|
||||
if (weightsMat.data == blobs[0].data)
|
||||
weightsMat = weightsMat.clone();
|
||||
|
||||
Mat originWeights = blobs[0].reshape(1, outCn);
|
||||
for (int i = 0; i < outCn; ++i)
|
||||
{
|
||||
@ -551,13 +555,13 @@ public:
|
||||
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
|
||||
InferenceEngine::Builder::ConvolutionLayer ieLayer(name);
|
||||
|
||||
ieLayer.setKernel({kernel.height, kernel.width});
|
||||
ieLayer.setStrides({stride.height, stride.width});
|
||||
ieLayer.setDilation({dilation.height, dilation.width});
|
||||
ieLayer.setPaddingsBegin({pad.height, pad.width});
|
||||
ieLayer.setPaddingsEnd({pad.height, pad.width});
|
||||
ieLayer.setGroup(group);
|
||||
ieLayer.setOutDepth(outCn);
|
||||
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
|
||||
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
|
||||
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
|
||||
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
|
||||
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
|
||||
ieLayer.setGroup((size_t)group);
|
||||
ieLayer.setOutDepth((size_t)outCn);
|
||||
|
||||
ieLayer.setWeights(ieWeights);
|
||||
if (ieBiases)
|
||||
@ -1220,7 +1224,7 @@ public:
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
||||
{
|
||||
if (INF_ENGINE_RELEASE == 2018050000 && (adjustPad.height || adjustPad.width))
|
||||
if (INF_ENGINE_RELEASE >= 2018050000 && (adjustPad.height || adjustPad.width))
|
||||
return false;
|
||||
|
||||
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
|
||||
@ -1783,13 +1787,13 @@ public:
|
||||
|
||||
InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
|
||||
|
||||
ieLayer.setKernel({kernel.height, kernel.width});
|
||||
ieLayer.setStrides({stride.height, stride.width});
|
||||
ieLayer.setDilation({dilation.height, dilation.width});
|
||||
ieLayer.setPaddingsBegin({pad.height, pad.width});
|
||||
ieLayer.setPaddingsEnd({pad.height, pad.width});
|
||||
ieLayer.setGroup(group);
|
||||
ieLayer.setOutDepth(numOutput);
|
||||
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
|
||||
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
|
||||
ieLayer.setDilation({(size_t)dilation.height, (size_t)dilation.width});
|
||||
ieLayer.setPaddingsBegin({(size_t)pad.height, (size_t)pad.width});
|
||||
ieLayer.setPaddingsEnd({(size_t)pad.height, (size_t)pad.width});
|
||||
ieLayer.setGroup((size_t)group);
|
||||
ieLayer.setOutDepth((size_t)numOutput);
|
||||
|
||||
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW));
|
||||
if (hasBias())
|
||||
|
||||
@ -299,10 +299,10 @@ public:
|
||||
if (type == MAX || type == AVE)
|
||||
{
|
||||
InferenceEngine::Builder::PoolingLayer ieLayer(name);
|
||||
ieLayer.setKernel({kernel.height, kernel.width});
|
||||
ieLayer.setStrides({stride.height, stride.width});
|
||||
ieLayer.setPaddingsBegin({pad_t, pad_l});
|
||||
ieLayer.setPaddingsEnd({pad_b, pad_r});
|
||||
ieLayer.setKernel({(size_t)kernel.height, (size_t)kernel.width});
|
||||
ieLayer.setStrides({(size_t)stride.height, (size_t)stride.width});
|
||||
ieLayer.setPaddingsBegin({(size_t)pad_t, (size_t)pad_l});
|
||||
ieLayer.setPaddingsEnd({(size_t)pad_b, (size_t)pad_r});
|
||||
ieLayer.setPoolingType(type == MAX ?
|
||||
InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
|
||||
InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
|
||||
|
||||
@ -82,7 +82,7 @@ void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& input
|
||||
CV_Assert(it != layers.end());
|
||||
|
||||
const int layerId = it->second;
|
||||
for (int i = 0; i < inpWrappers.size(); ++i)
|
||||
for (size_t i = 0; i < inpWrappers.size(); ++i)
|
||||
{
|
||||
const auto& inp = inpWrappers[i];
|
||||
const std::string& inpName = inp->dataPtr->name;
|
||||
@ -103,7 +103,7 @@ void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& input
|
||||
else
|
||||
inpId = it->second;
|
||||
|
||||
netBuilder.connect(inpId, {layerId, i});
|
||||
netBuilder.connect((size_t)inpId, {(size_t)layerId, i});
|
||||
unconnectedLayersIds.erase(inpId);
|
||||
}
|
||||
CV_Assert(!outputs.empty());
|
||||
@ -119,7 +119,7 @@ void InfEngineBackendNet::init(int targetId)
|
||||
for (int id : unconnectedLayersIds)
|
||||
{
|
||||
InferenceEngine::Builder::OutputLayer outLayer("myconv1");
|
||||
netBuilder.addLayer({id}, outLayer);
|
||||
netBuilder.addLayer({InferenceEngine::PortInfo(id)}, outLayer);
|
||||
}
|
||||
cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build()));
|
||||
}
|
||||
@ -718,19 +718,33 @@ Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
|
||||
return Mat(size, CV_32F, (void*)blob->buffer());
|
||||
}
|
||||
|
||||
InfEngineBackendLayer::InfEngineBackendLayer(const InferenceEngine::DataPtr& output_)
|
||||
{
|
||||
output = output_;
|
||||
}
|
||||
|
||||
bool InfEngineBackendLayer::getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<MatShape> &outputs,
|
||||
std::vector<MatShape> &internals) const
|
||||
{
|
||||
std::vector<size_t> dims = output->dims;
|
||||
std::vector<int> shape(dims.rbegin(), dims.rend());
|
||||
outputs.assign(1, shape);
|
||||
InferenceEngine::ICNNNetwork::InputShapes inShapes = t_net.getInputShapes();
|
||||
InferenceEngine::ICNNNetwork::InputShapes::iterator itr;
|
||||
bool equal_flag = true;
|
||||
size_t i = 0;
|
||||
for (itr = inShapes.begin(); itr != inShapes.end(); ++itr)
|
||||
{
|
||||
InferenceEngine::SizeVector currentInShape(inputs[i].begin(), inputs[i].end());
|
||||
if (itr->second != currentInShape)
|
||||
{
|
||||
itr->second = currentInShape;
|
||||
equal_flag = false;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!equal_flag)
|
||||
{
|
||||
InferenceEngine::CNNNetwork curr_t_net(t_net);
|
||||
curr_t_net.reshape(inShapes);
|
||||
}
|
||||
std::vector<size_t> dims = t_net.getOutputsInfo()[name]->getDims();
|
||||
outputs.push_back(MatShape(dims.begin(), dims.end()));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -260,7 +260,7 @@ InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Pt
|
||||
class InfEngineBackendLayer : public Layer
|
||||
{
|
||||
public:
|
||||
InfEngineBackendLayer(const InferenceEngine::DataPtr& output);
|
||||
InfEngineBackendLayer(const InferenceEngine::CNNNetwork &t_net_) : t_net(t_net_) {};
|
||||
|
||||
virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||
const int requiredOutputs,
|
||||
@ -273,7 +273,7 @@ public:
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE;
|
||||
|
||||
private:
|
||||
InferenceEngine::DataPtr output;
|
||||
InferenceEngine::CNNNetwork t_net;
|
||||
};
|
||||
|
||||
#endif // HAVE_INF_ENGINE
|
||||
|
||||
@ -236,6 +236,10 @@ TEST_P(Test_Caffe_layers, Dropout)
|
||||
|
||||
TEST_P(Test_Caffe_layers, Concat)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE > 2018050000
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||
throw SkipTestException("");
|
||||
#endif
|
||||
testLayerUsingCaffeModels("layer_concat");
|
||||
testLayerUsingCaffeModels("layer_concat_optim", true, false);
|
||||
testLayerUsingCaffeModels("layer_concat_shared_input", true, false);
|
||||
@ -923,8 +927,9 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
|
||||
{
|
||||
Target targetId = GetParam();
|
||||
|
||||
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
||||
Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt"));
|
||||
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
|
||||
Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
|
||||
|
||||
Mat inp = blobFromNPY(_tf("blob.npy"));
|
||||
|
||||
@ -935,22 +940,15 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy)
|
||||
net.setInput(inp);
|
||||
net.setPreferableTarget(targetId);
|
||||
|
||||
if (targetId != DNN_TARGET_MYRIAD)
|
||||
{
|
||||
Mat out = net.forward();
|
||||
Mat out = net.forward();
|
||||
|
||||
normAssert(outDefault, out);
|
||||
double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.4e-3 : 1e-5;
|
||||
double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.8e-2 : 1e-4;
|
||||
normAssert(outDefault, out, "", l1, lInf);
|
||||
|
||||
std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
||||
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
|
||||
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
|
||||
}
|
||||
else
|
||||
{
|
||||
// An assertion is expected because the model is in FP32 format but
|
||||
// Myriad plugin supports only FP16 models.
|
||||
ASSERT_ANY_THROW(net.forward());
|
||||
}
|
||||
std::vector<int> outLayers = net.getUnconnectedOutLayers();
|
||||
ASSERT_EQ(net.getLayer(outLayers[0])->name, "output");
|
||||
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Convolution");
|
||||
}
|
||||
|
||||
TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
|
||||
@ -962,23 +960,16 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8)
|
||||
randu(inputs[0], 0, 255);
|
||||
inputs[0].convertTo(inputs[1], CV_32F);
|
||||
|
||||
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
||||
|
||||
Mat outs[2];
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
|
||||
Net net = readNet(_tf("layer_convolution" + suffix + ".xml"), _tf("layer_convolution" + suffix + ".bin"));
|
||||
net.setPreferableTarget(targetId);
|
||||
net.setInput(inputs[i]);
|
||||
if (targetId != DNN_TARGET_MYRIAD)
|
||||
{
|
||||
outs[i] = net.forward();
|
||||
ASSERT_EQ(outs[i].type(), CV_32F);
|
||||
}
|
||||
else
|
||||
{
|
||||
// An assertion is expected because the model is in FP32 format but
|
||||
// Myriad plugin supports only FP16 models.
|
||||
ASSERT_ANY_THROW(net.forward());
|
||||
}
|
||||
outs[i] = net.forward();
|
||||
ASSERT_EQ(outs[i].type(), CV_32F);
|
||||
}
|
||||
if (targetId != DNN_TARGET_MYRIAD)
|
||||
normAssert(outs[0], outs[1]);
|
||||
@ -1008,8 +999,8 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Convolution_DLDT,
|
||||
// net.save('/path/to/caffemodel')
|
||||
//
|
||||
// 3. Convert using ModelOptimizer.
|
||||
typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
|
||||
TEST_P(Test_DLDT_two_inputs, as_IR)
|
||||
typedef testing::TestWithParam<tuple<int, int, Target, std::vector<int> > > Test_DLDT_two_inputs_3dim;
|
||||
TEST_P(Test_DLDT_two_inputs_3dim, as_IR)
|
||||
{
|
||||
int firstInpType = get<0>(GetParam());
|
||||
int secondInpType = get<1>(GetParam());
|
||||
@ -1020,32 +1011,39 @@ TEST_P(Test_DLDT_two_inputs, as_IR)
|
||||
throw SkipTestException("Test is enabled starts from OpenVINO 2018R4");
|
||||
#endif
|
||||
|
||||
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
|
||||
int inpSize[] = {1, 2, 3};
|
||||
Mat firstInp(3, &inpSize[0], firstInpType);
|
||||
Mat secondInp(3, &inpSize[0], secondInpType);
|
||||
std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
|
||||
Net net = readNet(_tf("net_two_inputs" + suffix + ".xml"), _tf("net_two_inputs.bin"));
|
||||
std::vector<int> inpSize = get<3>(GetParam());
|
||||
Mat firstInp(3, inpSize.data(), firstInpType);
|
||||
Mat secondInp(3, inpSize.data(), secondInpType);
|
||||
randu(firstInp, 0, 255);
|
||||
randu(secondInp, 0, 255);
|
||||
|
||||
net.setInput(firstInp, "data");
|
||||
net.setInput(secondInp, "second_input");
|
||||
net.setPreferableTarget(targetId);
|
||||
if (targetId != DNN_TARGET_MYRIAD)
|
||||
{
|
||||
Mat out = net.forward();
|
||||
|
||||
Mat ref;
|
||||
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
|
||||
normAssert(out, ref);
|
||||
}
|
||||
else
|
||||
{
|
||||
// An assertion is expected because the model is in FP32 format but
|
||||
// Myriad plugin supports only FP16 models.
|
||||
ASSERT_ANY_THROW(net.forward());
|
||||
}
|
||||
double l1 = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
|
||||
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.06 : 0.0;
|
||||
double lInf = ((targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) &&
|
||||
(firstInpType == CV_32F || secondInpType == CV_32F)) ? 0.23 : 0.0;
|
||||
|
||||
Mat out = net.forward();
|
||||
|
||||
Mat ref;
|
||||
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
|
||||
normAssert(out, ref, "", l1, lInf);
|
||||
}
|
||||
|
||||
std::vector< std::vector<int> > list_sizes{ {1, 2, 3}, {3, 2, 1}, {5, 5, 5}, {13, 7, 11} };
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs_3dim, Combine(
|
||||
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
|
||||
testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)),
|
||||
testing::ValuesIn(list_sizes)
|
||||
));
|
||||
|
||||
typedef testing::TestWithParam<tuple<int, int, Target> > Test_DLDT_two_inputs;
|
||||
TEST_P(Test_DLDT_two_inputs, as_backend)
|
||||
{
|
||||
static const float kScale = 0.5f;
|
||||
|
||||
@ -308,4 +308,38 @@ TEST_P(DeprecatedForward, CustomLayerWithFallback)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, DeprecatedForward, dnnBackendsAndTargets());
|
||||
|
||||
TEST(Net, forwardAndRetrieve)
|
||||
{
|
||||
std::string prototxt =
|
||||
"input: \"data\"\n"
|
||||
"layer {\n"
|
||||
" name: \"testLayer\"\n"
|
||||
" type: \"Slice\"\n"
|
||||
" bottom: \"data\"\n"
|
||||
" top: \"firstCopy\"\n"
|
||||
" top: \"secondCopy\"\n"
|
||||
" slice_param {\n"
|
||||
" axis: 0\n"
|
||||
" slice_point: 2\n"
|
||||
" }\n"
|
||||
"}";
|
||||
Net net = readNetFromCaffe(&prototxt[0], prototxt.size());
|
||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
||||
|
||||
Mat inp(4, 5, CV_32F);
|
||||
randu(inp, -1, 1);
|
||||
net.setInput(inp);
|
||||
|
||||
std::vector<String> outNames;
|
||||
outNames.push_back("testLayer");
|
||||
std::vector<std::vector<Mat> > outBlobs;
|
||||
|
||||
net.forward(outBlobs, outNames);
|
||||
|
||||
EXPECT_EQ(outBlobs.size(), 1);
|
||||
EXPECT_EQ(outBlobs[0].size(), 2);
|
||||
normAssert(outBlobs[0][0], inp.rowRange(0, 2), "first part");
|
||||
normAssert(outBlobs[0][1], inp.rowRange(2, 4), "second part");
|
||||
}
|
||||
|
||||
}} // namespace
|
||||
|
||||
@ -395,7 +395,7 @@ TEST_P(Test_ONNX_nets, DenseNet121)
|
||||
|
||||
TEST_P(Test_ONNX_nets, Inception_v1)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
|
||||
#endif
|
||||
|
||||
@ -241,7 +241,7 @@ TEST_P(Test_TensorFlow_layers, unfused_flatten)
|
||||
|
||||
TEST_P(Test_TensorFlow_layers, leaky_relu)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
|
||||
throw SkipTestException("");
|
||||
#endif
|
||||
@ -388,7 +388,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
|
||||
|
||||
TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018050000
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
|
||||
throw SkipTestException("Unstable test case");
|
||||
#endif
|
||||
|
||||
@ -230,4 +230,27 @@ PERF_TEST_P(Size_MatType_BorderType, blur5x5,
|
||||
SANITY_CHECK(dst, 1);
|
||||
}
|
||||
|
||||
///////////// BlendLinear ////////////////////////
|
||||
PERF_TEST_P(Size_MatType, BlendLinear,
|
||||
testing::Combine(
|
||||
testing::Values(szVGA, sz720p, sz1080p, sz2160p),
|
||||
testing::Values(CV_8UC1, CV_32FC1, CV_8UC3, CV_32FC3, CV_8UC4, CV_32FC4)
|
||||
)
|
||||
)
|
||||
{
|
||||
const Size srcSize = get<0>(GetParam());
|
||||
const int srcType = get<1>(GetParam());
|
||||
|
||||
Mat src1(srcSize, srcType), src2(srcSize, srcType), dst(srcSize, srcType);
|
||||
Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1);
|
||||
|
||||
declare.in(src1, src2, WARMUP_RNG).in(weights1, weights2, WARMUP_READ).out(dst);
|
||||
randu(weights1, 0, 1);
|
||||
randu(weights2, 0, 1);
|
||||
|
||||
TEST_CYCLE() blendLinear(src1, src2, weights1, weights2, dst);
|
||||
|
||||
SANITY_CHECK_NOTHING();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -48,44 +48,44 @@
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
namespace cv {
|
||||
#if CV_SIMD128
|
||||
static inline v_float32x4 blend(const v_float32x4& v_src1, const v_float32x4& v_src2, const v_float32x4& v_w1, const v_float32x4& v_w2)
|
||||
#if CV_SIMD
|
||||
static inline v_float32 blend(const v_float32& v_src1, const v_float32& v_src2, const v_float32& v_w1, const v_float32& v_w2)
|
||||
{
|
||||
const v_float32x4 v_eps = v_setall_f32(1e-5f);
|
||||
v_float32x4 v_denom = v_w1 + v_w2 + v_eps;
|
||||
const v_float32 v_eps = vx_setall_f32(1e-5f);
|
||||
v_float32 v_denom = v_w1 + v_w2 + v_eps;
|
||||
return (v_src1 * v_w1 + v_src2 * v_w2) / v_denom;
|
||||
}
|
||||
static inline v_float32x4 blend(const v_float32x4& v_src1, const v_float32x4& v_src2, const float* w_ptr1, const float* w_ptr2, int offset)
|
||||
static inline v_float32 blend(const v_float32& v_src1, const v_float32& v_src2, const float* w_ptr1, const float* w_ptr2, int offset)
|
||||
{
|
||||
v_float32x4 v_w1 = v_load(w_ptr1 + offset);
|
||||
v_float32x4 v_w2 = v_load(w_ptr2 + offset);
|
||||
v_float32 v_w1 = vx_load(w_ptr1 + offset);
|
||||
v_float32 v_w2 = vx_load(w_ptr2 + offset);
|
||||
return blend(v_src1, v_src2, v_w1, v_w2);
|
||||
}
|
||||
static inline v_uint32x4 saturate_f32_u32(const v_float32x4& vec)
|
||||
static inline v_uint32 saturate_f32_u32(const v_float32& vec)
|
||||
{
|
||||
const v_int32x4 z = v_setzero_s32();
|
||||
const v_int32x4 x = v_setall_s32(255);
|
||||
const v_int32 z = vx_setzero_s32();
|
||||
const v_int32 x = vx_setall_s32(255);
|
||||
return v_reinterpret_as_u32(v_min(v_max(v_round(vec), z), x));
|
||||
}
|
||||
static inline v_uint8x16 pack_f32tou8(v_float32x4& val0, v_float32x4& val1, v_float32x4& val2, v_float32x4& val3)
|
||||
static inline v_uint8 pack_f32tou8(v_float32& val0, v_float32& val1, v_float32& val2, v_float32& val3)
|
||||
{
|
||||
v_uint32x4 a = saturate_f32_u32(val0);
|
||||
v_uint32x4 b = saturate_f32_u32(val1);
|
||||
v_uint32x4 c = saturate_f32_u32(val2);
|
||||
v_uint32x4 d = saturate_f32_u32(val3);
|
||||
v_uint16x8 e = v_pack(a, b);
|
||||
v_uint16x8 f = v_pack(c, d);
|
||||
v_uint32 a = saturate_f32_u32(val0);
|
||||
v_uint32 b = saturate_f32_u32(val1);
|
||||
v_uint32 c = saturate_f32_u32(val2);
|
||||
v_uint32 d = saturate_f32_u32(val3);
|
||||
v_uint16 e = v_pack(a, b);
|
||||
v_uint16 f = v_pack(c, d);
|
||||
return v_pack(e, f);
|
||||
}
|
||||
static inline void store_pack_f32tou8(uchar* ptr, v_float32x4& val0, v_float32x4& val1, v_float32x4& val2, v_float32x4& val3)
|
||||
static inline void store_pack_f32tou8(uchar* ptr, v_float32& val0, v_float32& val1, v_float32& val2, v_float32& val3)
|
||||
{
|
||||
v_store((ptr), pack_f32tou8(val0, val1, val2, val3));
|
||||
}
|
||||
static inline void expand_u8tof32(const v_uint8x16& src, v_float32x4& dst0, v_float32x4& dst1, v_float32x4& dst2, v_float32x4& dst3)
|
||||
static inline void expand_u8tof32(const v_uint8& src, v_float32& dst0, v_float32& dst1, v_float32& dst2, v_float32& dst3)
|
||||
{
|
||||
v_uint16x8 a0, a1;
|
||||
v_uint16 a0, a1;
|
||||
v_expand(src, a0, a1);
|
||||
v_uint32x4 b0, b1,b2,b3;
|
||||
v_uint32 b0, b1,b2,b3;
|
||||
v_expand(a0, b0, b1);
|
||||
v_expand(a1, b2, b3);
|
||||
dst0 = v_cvt_f32(v_reinterpret_as_s32(b0));
|
||||
@ -93,71 +93,69 @@ static inline void expand_u8tof32(const v_uint8x16& src, v_float32x4& dst0, v_fl
|
||||
dst2 = v_cvt_f32(v_reinterpret_as_s32(b2));
|
||||
dst3 = v_cvt_f32(v_reinterpret_as_s32(b3));
|
||||
}
|
||||
static inline void load_expand_u8tof32(const uchar* ptr, v_float32x4& dst0, v_float32x4& dst1, v_float32x4& dst2, v_float32x4& dst3)
|
||||
static inline void load_expand_u8tof32(const uchar* ptr, v_float32& dst0, v_float32& dst1, v_float32& dst2, v_float32& dst3)
|
||||
{
|
||||
v_uint8x16 a = v_load((ptr));
|
||||
v_uint8 a = vx_load((ptr));
|
||||
expand_u8tof32(a, dst0, dst1, dst2, dst3);
|
||||
}
|
||||
int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn);
|
||||
int blendLinearSimd128(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn);
|
||||
int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn)
|
||||
int blendLinearSimd(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn);
|
||||
int blendLinearSimd(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn);
|
||||
int blendLinearSimd(const uchar* src1, const uchar* src2, const float* weights1, const float* weights2, uchar* dst, int x, int width, int cn)
|
||||
{
|
||||
int step = v_uint8x16::nlanes * cn;
|
||||
int weight_step = v_uint8x16::nlanes;
|
||||
switch(cn)
|
||||
{
|
||||
case 1:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
|
||||
for(int weight_offset = 0 ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes, weight_offset += v_uint8::nlanes)
|
||||
{
|
||||
v_float32x4 v_src10, v_src11, v_src12, v_src13;
|
||||
v_float32x4 v_src20, v_src21, v_src22, v_src23;
|
||||
v_float32 v_src10, v_src11, v_src12, v_src13;
|
||||
v_float32 v_src20, v_src21, v_src22, v_src23;
|
||||
load_expand_u8tof32(src1 + x, v_src10, v_src11, v_src12, v_src13);
|
||||
load_expand_u8tof32(src2 + x, v_src20, v_src21, v_src22, v_src23);
|
||||
|
||||
v_float32x4 v_dst0 = blend(v_src10, v_src20, weights1, weights2, weight_offset);
|
||||
v_float32x4 v_dst1 = blend(v_src11, v_src21, weights1, weights2, weight_offset + 4);
|
||||
v_float32x4 v_dst2 = blend(v_src12, v_src22, weights1, weights2, weight_offset + 8);
|
||||
v_float32x4 v_dst3 = blend(v_src13, v_src23, weights1, weights2, weight_offset + 12);
|
||||
v_float32 v_dst0 = blend(v_src10, v_src20, weights1, weights2, weight_offset);
|
||||
v_float32 v_dst1 = blend(v_src11, v_src21, weights1, weights2, weight_offset + v_float32::nlanes);
|
||||
v_float32 v_dst2 = blend(v_src12, v_src22, weights1, weights2, weight_offset + 2*v_float32::nlanes);
|
||||
v_float32 v_dst3 = blend(v_src13, v_src23, weights1, weights2, weight_offset + 3*v_float32::nlanes);
|
||||
|
||||
store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
|
||||
for(int weight_offset = 0 ; x <= width - 2*v_uint8::nlanes; x += 2*v_uint8::nlanes, weight_offset += v_uint8::nlanes)
|
||||
{
|
||||
v_uint8x16 v_src10, v_src11, v_src20, v_src21;
|
||||
v_uint8 v_src10, v_src11, v_src20, v_src21;
|
||||
v_load_deinterleave(src1 + x, v_src10, v_src11);
|
||||
v_load_deinterleave(src2 + x, v_src20, v_src21);
|
||||
v_float32x4 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113;
|
||||
v_float32x4 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213;
|
||||
v_float32 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113;
|
||||
v_float32 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213;
|
||||
expand_u8tof32(v_src10, v_src100, v_src101, v_src102, v_src103);
|
||||
expand_u8tof32(v_src11, v_src110, v_src111, v_src112, v_src113);
|
||||
expand_u8tof32(v_src20, v_src200, v_src201, v_src202, v_src203);
|
||||
expand_u8tof32(v_src21, v_src210, v_src211, v_src212, v_src213);
|
||||
|
||||
v_float32x4 v_dst0 = blend(v_src100, v_src200, weights1, weights2, weight_offset);
|
||||
v_float32x4 v_dst1 = blend(v_src110, v_src210, weights1, weights2, weight_offset);
|
||||
v_float32x4 v_dst2 = blend(v_src101, v_src201, weights1, weights2, weight_offset + 4);
|
||||
v_float32x4 v_dst3 = blend(v_src111, v_src211, weights1, weights2, weight_offset + 4);
|
||||
v_float32x4 v_dst4 = blend(v_src102, v_src202, weights1, weights2, weight_offset + 8);
|
||||
v_float32x4 v_dst5 = blend(v_src112, v_src212, weights1, weights2, weight_offset + 8);
|
||||
v_float32x4 v_dst6 = blend(v_src103, v_src203, weights1, weights2, weight_offset + 12);
|
||||
v_float32x4 v_dst7 = blend(v_src113, v_src213, weights1, weights2, weight_offset + 12);
|
||||
v_float32 v_dst0 = blend(v_src100, v_src200, weights1, weights2, weight_offset);
|
||||
v_float32 v_dst1 = blend(v_src110, v_src210, weights1, weights2, weight_offset);
|
||||
v_float32 v_dst2 = blend(v_src101, v_src201, weights1, weights2, weight_offset + v_float32::nlanes);
|
||||
v_float32 v_dst3 = blend(v_src111, v_src211, weights1, weights2, weight_offset + v_float32::nlanes);
|
||||
v_float32 v_dst4 = blend(v_src102, v_src202, weights1, weights2, weight_offset + 2*v_float32::nlanes);
|
||||
v_float32 v_dst5 = blend(v_src112, v_src212, weights1, weights2, weight_offset + 2*v_float32::nlanes);
|
||||
v_float32 v_dst6 = blend(v_src103, v_src203, weights1, weights2, weight_offset + 3*v_float32::nlanes);
|
||||
v_float32 v_dst7 = blend(v_src113, v_src213, weights1, weights2, weight_offset + 3*v_float32::nlanes);
|
||||
|
||||
v_uint8x16 v_dsta = pack_f32tou8(v_dst0, v_dst2, v_dst4, v_dst6);
|
||||
v_uint8x16 v_dstb = pack_f32tou8(v_dst1, v_dst3, v_dst5, v_dst7);
|
||||
v_uint8 v_dsta = pack_f32tou8(v_dst0, v_dst2, v_dst4, v_dst6);
|
||||
v_uint8 v_dstb = pack_f32tou8(v_dst1, v_dst3, v_dst5, v_dst7);
|
||||
v_store_interleave(dst + x, v_dsta, v_dstb);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
|
||||
for(int weight_offset = 0 ; x <= width - 3*v_uint8::nlanes; x += 3*v_uint8::nlanes, weight_offset += v_uint8::nlanes)
|
||||
{
|
||||
v_uint8x16 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
|
||||
v_uint8 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
|
||||
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12);
|
||||
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22);
|
||||
|
||||
v_float32x4 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113, v_src120, v_src121, v_src122, v_src123;
|
||||
v_float32x4 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213, v_src220, v_src221, v_src222, v_src223;
|
||||
v_float32 v_src100, v_src101, v_src102, v_src103, v_src110, v_src111, v_src112, v_src113, v_src120, v_src121, v_src122, v_src123;
|
||||
v_float32 v_src200, v_src201, v_src202, v_src203, v_src210, v_src211, v_src212, v_src213, v_src220, v_src221, v_src222, v_src223;
|
||||
expand_u8tof32(v_src10, v_src100, v_src101, v_src102, v_src103);
|
||||
expand_u8tof32(v_src11, v_src110, v_src111, v_src112, v_src113);
|
||||
expand_u8tof32(v_src12, v_src120, v_src121, v_src122, v_src123);
|
||||
@ -165,14 +163,14 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
|
||||
expand_u8tof32(v_src21, v_src210, v_src211, v_src212, v_src213);
|
||||
expand_u8tof32(v_src22, v_src220, v_src221, v_src222, v_src223);
|
||||
|
||||
v_float32x4 v_w10 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w11 = v_load(weights1 + weight_offset + 4);
|
||||
v_float32x4 v_w12 = v_load(weights1 + weight_offset + 8);
|
||||
v_float32x4 v_w13 = v_load(weights1 + weight_offset + 12);
|
||||
v_float32x4 v_w20 = v_load(weights2 + weight_offset);
|
||||
v_float32x4 v_w21 = v_load(weights2 + weight_offset + 4);
|
||||
v_float32x4 v_w22 = v_load(weights2 + weight_offset + 8);
|
||||
v_float32x4 v_w23 = v_load(weights2 + weight_offset + 12);
|
||||
v_float32 v_w10 = vx_load(weights1 + weight_offset);
|
||||
v_float32 v_w11 = vx_load(weights1 + weight_offset + v_float32::nlanes);
|
||||
v_float32 v_w12 = vx_load(weights1 + weight_offset + 2*v_float32::nlanes);
|
||||
v_float32 v_w13 = vx_load(weights1 + weight_offset + 3*v_float32::nlanes);
|
||||
v_float32 v_w20 = vx_load(weights2 + weight_offset);
|
||||
v_float32 v_w21 = vx_load(weights2 + weight_offset + v_float32::nlanes);
|
||||
v_float32 v_w22 = vx_load(weights2 + weight_offset + 2*v_float32::nlanes);
|
||||
v_float32 v_w23 = vx_load(weights2 + weight_offset + 3*v_float32::nlanes);
|
||||
v_src100 = blend(v_src100, v_src200, v_w10, v_w20);
|
||||
v_src110 = blend(v_src110, v_src210, v_w10, v_w20);
|
||||
v_src120 = blend(v_src120, v_src220, v_w10, v_w20);
|
||||
@ -187,34 +185,36 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
|
||||
v_src123 = blend(v_src123, v_src223, v_w13, v_w23);
|
||||
|
||||
|
||||
v_uint8x16 v_dst0 = pack_f32tou8(v_src100, v_src101, v_src102, v_src103);
|
||||
v_uint8x16 v_dst1 = pack_f32tou8(v_src110, v_src111, v_src112, v_src113);
|
||||
v_uint8x16 v_dst2 = pack_f32tou8(v_src120, v_src121, v_src122, v_src123);
|
||||
v_uint8 v_dst0 = pack_f32tou8(v_src100, v_src101, v_src102, v_src103);
|
||||
v_uint8 v_dst1 = pack_f32tou8(v_src110, v_src111, v_src112, v_src113);
|
||||
v_uint8 v_dst2 = pack_f32tou8(v_src120, v_src121, v_src122, v_src123);
|
||||
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
step = v_uint8x16::nlanes;
|
||||
weight_step = v_float32x4::nlanes;
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += weight_step)
|
||||
for(int weight_offset = 0 ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes, weight_offset += v_float32::nlanes)
|
||||
{
|
||||
v_float32x4 v_src10, v_src11, v_src12, v_src13, v_src14, v_src15, v_src16, v_src17;
|
||||
v_float32x4 v_src20, v_src21, v_src22, v_src23, v_src24, v_src25, v_src26, v_src27;
|
||||
v_float32 v_src10, v_src11, v_src12, v_src13;
|
||||
v_float32 v_src20, v_src21, v_src22, v_src23;
|
||||
load_expand_u8tof32(src1 + x, v_src10, v_src11, v_src12, v_src13);
|
||||
load_expand_u8tof32(src2 + x, v_src20, v_src21, v_src22, v_src23);
|
||||
|
||||
v_transpose4x4(v_src10, v_src11, v_src12, v_src13, v_src14, v_src15, v_src16, v_src17);
|
||||
v_transpose4x4(v_src20, v_src21, v_src22, v_src23, v_src24, v_src25, v_src26, v_src27);
|
||||
v_float32 v_w10, v_w11, v_w12, v_w13, v_w20, v_w21, v_w22, v_w23, v_w0, v_w1;
|
||||
v_w10 = vx_load(weights1 + weight_offset);
|
||||
v_zip(v_w10, v_w10, v_w0, v_w1);
|
||||
v_zip(v_w0, v_w0, v_w10, v_w11);
|
||||
v_zip(v_w1, v_w1, v_w12, v_w13);
|
||||
v_w20 = vx_load(weights2 + weight_offset);
|
||||
v_zip(v_w20, v_w20, v_w0, v_w1);
|
||||
v_zip(v_w0, v_w0, v_w20, v_w21);
|
||||
v_zip(v_w1, v_w1, v_w22, v_w23);
|
||||
|
||||
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
|
||||
v_src10 = blend(v_src14, v_src24, v_w1, v_w2);
|
||||
v_src11 = blend(v_src15, v_src25, v_w1, v_w2);
|
||||
v_src12 = blend(v_src16, v_src26, v_w1, v_w2);
|
||||
v_src13 = blend(v_src17, v_src27, v_w1, v_w2);
|
||||
v_float32 v_dst0, v_dst1, v_dst2, v_dst3;
|
||||
v_dst0 = blend(v_src10, v_src20, v_w10, v_w20);
|
||||
v_dst1 = blend(v_src11, v_src21, v_w11, v_w21);
|
||||
v_dst2 = blend(v_src12, v_src22, v_w12, v_w22);
|
||||
v_dst3 = blend(v_src13, v_src23, v_w13, v_w23);
|
||||
|
||||
v_float32x4 v_dst0, v_dst1, v_dst2, v_dst3;
|
||||
v_transpose4x4(v_src10, v_src11, v_src12, v_src13, v_dst0, v_dst1, v_dst2, v_dst3);
|
||||
store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
|
||||
}
|
||||
break;
|
||||
@ -224,68 +224,67 @@ int blendLinearSimd128(const uchar* src1, const uchar* src2, const float* weight
|
||||
return x;
|
||||
}
|
||||
|
||||
int blendLinearSimd128(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn)
|
||||
int blendLinearSimd(const float* src1, const float* src2, const float* weights1, const float* weights2, float* dst, int x, int width, int cn)
|
||||
{
|
||||
int step = v_float32x4::nlanes*cn;
|
||||
switch(cn)
|
||||
{
|
||||
case 1:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
|
||||
for(int weight_offset = 0 ; x <= width - v_float32::nlanes; x += v_float32::nlanes, weight_offset += v_float32::nlanes)
|
||||
{
|
||||
v_float32x4 v_src1 = v_load(src1 + x);
|
||||
v_float32x4 v_src2 = v_load(src2 + x);
|
||||
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
|
||||
v_float32 v_src1 = vx_load(src1 + x);
|
||||
v_float32 v_src2 = vx_load(src2 + x);
|
||||
v_float32 v_w1 = vx_load(weights1 + weight_offset);
|
||||
v_float32 v_w2 = vx_load(weights2 + weight_offset);
|
||||
|
||||
v_float32x4 v_dst = blend(v_src1, v_src2, v_w1, v_w2);
|
||||
v_float32 v_dst = blend(v_src1, v_src2, v_w1, v_w2);
|
||||
|
||||
v_store(dst + x, v_dst);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
|
||||
for(int weight_offset = 0 ; x <= width - 2*v_float32::nlanes; x += 2*v_float32::nlanes, weight_offset += v_float32::nlanes)
|
||||
{
|
||||
v_float32x4 v_src10, v_src11, v_src20, v_src21;
|
||||
v_float32 v_src10, v_src11, v_src20, v_src21;
|
||||
v_load_deinterleave(src1 + x, v_src10, v_src11);
|
||||
v_load_deinterleave(src2 + x, v_src20, v_src21);
|
||||
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
|
||||
v_float32 v_w1 = vx_load(weights1 + weight_offset);
|
||||
v_float32 v_w2 = vx_load(weights2 + weight_offset);
|
||||
|
||||
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
|
||||
v_store_interleave(dst + x, v_dst0, v_dst1);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
|
||||
for(int weight_offset = 0 ; x <= width - 3*v_float32::nlanes; x += 3*v_float32::nlanes, weight_offset += v_float32::nlanes)
|
||||
{
|
||||
v_float32x4 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
|
||||
v_float32 v_src10, v_src11, v_src12, v_src20, v_src21, v_src22;
|
||||
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12);
|
||||
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22);
|
||||
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
|
||||
v_float32 v_w1 = vx_load(weights1 + weight_offset);
|
||||
v_float32 v_w2 = vx_load(weights2 + weight_offset);
|
||||
|
||||
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
v_float32x4 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
|
||||
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
v_float32 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
|
||||
|
||||
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
for(int weight_offset = 0 ; x <= width - step; x += step, weight_offset += v_float32x4::nlanes)
|
||||
for(int weight_offset = 0 ; x <= width - 4*v_float32::nlanes; x += 4*v_float32::nlanes, weight_offset += v_float32::nlanes)
|
||||
{
|
||||
v_float32x4 v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
|
||||
v_float32 v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
|
||||
v_load_deinterleave(src1 + x, v_src10, v_src11, v_src12, v_src13);
|
||||
v_load_deinterleave(src2 + x, v_src20, v_src21, v_src22, v_src23);
|
||||
v_float32x4 v_w1 = v_load(weights1 + weight_offset);
|
||||
v_float32x4 v_w2 = v_load(weights2 + weight_offset);
|
||||
v_float32 v_w1 = vx_load(weights1 + weight_offset);
|
||||
v_float32 v_w2 = vx_load(weights2 + weight_offset);
|
||||
|
||||
v_float32x4 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32x4 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
v_float32x4 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
|
||||
v_float32x4 v_dst3 = blend(v_src13, v_src23, v_w1, v_w2);
|
||||
v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2);
|
||||
v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2);
|
||||
v_float32 v_dst2 = blend(v_src12, v_src22, v_w1, v_w2);
|
||||
v_float32 v_dst3 = blend(v_src13, v_src23, v_w1, v_w2);
|
||||
|
||||
v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2, v_dst3);
|
||||
}
|
||||
@ -321,8 +320,8 @@ public:
|
||||
T * const dst_row = dst->ptr<T>(y);
|
||||
|
||||
int x = 0;
|
||||
#if CV_SIMD128
|
||||
x = blendLinearSimd128(src1_row, src2_row, weights1_row, weights2_row, dst_row, x, width, cn);
|
||||
#if CV_SIMD
|
||||
x = blendLinearSimd(src1_row, src2_row, weights1_row, weights2_row, dst_row, x, width, cn);
|
||||
#endif
|
||||
|
||||
for ( ; x < width; ++x)
|
||||
|
||||
@ -110,15 +110,19 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
int cn = _dst.channels(), m = _dst.rows, r = (ksize-1)/2;
|
||||
CV_Assert(cn > 0 && cn <= 4);
|
||||
size_t sstep = _src.step, dstep = _dst.step;
|
||||
Histogram CV_DECL_ALIGNED(16) H[4];
|
||||
HT CV_DECL_ALIGNED(16) luc[4][16];
|
||||
|
||||
int STRIPE_SIZE = std::min( _dst.cols, 512/cn );
|
||||
|
||||
std::vector<HT> _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + 16);
|
||||
std::vector<HT> _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + 16);
|
||||
HT* h_coarse = alignPtr(&_h_coarse[0], 16);
|
||||
HT* h_fine = alignPtr(&_h_fine[0], 16);
|
||||
#if defined(CV_SIMD_WIDTH) && CV_SIMD_WIDTH >= 16
|
||||
# define CV_ALIGNMENT CV_SIMD_WIDTH
|
||||
#else
|
||||
# define CV_ALIGNMENT 16
|
||||
#endif
|
||||
|
||||
std::vector<HT> _h_coarse(1 * 16 * (STRIPE_SIZE + 2*r) * cn + CV_ALIGNMENT);
|
||||
std::vector<HT> _h_fine(16 * 16 * (STRIPE_SIZE + 2*r) * cn + CV_ALIGNMENT);
|
||||
HT* h_coarse = alignPtr(&_h_coarse[0], CV_ALIGNMENT);
|
||||
HT* h_fine = alignPtr(&_h_fine[0], CV_ALIGNMENT);
|
||||
|
||||
for( int x = 0; x < _dst.cols; x += STRIPE_SIZE )
|
||||
{
|
||||
@ -148,10 +152,14 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
const uchar* p0 = src + sstep * std::max( 0, i-r-1 );
|
||||
const uchar* p1 = src + sstep * std::min( m-1, i+r );
|
||||
|
||||
memset( H, 0, cn*sizeof(H[0]) );
|
||||
memset( luc, 0, cn*sizeof(luc[0]) );
|
||||
for( c = 0; c < cn; c++ )
|
||||
{
|
||||
Histogram CV_DECL_ALIGNED(CV_ALIGNMENT) H;
|
||||
HT CV_DECL_ALIGNED(CV_ALIGNMENT) luc[16];
|
||||
|
||||
memset(&H, 0, sizeof(H));
|
||||
memset(luc, 0, sizeof(luc));
|
||||
|
||||
// Update column histograms for the entire row.
|
||||
for( j = 0; j < n; j++ )
|
||||
{
|
||||
@ -163,21 +171,21 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
for (k = 0; k < 16; ++k)
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_store(H[c].fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H[c].fine[k]));
|
||||
v_store(H.fine[k], v_mul_wrap(v256_load(h_fine + 16 * n*(16 * c + k)), v256_setall_u16(2 * r + 1)) + v256_load(H.fine[k]));
|
||||
#elif CV_SIMD128
|
||||
v_store(H[c].fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k]));
|
||||
v_store(H[c].fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16((ushort)(2 * r + 1))) + v_load(H[c].fine[k] + 8));
|
||||
v_store(H.fine[k], v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k)), v_setall_u16((ushort)(2 * r + 1))) + v_load(H.fine[k]));
|
||||
v_store(H.fine[k] + 8, v_mul_wrap(v_load(h_fine + 16 * n*(16 * c + k) + 8), v_setall_u16((ushort)(2 * r + 1))) + v_load(H.fine[k] + 8));
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].fine[k][ind] = (HT)(H[c].fine[k][ind] + (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]);
|
||||
H.fine[k][ind] = (HT)(H.fine[k][ind] + (2 * r + 1) * h_fine[16 * n*(16 * c + k) + ind]);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV_SIMD256
|
||||
v_uint16x16 v_coarse = v256_load(H[c].coarse);
|
||||
v_uint16x16 v_coarse = v256_load(H.coarse);
|
||||
#elif CV_SIMD128
|
||||
v_uint16x8 v_coarsel = v_load(H[c].coarse);
|
||||
v_uint16x8 v_coarseh = v_load(H[c].coarse + 8);
|
||||
v_uint16x8 v_coarsel = v_load(H.coarse);
|
||||
v_uint16x8 v_coarseh = v_load(H.coarse + 8);
|
||||
#endif
|
||||
HT* px = h_coarse + 16 * n*c;
|
||||
for( j = 0; j < 2*r; ++j, px += 16 )
|
||||
@ -189,7 +197,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
v_coarseh += v_load(px + 8);
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].coarse[ind] += px[ind];
|
||||
H.coarse[ind] += px[ind];
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -201,24 +209,24 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
px = h_coarse + 16 * (n*c + std::min(j + r, n - 1));
|
||||
#if CV_SIMD256
|
||||
v_coarse += v256_load(px);
|
||||
v_store(H[c].coarse, v_coarse);
|
||||
v_store(H.coarse, v_coarse);
|
||||
#elif CV_SIMD128
|
||||
v_coarsel += v_load(px);
|
||||
v_coarseh += v_load(px + 8);
|
||||
v_store(H[c].coarse, v_coarsel);
|
||||
v_store(H[c].coarse + 8, v_coarseh);
|
||||
v_store(H.coarse, v_coarsel);
|
||||
v_store(H.coarse + 8, v_coarseh);
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].coarse[ind] += px[ind];
|
||||
H.coarse[ind] += px[ind];
|
||||
#endif
|
||||
|
||||
// Find median at coarse level
|
||||
for ( k = 0; k < 16 ; ++k )
|
||||
{
|
||||
sum += H[c].coarse[k];
|
||||
sum += H.coarse[k];
|
||||
if ( sum > t )
|
||||
{
|
||||
sum -= H[c].coarse[k];
|
||||
sum -= H.coarse[k];
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -231,7 +239,7 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
v_uint16x8 v_finel;
|
||||
v_uint16x8 v_fineh;
|
||||
#endif
|
||||
if ( luc[c][k] <= j-r )
|
||||
if ( luc[k] <= j-r )
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_fine = v256_setzero_u16();
|
||||
@ -239,10 +247,10 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
v_finel = v_setzero_u16();
|
||||
v_fineh = v_setzero_u16();
|
||||
#else
|
||||
memset(&H[c].fine[k], 0, 16 * sizeof(HT));
|
||||
memset(&H.fine[k], 0, 16 * sizeof(HT));
|
||||
#endif
|
||||
px = h_fine + 16 * (n*(16 * c + k) + j - r);
|
||||
for (luc[c][k] = HT(j - r); luc[c][k] < MIN(j + r + 1, n); ++luc[c][k], px += 16)
|
||||
for (luc[k] = HT(j - r); luc[k] < MIN(j + r + 1, n); ++luc[k], px += 16)
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_fine += v256_load(px);
|
||||
@ -251,11 +259,11 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
v_fineh += v_load(px + 8);
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].fine[k][ind] += px[ind];
|
||||
H.fine[k][ind] += px[ind];
|
||||
#endif
|
||||
}
|
||||
|
||||
if ( luc[c][k] < j+r+1 )
|
||||
if ( luc[k] < j+r+1 )
|
||||
{
|
||||
px = h_fine + 16 * (n*(16 * c + k) + (n - 1));
|
||||
#if CV_SIMD256
|
||||
@ -265,50 +273,50 @@ medianBlur_8u_O1( const Mat& _src, Mat& _dst, int ksize )
|
||||
v_fineh += v_mul_wrap(v_load(px + 8), v_setall_u16((ushort)(j + r + 1 - n)));
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].fine[k][ind] = (HT)(H[c].fine[k][ind] + (j + r + 1 - n) * px[ind]);
|
||||
H.fine[k][ind] = (HT)(H.fine[k][ind] + (j + r + 1 - n) * px[ind]);
|
||||
#endif
|
||||
luc[c][k] = (HT)(j+r+1);
|
||||
luc[k] = (HT)(j+r+1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_fine = v256_load(H[c].fine[k]);
|
||||
v_fine = v256_load(H.fine[k]);
|
||||
#elif CV_SIMD128
|
||||
v_finel = v_load(H[c].fine[k]);
|
||||
v_fineh = v_load(H[c].fine[k] + 8);
|
||||
v_finel = v_load(H.fine[k]);
|
||||
v_fineh = v_load(H.fine[k] + 8);
|
||||
#endif
|
||||
px = h_fine + 16*n*(16 * c + k);
|
||||
for ( ; luc[c][k] < j+r+1; ++luc[c][k] )
|
||||
for ( ; luc[k] < j+r+1; ++luc[k] )
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_fine = v_fine + v256_load(px + 16 * MIN(luc[c][k], n - 1)) - v256_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0));
|
||||
v_fine = v_fine + v256_load(px + 16 * MIN(luc[k], n - 1)) - v256_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0));
|
||||
#elif CV_SIMD128
|
||||
v_finel = v_finel + v_load(px + 16 * MIN(luc[c][k], n - 1) ) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0));
|
||||
v_fineh = v_fineh + v_load(px + 16 * MIN(luc[c][k], n - 1) + 8) - v_load(px + 16 * MAX(luc[c][k] - 2 * r - 1, 0) + 8);
|
||||
v_finel = v_finel + v_load(px + 16 * MIN(luc[k], n - 1) ) - v_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0));
|
||||
v_fineh = v_fineh + v_load(px + 16 * MIN(luc[k], n - 1) + 8) - v_load(px + 16 * MAX(luc[k] - 2 * r - 1, 0) + 8);
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].fine[k][ind] += px[16 * MIN(luc[c][k], n - 1) + ind] - px[16 * MAX(luc[c][k] - 2 * r - 1, 0) + ind];
|
||||
H.fine[k][ind] += px[16 * MIN(luc[k], n - 1) + ind] - px[16 * MAX(luc[k] - 2 * r - 1, 0) + ind];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
px = h_coarse + 16 * (n*c + MAX(j - r, 0));
|
||||
#if CV_SIMD256
|
||||
v_store(H[c].fine[k], v_fine);
|
||||
v_store(H.fine[k], v_fine);
|
||||
v_coarse -= v256_load(px);
|
||||
#elif CV_SIMD128
|
||||
v_store(H[c].fine[k], v_finel);
|
||||
v_store(H[c].fine[k] + 8, v_fineh);
|
||||
v_store(H.fine[k], v_finel);
|
||||
v_store(H.fine[k] + 8, v_fineh);
|
||||
v_coarsel -= v_load(px);
|
||||
v_coarseh -= v_load(px + 8);
|
||||
#else
|
||||
for (int ind = 0; ind < 16; ++ind)
|
||||
H[c].coarse[ind] -= px[ind];
|
||||
H.coarse[ind] -= px[ind];
|
||||
#endif
|
||||
|
||||
/* Find median in segment */
|
||||
segment = H[c].fine[k];
|
||||
segment = H.fine[k];
|
||||
for ( b = 0; b < 16 ; b++ )
|
||||
{
|
||||
sum += segment[b];
|
||||
|
||||
@ -112,6 +112,7 @@ struct PyrDownVec_32s8u
|
||||
v_rshr_pack_store<8>(dst + x, t0);
|
||||
x += v_uint16::nlanes;
|
||||
}
|
||||
typedef int CV_DECL_ALIGNED(1) unaligned_int;
|
||||
for ( ; x <= width - v_int32x4::nlanes; x += v_int32x4::nlanes)
|
||||
{
|
||||
v_int32x4 r0, r1, r2, r3, r4, t0;
|
||||
@ -122,7 +123,7 @@ struct PyrDownVec_32s8u
|
||||
r4 = v_load(row4 + x);
|
||||
t0 = r0 + r4 + (r2 + r2) + ((r1 + r3 + r2) << 2);
|
||||
|
||||
*(int*)(dst + x) = v_reinterpret_as_s32(v_rshr_pack<8>(v_pack_u(t0, t0), v_setzero_u16())).get0();
|
||||
*((unaligned_int*) (dst + x)) = v_reinterpret_as_s32(v_rshr_pack<8>(v_pack_u(t0, t0), v_setzero_u16())).get0();
|
||||
}
|
||||
|
||||
return x;
|
||||
|
||||
@ -123,139 +123,125 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
|
||||
}
|
||||
}
|
||||
|
||||
// Pointer to row vectors
|
||||
uchar *p_src, *c_src, *n_src; // previous, current, next row
|
||||
short *c_dx, *c_dy;
|
||||
|
||||
int i_start = 0;
|
||||
int j_start = 0;
|
||||
#if CV_SIMD128
|
||||
if(hasSIMD128())
|
||||
#if CV_SIMD
|
||||
// Characters in variable names have the following meanings:
|
||||
// u: unsigned char
|
||||
// s: signed int
|
||||
//
|
||||
// [row][column]
|
||||
// m: offset -1
|
||||
// n: offset 0
|
||||
// p: offset 1
|
||||
// Example: umn is offset -1 in row and offset 0 in column
|
||||
for ( i = 0; i < H - 1; i += 2 )
|
||||
{
|
||||
uchar *m_src;
|
||||
short *n_dx, *n_dy;
|
||||
uchar *p_src = src.ptr<uchar>(i == 0 ? i_top : i - 1);
|
||||
uchar *c_src = src.ptr<uchar>(i);
|
||||
uchar *n_src = src.ptr<uchar>(i+1);
|
||||
uchar *m_src = src.ptr<uchar>(i == H - 2 ? i_bottom : i + 2);
|
||||
|
||||
// Characters in variable names have the following meanings:
|
||||
// u: unsigned char
|
||||
// s: signed int
|
||||
//
|
||||
// [row][column]
|
||||
// m: offset -1
|
||||
// n: offset 0
|
||||
// p: offset 1
|
||||
// Example: umn is offset -1 in row and offset 0 in column
|
||||
for ( i = 0; i < H - 1; i += 2 )
|
||||
short *c_dx = dx.ptr<short>(i);
|
||||
short *c_dy = dy.ptr<short>(i);
|
||||
short *n_dx = dx.ptr<short>(i+1);
|
||||
short *n_dy = dy.ptr<short>(i+1);
|
||||
|
||||
// Process rest of columns in chunks of v_uint8::nlanes columns at a time
|
||||
for ( j = 1; j < W - v_uint8::nlanes; j += v_uint8::nlanes)
|
||||
{
|
||||
if ( i == 0 ) p_src = src.ptr<uchar>(i_top);
|
||||
else p_src = src.ptr<uchar>(i-1);
|
||||
// Load top row for 3x3 Sobel filter
|
||||
v_uint8 v_um = vx_load(&p_src[j-1]);
|
||||
v_uint8 v_un = vx_load(&p_src[j]);
|
||||
v_uint8 v_up = vx_load(&p_src[j+1]);
|
||||
v_uint16 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2;
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16 v_s1m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16 v_s1m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16 v_s1n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16 v_s1n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16 v_s1p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16 v_s1p2 = v_reinterpret_as_s16(v_up2);
|
||||
|
||||
c_src = src.ptr<uchar>(i);
|
||||
n_src = src.ptr<uchar>(i+1);
|
||||
// Load second row for 3x3 Sobel filter
|
||||
v_um = vx_load(&c_src[j-1]);
|
||||
v_un = vx_load(&c_src[j]);
|
||||
v_up = vx_load(&c_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16 v_s2m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16 v_s2m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16 v_s2n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16 v_s2n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16 v_s2p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16 v_s2p2 = v_reinterpret_as_s16(v_up2);
|
||||
|
||||
if ( i == H - 2 ) m_src = src.ptr<uchar>(i_bottom);
|
||||
else m_src = src.ptr<uchar>(i+2);
|
||||
// Load third row for 3x3 Sobel filter
|
||||
v_um = vx_load(&n_src[j-1]);
|
||||
v_un = vx_load(&n_src[j]);
|
||||
v_up = vx_load(&n_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16 v_s3m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16 v_s3m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16 v_s3n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16 v_s3n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16 v_s3p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16 v_s3p2 = v_reinterpret_as_s16(v_up2);
|
||||
|
||||
c_dx = dx.ptr<short>(i);
|
||||
c_dy = dy.ptr<short>(i);
|
||||
n_dx = dx.ptr<short>(i+1);
|
||||
n_dy = dy.ptr<short>(i+1);
|
||||
// dx & dy for rows 1, 2, 3
|
||||
v_int16 v_sdx1, v_sdy1;
|
||||
spatialGradientKernel<v_int16>( v_sdx1, v_sdy1,
|
||||
v_s1m1, v_s1n1, v_s1p1,
|
||||
v_s2m1, v_s2p1,
|
||||
v_s3m1, v_s3n1, v_s3p1 );
|
||||
|
||||
// Process rest of columns 16-column chunks at a time
|
||||
for ( j = 1; j < W - 16; j += 16 )
|
||||
{
|
||||
// Load top row for 3x3 Sobel filter
|
||||
v_uint8x16 v_um = v_load(&p_src[j-1]);
|
||||
v_uint8x16 v_un = v_load(&p_src[j]);
|
||||
v_uint8x16 v_up = v_load(&p_src[j+1]);
|
||||
v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2;
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16x8 v_s1m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16x8 v_s1m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16x8 v_s1n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16x8 v_s1n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16x8 v_s1p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2);
|
||||
v_int16 v_sdx2, v_sdy2;
|
||||
spatialGradientKernel<v_int16>( v_sdx2, v_sdy2,
|
||||
v_s1m2, v_s1n2, v_s1p2,
|
||||
v_s2m2, v_s2p2,
|
||||
v_s3m2, v_s3n2, v_s3p2 );
|
||||
|
||||
// Load second row for 3x3 Sobel filter
|
||||
v_um = v_load(&c_src[j-1]);
|
||||
v_un = v_load(&c_src[j]);
|
||||
v_up = v_load(&c_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16x8 v_s2m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16x8 v_s2m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16x8 v_s2n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16x8 v_s2n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16x8 v_s2p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2);
|
||||
// Store
|
||||
v_store(&c_dx[j], v_sdx1);
|
||||
v_store(&c_dx[j+v_int16::nlanes], v_sdx2);
|
||||
v_store(&c_dy[j], v_sdy1);
|
||||
v_store(&c_dy[j+v_int16::nlanes], v_sdy2);
|
||||
|
||||
// Load third row for 3x3 Sobel filter
|
||||
v_um = v_load(&n_src[j-1]);
|
||||
v_un = v_load(&n_src[j]);
|
||||
v_up = v_load(&n_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16x8 v_s3m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16x8 v_s3m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16x8 v_s3n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16x8 v_s3n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16x8 v_s3p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2);
|
||||
// Load fourth row for 3x3 Sobel filter
|
||||
v_um = vx_load(&m_src[j-1]);
|
||||
v_un = vx_load(&m_src[j]);
|
||||
v_up = vx_load(&m_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16 v_s4m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16 v_s4m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16 v_s4n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16 v_s4n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16 v_s4p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16 v_s4p2 = v_reinterpret_as_s16(v_up2);
|
||||
|
||||
// dx & dy for rows 1, 2, 3
|
||||
v_int16x8 v_sdx1, v_sdy1;
|
||||
spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
|
||||
v_s1m1, v_s1n1, v_s1p1,
|
||||
v_s2m1, v_s2p1,
|
||||
v_s3m1, v_s3n1, v_s3p1 );
|
||||
// dx & dy for rows 2, 3, 4
|
||||
spatialGradientKernel<v_int16>( v_sdx1, v_sdy1,
|
||||
v_s2m1, v_s2n1, v_s2p1,
|
||||
v_s3m1, v_s3p1,
|
||||
v_s4m1, v_s4n1, v_s4p1 );
|
||||
|
||||
v_int16x8 v_sdx2, v_sdy2;
|
||||
spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
|
||||
v_s1m2, v_s1n2, v_s1p2,
|
||||
v_s2m2, v_s2p2,
|
||||
v_s3m2, v_s3n2, v_s3p2 );
|
||||
spatialGradientKernel<v_int16>( v_sdx2, v_sdy2,
|
||||
v_s2m2, v_s2n2, v_s2p2,
|
||||
v_s3m2, v_s3p2,
|
||||
v_s4m2, v_s4n2, v_s4p2 );
|
||||
|
||||
// Store
|
||||
v_store(&c_dx[j], v_sdx1);
|
||||
v_store(&c_dx[j+8], v_sdx2);
|
||||
v_store(&c_dy[j], v_sdy1);
|
||||
v_store(&c_dy[j+8], v_sdy2);
|
||||
|
||||
// Load fourth row for 3x3 Sobel filter
|
||||
v_um = v_load(&m_src[j-1]);
|
||||
v_un = v_load(&m_src[j]);
|
||||
v_up = v_load(&m_src[j+1]);
|
||||
v_expand(v_um, v_um1, v_um2);
|
||||
v_expand(v_un, v_un1, v_un2);
|
||||
v_expand(v_up, v_up1, v_up2);
|
||||
v_int16x8 v_s4m1 = v_reinterpret_as_s16(v_um1);
|
||||
v_int16x8 v_s4m2 = v_reinterpret_as_s16(v_um2);
|
||||
v_int16x8 v_s4n1 = v_reinterpret_as_s16(v_un1);
|
||||
v_int16x8 v_s4n2 = v_reinterpret_as_s16(v_un2);
|
||||
v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1);
|
||||
v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2);
|
||||
|
||||
// dx & dy for rows 2, 3, 4
|
||||
spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
|
||||
v_s2m1, v_s2n1, v_s2p1,
|
||||
v_s3m1, v_s3p1,
|
||||
v_s4m1, v_s4n1, v_s4p1 );
|
||||
|
||||
spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
|
||||
v_s2m2, v_s2n2, v_s2p2,
|
||||
v_s3m2, v_s3p2,
|
||||
v_s4m2, v_s4n2, v_s4p2 );
|
||||
|
||||
// Store
|
||||
v_store(&n_dx[j], v_sdx1);
|
||||
v_store(&n_dx[j+8], v_sdx2);
|
||||
v_store(&n_dy[j], v_sdy1);
|
||||
v_store(&n_dy[j+8], v_sdy2);
|
||||
}
|
||||
// Store
|
||||
v_store(&n_dx[j], v_sdx1);
|
||||
v_store(&n_dx[j+v_int16::nlanes], v_sdx2);
|
||||
v_store(&n_dy[j], v_sdy1);
|
||||
v_store(&n_dy[j+v_int16::nlanes], v_sdy2);
|
||||
}
|
||||
}
|
||||
i_start = i;
|
||||
@ -265,16 +251,12 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
|
||||
uchar v00, v01, v02, v10, v11, v12, v20, v21, v22;
|
||||
for ( i = 0; i < H; i++ )
|
||||
{
|
||||
if ( i == 0 ) p_src = src.ptr<uchar>(i_top);
|
||||
else p_src = src.ptr<uchar>(i-1);
|
||||
uchar *p_src = src.ptr<uchar>(i == 0 ? i_top : i - 1);
|
||||
uchar *c_src = src.ptr<uchar>(i);
|
||||
uchar *n_src = src.ptr<uchar>(i == H - 1 ? i_bottom : i + 1);
|
||||
|
||||
c_src = src.ptr<uchar>(i);
|
||||
|
||||
if ( i == H - 1 ) n_src = src.ptr<uchar>(i_bottom);
|
||||
else n_src = src.ptr<uchar>(i+1);
|
||||
|
||||
c_dx = dx.ptr<short>(i);
|
||||
c_dy = dy.ptr<short>(i);
|
||||
short *c_dx = dx.ptr<short>(i);
|
||||
short *c_dy = dy.ptr<short>(i);
|
||||
|
||||
// Process left-most column
|
||||
j = 0;
|
||||
|
||||
@ -2235,4 +2235,13 @@ TEST(Imgproc_Sobel, s16_regression_13506)
|
||||
Sobel(src, dst, CV_16S, 0, 1, 5);
|
||||
ASSERT_EQ(0.0, cvtest::norm(dst, ref, NORM_INF));
|
||||
}
|
||||
|
||||
// Regression test tagged issue_12961: runs cv::pyrDown on an all-zero 9x9
// single-channel 8-bit image and requires the norm of the result to be exactly 0.
TEST(Imgproc_Pyrdown, issue_12961)
{
    const Scalar fillValue = Scalar::all(0);
    Mat input(9, 9, CV_8UC1, fillValue);
    Mat reduced;

    cv::pyrDown(input, reduced);

    // Every output pixel must still be zero, so the norm has to vanish.
    const double residual = cv::norm(reduced);
    ASSERT_EQ(0.0, residual);
}
|
||||
|
||||
}} // namespace
|
||||
|
||||
@ -341,6 +341,9 @@ EMSCRIPTEN_BINDINGS(binding_utils)
|
||||
register_vector<cv::Mat>("MatVector");
|
||||
register_vector<cv::Rect>("RectVector");
|
||||
register_vector<cv::KeyPoint>("KeyPointVector");
|
||||
register_vector<cv::DMatch>("DMatchVector");
|
||||
register_vector<std::vector<cv::DMatch>>("DMatchVectorVector");
|
||||
|
||||
|
||||
emscripten::class_<cv::Mat>("Mat")
|
||||
.constructor<>()
|
||||
@ -494,6 +497,12 @@ EMSCRIPTEN_BINDINGS(binding_utils)
|
||||
.field("response", &cv::KeyPoint::response)
|
||||
.field("size", &cv::KeyPoint::size);
|
||||
|
||||
emscripten::value_object<cv::DMatch>("DMatch")
|
||||
.field("queryIdx", &cv::DMatch::queryIdx)
|
||||
.field("trainIdx", &cv::DMatch::trainIdx)
|
||||
.field("imgIdx", &cv::DMatch::imgIdx)
|
||||
.field("distance", &cv::DMatch::distance);
|
||||
|
||||
emscripten::value_array<cv::Scalar_<double>> ("Scalar")
|
||||
.element(index<0>())
|
||||
.element(index<1>())
|
||||
|
||||
@ -200,20 +200,19 @@ public:
|
||||
{
|
||||
int j;
|
||||
calc_non_rbf_base( vcount, var_count, vecs, another, results,
|
||||
-2*params.gamma, -2*params.coef0 );
|
||||
2*params.gamma, 2*params.coef0 );
|
||||
// TODO: speedup this
|
||||
for( j = 0; j < vcount; j++ )
|
||||
{
|
||||
Qfloat t = results[j];
|
||||
Qfloat e = std::exp(-std::abs(t));
|
||||
Qfloat e = std::exp(std::abs(t));
|
||||
if( t > 0 )
|
||||
results[j] = (Qfloat)((1. - e)/(1. + e));
|
||||
else
|
||||
results[j] = (Qfloat)((e - 1.)/(e + 1.));
|
||||
else
|
||||
results[j] = (Qfloat)((1. - e)/(1. + e));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void calc_rbf( int vcount, int var_count, const float* vecs,
|
||||
const float* another, Qfloat* results )
|
||||
{
|
||||
@ -1310,8 +1309,6 @@ public:
|
||||
|
||||
if( kernelType != SIGMOID && kernelType != POLY )
|
||||
params.coef0 = 0;
|
||||
else if( params.coef0 < 0 )
|
||||
CV_Error( CV_StsOutOfRange, "The kernel parameter <coef0> must be positive or zero" );
|
||||
|
||||
if( kernelType != POLY )
|
||||
params.degree = 0;
|
||||
|
||||
@ -88,6 +88,51 @@ void CV_SVMTrainAutoTest::run( int /*start_from*/ )
|
||||
|
||||
// Runs the CV_SVMTrainAutoTest case through its safe_run() entry point.
TEST(ML_SVM, trainauto)
{
    CV_SVMTrainAutoTest autoTrainCase;
    autoTrainCase.safe_run();
}
|
||||
|
||||
// Trains a SIGMOID-kernel SVM with trainAuto() on two labelled groups of 2-D
// samples and checks that predict() returns the training label for one probe
// point taken from each group.
TEST(ML_SVM, trainauto_sigmoid)
|
||||
{
|
||||
const int datasize = 100;
|
||||
cv::Mat samples = cv::Mat::zeros( datasize, 2, CV_32FC1 );
|
||||
cv::Mat responses = cv::Mat::zeros( datasize, 1, CV_32S );
|
||||
|
||||
const float scale_factor = 0.5;
|
||||
const float radius = 2.0;
|
||||
|
||||
// Populate samples pairwise: even rows get label 0 at (x, y), odd rows get
// label 1 at the same point scaled by scale_factor. The stated intent is data
// that can be split into two concentric circles.
// NOTE(review): as written the data is degenerate, see the notes on angle_rads
// and y below.
for (int i = 0; i < datasize; i+=2)
|
||||
{
|
||||
const float pi = 3.14159f;
|
||||
// NOTE(review): i and datasize are both int and i < datasize, so (i/datasize)
// is integer division and always 0; angle_rads is therefore always 0.
const float angle_rads = (i/datasize) * pi;
|
||||
const float x = radius * cos(angle_rads);
|
||||
// NOTE(review): y uses cos as well (sin would be expected for a circle), so
// x == y. Together with angle_rads == 0 every pair is (radius, radius) and
// (radius, radius) * scale_factor. The probe points below are exactly these
// two points, so changing this changes what the test checks.
const float y = radius * cos(angle_rads);
|
||||
|
||||
// Larger circle: label 0
samples.at<float>( i, 0 ) = x;
|
||||
samples.at<float>( i, 1 ) = y;
|
||||
responses.at<int>( i, 0 ) = 0;
|
||||
|
||||
// Smaller circle: label 1, same point scaled by scale_factor
samples.at<float>( i + 1, 0 ) = x * scale_factor;
|
||||
samples.at<float>( i + 1, 1 ) = y * scale_factor;
|
||||
responses.at<int>( i + 1, 0 ) = 1;
|
||||
}
|
||||
|
||||
cv::Ptr<TrainData> data = TrainData::create( samples, cv::ml::ROW_SAMPLE, responses );
|
||||
cv::Ptr<SVM> svm = SVM::create();
|
||||
svm->setKernel(SVM::SIGMOID);
|
||||
|
||||
svm->setGamma(10.0);
|
||||
// Note the negative coef0 passed to the sigmoid kernel here.
svm->setCoef0(-10.0);
|
||||
svm->trainAuto( data, 10 ); // second argument is 10: presumably the fold count, i.e. 10-fold (not 2-fold) cross validation; confirm against SVM::trainAuto
|
||||
|
||||
// Probe at (radius, radius): expected to be classified with label 0.
float test_data0[2] = {radius, radius};
|
||||
cv::Mat test_point0 = cv::Mat( 1, 2, CV_32FC1, test_data0 );
|
||||
ASSERT_EQ(0, svm->predict( test_point0 ));
|
||||
|
||||
// Probe at the scaled point: expected to be classified with label 1.
float test_data1[2] = {scale_factor * radius, scale_factor * radius};
|
||||
cv::Mat test_point1 = cv::Mat( 1, 2, CV_32FC1, test_data1 );
|
||||
ASSERT_EQ(1, svm->predict( test_point1 ));
|
||||
}
|
||||
|
||||
|
||||
TEST(ML_SVM, trainAuto_regression_5369)
|
||||
{
|
||||
|
||||
@ -323,7 +323,7 @@ def writeTextGraph(modelPath, outputPath, outNodes):
|
||||
|
||||
for node in graph_def.node:
|
||||
if node.op == 'Const':
|
||||
if 'value' in node.attr:
|
||||
del node.attr['value']
|
||||
if 'value' in node.attr and node.attr['value'].tensor.tensor_content:
|
||||
node.attr['value'].tensor.tensor_content = ''
|
||||
|
||||
tf.train.write_graph(graph_def, "", outputPath, as_text=True)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user