From ba8d20e9ae4400be593583138c47eb72fd2adf6e Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 12 Feb 2021 15:28:37 +0300 Subject: [PATCH] Merge pull request #19425 from TolyaTalamanov:at/async-infer [G-API] Implement async infer * Implement async infer * Fix typo --- modules/gapi/src/backends/ie/giebackend.cpp | 559 +++++++++++------- modules/gapi/src/backends/ie/giebackend.hpp | 49 +- .../gapi/test/infer/gapi_infer_ie_test.cpp | 349 +++++++++++ 3 files changed, 740 insertions(+), 217 deletions(-) diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 6e8c2d3377..bc5a16ba70 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -268,46 +268,168 @@ struct IEUnit { } }; -struct IECallContext +class IECallContext { - // Input parameters passed to an inference operation. - std::vector args; - cv::GShapes in_shapes; +public: + IECallContext(const IEUnit & unit, + cv::gimpl::GIslandExecutable::IOutput & output, + cv::gimpl::ie::SyncPrim & sync, + const cv::GArgs & args, + const std::vector & outs, + std::vector && input_objs, + std::vector && output_objs); - //FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call - //to OCV kernel. 
(This can be achieved by a two single time conversions in GCPUExecutable::run, - //once on enter for input and output arguments, and once before return for output arguments only - //FIXME: check if the above applies to this backend (taken from CPU) - std::unordered_map results; + const cv::GArgs& inArgs() const; // Generic accessor API template - const T& inArg(std::size_t input) { return args.at(input).get(); } + const T& inArg(std::size_t input) const { + return m_args.at(input).get(); + } - const cv::MediaFrame& inFrame(std::size_t input) { - return inArg(input); + template + std::vector& outVecR(std::size_t output) { + return outVecRef(output).wref(); } // Syntax sugar - const cv::Mat& inMat(std::size_t input) { - return inArg(input); - } - cv::Mat& outMatR(std::size_t output) { - return *cv::util::get(results.at(output)); - } + cv::GShape inShape(int i) const; + const cv::Mat& inMat(std::size_t input) const; + const cv::MediaFrame& inFrame(std::size_t input) const; - template std::vector& outVecR(std::size_t output) { // FIXME: the same issue - return outVecRef(output).wref(); - } - cv::detail::VectorRef& outVecRef(std::size_t output) { - return cv::util::get(results.at(output)); - } + cv::Mat& outMatR(std::size_t idx); + cv::GRunArgP output(int idx); + + const IEUnit &uu; + cv::gimpl::GIslandExecutable::IOutput &out; + cv::gimpl::ie::SyncPrim &sync; + + // NB: Need to gurantee that MediaFrame::View don't die until request is over. + using Views = std::vector>; + Views views; + +private: + cv::detail::VectorRef& outVecRef(std::size_t idx); + + cv::GArg packArg(const cv::GArg &arg); + + // To store input/output data from frames + std::vector m_input_objs; + std::vector m_output_objs; + + // To simplify access to cv::Mat inside cv::RMat + cv::gimpl::Mag m_res; + + // FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call + //to OCV kernel. 
(This can be achieved by a two single time conversions in GCPUExecutable::run, + //once on enter for input and output arguments, and once before return for output arguments only + // FIXME: check if the above applies to this backend (taken from CPU) + std::unordered_map m_results; + + // Input parameters passed to an inference operation. + cv::GArgs m_args; + cv::GShapes m_in_shapes; }; +IECallContext::IECallContext(const IEUnit & unit, + cv::gimpl::GIslandExecutable::IOutput & output, + cv::gimpl::ie::SyncPrim & syncp, + const cv::GArgs & args, + const std::vector & outs, + std::vector && input_objs, + std::vector && output_objs) +: uu(unit), out(output), sync(syncp), m_input_objs(std::move(input_objs)), + m_output_objs(std::move(output_objs)) +{ + for (auto& it : m_input_objs) cv::gimpl::magazine::bindInArg (m_res, it.first, it.second); + for (auto& it : m_output_objs) cv::gimpl::magazine::bindOutArg(m_res, it.first, it.second); + + m_args.reserve(args.size()); + using namespace std::placeholders; + ade::util::transform(args, + std::back_inserter(m_args), + std::bind(&IECallContext::packArg, this, _1)); + + ade::util::transform(args, std::back_inserter(m_in_shapes), + [](const cv::GArg& arg) { + return arg.get().shape; + }); + + for (const auto out_it : ade::util::indexed(outs)) { + // FIXME: Can the same GArg type resolution mechanism be reused here? 
+ const auto port = ade::util::index(out_it); + const auto desc = ade::util::value(out_it); + m_results[port] = cv::gimpl::magazine::getObjPtr(m_res, desc); + } +} + +const cv::GArgs& IECallContext::inArgs() const { + return m_args; +} + +cv::GShape IECallContext::inShape(int i) const { + return m_in_shapes[i]; +} + +const cv::Mat& IECallContext::inMat(std::size_t input) const { + return inArg(input); +} + +const cv::MediaFrame& IECallContext::inFrame(std::size_t input) const { + return inArg(input); +} + +cv::Mat& IECallContext::outMatR(std::size_t idx) { + return *cv::util::get(m_results.at(idx)); +} + +cv::GRunArgP IECallContext::output(int idx) { + return m_output_objs[idx].second; +}; + +cv::detail::VectorRef& IECallContext::outVecRef(std::size_t idx) { + return cv::util::get(m_results.at(idx)); +} + +cv::GArg IECallContext::packArg(const cv::GArg &arg) { + // No API placeholders allowed at this point + // FIXME: this check has to be done somewhere in compilation stage. + GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT + && arg.kind != cv::detail::ArgKind::GSCALAR + && arg.kind != cv::detail::ArgKind::GARRAY); + + if (arg.kind != cv::detail::ArgKind::GOBJREF) { + cv::util::throw_error(std::logic_error("Inference supports G-types ONLY!")); + } + GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF); + + // Wrap associated CPU object (either host or an internal one) + // FIXME: object can be moved out!!! GExecutor faced that. 
+ const cv::gimpl::RcDesc &ref = arg.get(); + switch (ref.shape) + { + case cv::GShape::GMAT: return cv::GArg(m_res.slot()[ref.id]); + + // Note: .at() is intentional for GArray as object MUST be already there + // (and constructed by either bindIn/Out or resetInternal) + case cv::GShape::GARRAY: return cv::GArg(m_res.slot().at(ref.id)); + + // Note: .at() is intentional for GOpaque as object MUST be already there + // (and constructed by either bindIn/Out or resetInternal) + case cv::GShape::GOPAQUE: return cv::GArg(m_res.slot().at(ref.id)); + + case cv::GShape::GFRAME: return cv::GArg(m_res.slot().at(ref.id)); + + default: + cv::util::throw_error(std::logic_error("Unsupported GShape type")); + break; + } +} + + struct IECallable { static const char *name() { return "IERequestCallable"; } - // FIXME: Make IECallContext manage them all? (3->1) - using Run = std::function; + using Run = std::function)>; Run run; }; @@ -341,14 +463,12 @@ using GConstGIEModel = ade::ConstTypedGraph , IECallable >; -using Views = std::vector>; - -inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, Views& views) { - switch (ctx.in_shapes[i]) { +inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i) { + switch (ctx.inShape(i)) { case cv::GShape::GFRAME: { const auto& frame = ctx.inFrame(i); - views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R))); - return wrapIE(*views.back(), frame.desc()); + ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R))); + return wrapIE(*(ctx.views.back()), frame.desc()); } case cv::GShape::GMAT: { return wrapIE(ctx.inMat(i), cv::gapi::ie::TraitAs::IMAGE); @@ -395,90 +515,76 @@ cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g, } } -// FIXME: Document what it does -cv::GArg cv::gimpl::ie::GIEExecutable::packArg(const cv::GArg &arg) { - // No API placeholders allowed at this point - // FIXME: this check has to be done somewhere in compilation stage. 
- GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT - && arg.kind != cv::detail::ArgKind::GSCALAR - && arg.kind != cv::detail::ArgKind::GARRAY); +void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in, + cv::gimpl::GIslandExecutable::IOutput &out) { + // General alghoritm: + // 1. Get input message from IInput + // 2. Collect island inputs/outputs. + // 3. Create kernel context. (Every kernel has his own context.) + // 4. Since only single async request is supported + // wait until it is over and run kernel. + // (At this point, an asynchronous request will be started.) + // 5. Without waiting for the completion of the asynchronous request + // started by kernel go to the next frame (1) + // + // 6. If graph is compiled in non-streaming mode, wait until request is over. - if (arg.kind != cv::detail::ArgKind::GOBJREF) { - util::throw_error(std::logic_error("Inference supports G-types ONLY!")); - } - GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF); + std::vector input_objs; + std::vector output_objs; - // Wrap associated CPU object (either host or an internal one) - // FIXME: object can be moved out!!! GExecutor faced that. 
- const cv::gimpl::RcDesc &ref = arg.get(); - switch (ref.shape) + const auto &in_desc = in.desc(); + const auto &out_desc = out.desc(); + const auto in_msg = in.get(); + + if (cv::util::holds_alternative(in_msg)) { - case GShape::GMAT: return GArg(m_res.slot()[ref.id]); - - // Note: .at() is intentional for GArray as object MUST be already there - // (and constructed by either bindIn/Out or resetInternal) - case GShape::GARRAY: return GArg(m_res.slot().at(ref.id)); - - // Note: .at() is intentional for GOpaque as object MUST be already there - // (and constructed by either bindIn/Out or resetInternal) - case GShape::GOPAQUE: return GArg(m_res.slot().at(ref.id)); - - case GShape::GFRAME: return GArg(m_res.slot().at(ref.id)); - - default: - util::throw_error(std::logic_error("Unsupported GShape type")); - break; + // (1) Since kernel is executing asynchronously + // need to wait until the previous is over + m_sync.wait(); + out.post(cv::gimpl::EndOfStream{}); + return; } -} -void cv::gimpl::ie::GIEExecutable::run(std::vector &&input_objs, - std::vector &&output_objs) { - // Update resources with run-time information - what this Island - // has received from user (or from another Island, or mix...) 
- // FIXME: Check input/output objects against GIsland protocol + GAPI_Assert(cv::util::holds_alternative(in_msg)); + const auto in_vector = cv::util::get(in_msg); - for (auto& it : input_objs) magazine::bindInArg (m_res, it.first, it.second); - for (auto& it : output_objs) magazine::bindOutArg(m_res, it.first, it.second); + // (2) Collect inputs/outputs + input_objs.reserve(in_desc.size()); + output_objs.reserve(out_desc.size()); + for (auto &&it: ade::util::zip(ade::util::toRange(in_desc), + ade::util::toRange(in_vector))) + { + input_objs.emplace_back(std::get<0>(it), std::get<1>(it)); + } + for (auto &&it: ade::util::indexed(ade::util::toRange(out_desc))) + { + output_objs.emplace_back(ade::util::value(it), + out.get(ade::util::checked_cast(ade::util::index(it)))); + } + GConstGIEModel giem(m_g); + const auto &uu = giem.metadata(this_nh).get(); + const auto &op = m_gm.metadata(this_nh).get(); + // (3) Create kernel context + auto context = std::make_shared(uu, out, m_sync, op.args, op.outs, + std::move(input_objs), std::move(output_objs)); + + + // (4) Only single async request is supported now, + // so need to wait until the previous is over. + m_sync.wait(); + // (5) Run the kernel and start handle next frame. + const auto &kk = giem.metadata(this_nh).get(); // FIXME: Running just a single node now. // Not sure if need to support many of them, though // FIXME: Make this island-unmergeable? 
- const auto &op = m_gm.metadata(this_nh).get(); + kk.run(this_iec, context); - // Initialize kernel's execution context: - // - Input parameters - IECallContext context; - context.args.reserve(op.args.size()); - using namespace std::placeholders; - ade::util::transform(op.args, - std::back_inserter(context.args), - std::bind(&GIEExecutable::packArg, this, _1)); - - // NB: Need to store inputs shape to recognize GFrame/GMat - ade::util::transform(op.args, - std::back_inserter(context.in_shapes), - [](const cv::GArg& arg) { - return arg.get().shape; - }); - // - Output parameters. - for (const auto out_it : ade::util::indexed(op.outs)) { - // FIXME: Can the same GArg type resolution mechanism be reused here? - const auto out_port = ade::util::index(out_it); - const auto out_desc = ade::util::value(out_it); - context.results[out_port] = magazine::getObjPtr(m_res, out_desc); + // (6) In not-streaming mode need to wait until the async request is over + // FIXME: Is there more graceful way to handle this case ? 
+ if (!m_gm.metadata().contains()) { + m_sync.wait(); } - - // And now trigger the execution - GConstGIEModel giem(m_g); - const auto &uu = giem.metadata(this_nh).get(); - const auto &kk = giem.metadata(this_nh).get(); - kk.run(this_iec, uu, context); - - for (auto &it : output_objs) magazine::writeBack(m_res, it.first, it.second); - - // In/Out args clean-up is mandatory now with RMat - for (auto &it : input_objs) magazine::unbind(m_res, it.first); - for (auto &it : output_objs) magazine::unbind(m_res, it.first); } namespace cv { @@ -488,31 +594,82 @@ namespace ie { static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) { switch (mm.index()) { case cv::GMetaArg::index_of(): - { - ii->setPrecision(toIE(util::get(mm).depth)); - break; - } + { + ii->setPrecision(toIE(util::get(mm).depth)); + break; + } case cv::GMetaArg::index_of(): - { - const auto &meta = util::get(mm); - switch (meta.fmt) { - case cv::MediaFormat::NV12: - ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); - break; - case cv::MediaFormat::BGR: - // NB: Do nothing - break; - default: - GAPI_Assert(false && "Unsupported media format for IE backend"); - } - ii->setPrecision(toIE(CV_8U)); - break; + { + const auto &meta = util::get(mm); + switch (meta.fmt) { + case cv::MediaFormat::NV12: + ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12); + break; + case cv::MediaFormat::BGR: + // NB: Do nothing + break; + default: + GAPI_Assert(false && "Unsupported media format for IE backend"); } + ii->setPrecision(toIE(CV_8U)); + break; + } default: util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); } } +// NB: This is a callback used by async infer +// to post outputs blobs (cv::GMat's). 
+struct PostOutputs { + // NB: Should be const to pass into SetCompletionCallback + void operator()() const { + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + auto& out_mat = ctx->outMatR(i); + IE::Blob::Ptr this_blob = iec.this_request.GetBlob(ctx->uu.params.output_names[i]); + copyFromIE(this_blob, out_mat); + ctx->out.post(ctx->output(i)); + } + ctx->sync.release_and_notify(); + } + + IECompiled &iec ; + std::shared_ptr ctx ; +}; + +// NB: This is a callback used by async infer +// to post output list of blobs (cv::GArray). +struct PostOutputsList { + // NB: Should be const to pass into SetCompletionCallback + void operator()() const { + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + std::vector &out_vec = ctx->outVecR(i); + + IE::Blob::Ptr out_blob = iec.this_request.GetBlob(ctx->uu.params.output_names[i]); + + cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); + // FIXME: Avoid data copy. Not sure if it is possible though + copyFromIE(out_blob, out_mat); + out_vec.push_back(std::move(out_mat)); + } + // NB: Callbacks run synchronously yet, so the lock isn't necessary + auto&& out_vec_size = ctx->outVecR(0).size(); + // NB: Now output vector is collected and can be posted to output + if (nrequests == out_vec_size) { + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + ctx->out.post(ctx->output(i)); + } + } + + ctx->sync.release_and_notify(); + } + + IECompiled &iec ; + std::shared_ptr ctx ; + std::vector< std::vector > cached_dims; + size_t nrequests; +}; + struct Infer: public cv::detail::KernelTag { using API = cv::GInferBase; static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } @@ -563,29 +720,23 @@ struct Infer: public cv::detail::KernelTag { return result; } - static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) { + static void run(IECompiled &iec, std::shared_ptr ctx) { // non-generic version for now: // - assumes all inputs/outputs are always Mats - Views 
views; - for (auto i : ade::util::iota(uu.params.num_in)) { + for (auto i : ade::util::iota(ctx->uu.params.num_in)) { // TODO: Ideally we shouldn't do SetBlob() but GetBlob() instead, // and redirect our data producers to this memory // (A memory dialog comes to the picture again) - IE::Blob::Ptr this_blob = extractBlob(ctx, i, views); - iec.this_request.SetBlob(uu.params.input_names[i], this_blob); + IE::Blob::Ptr this_blob = extractBlob(*ctx, i); + iec.this_request.SetBlob(ctx->uu.params.input_names[i], this_blob); } - iec.this_request.Infer(); - for (auto i : ade::util::iota(uu.params.num_out)) { - // TODO: Think on avoiding copying here. - // Either we should ask IE to use our memory (what is not always the - // best policy) or use IE-allocated buffer inside (and pass it to the graph). - // Not a big deal for classifiers and detectors, - // but may be critical to segmentation. - cv::Mat& out_mat = ctx.outMatR(i); - IE::Blob::Ptr this_blob = iec.this_request.GetBlob(uu.params.output_names[i]); - copyFromIE(this_blob, out_mat); - } + iec.this_request.SetCompletionCallback(PostOutputs{iec, ctx}); + + // NB: Since only single async request is supported, need to lock other + // attempts to get access while request is working. 
+ ctx->sync.acquire(); + iec.this_request.StartAsync(); } }; @@ -630,22 +781,22 @@ struct InferROI: public cv::detail::KernelTag { return result; } - static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) { + static void run(IECompiled &iec, std::shared_ptr ctx) { // non-generic version for now, per the InferROI's definition - GAPI_Assert(uu.params.num_in == 1); - const auto& this_roi = ctx.inArg(0).rref(); + GAPI_Assert(ctx->uu.params.num_in == 1); + const auto& this_roi = ctx->inArg(0).rref(); - Views views; - IE::Blob::Ptr this_blob = extractBlob(ctx, 1, views); + IE::Blob::Ptr this_blob = extractBlob(*ctx, 1u); - iec.this_request.SetBlob(*uu.params.input_names.begin(), - IE::make_shared_blob(this_blob, toIE(this_roi))); - iec.this_request.Infer(); - for (auto i : ade::util::iota(uu.params.num_out)) { - cv::Mat& out_mat = ctx.outMatR(i); - IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]); - copyFromIE(out_blob, out_mat); - } + iec.this_request.SetBlob(*(ctx->uu.params.input_names.begin()), + IE::make_shared_blob(this_blob, toIE(this_roi))); + + iec.this_request.SetCompletionCallback(PostOutputs{iec, ctx}); + + // NB: Since only single async request is supported, need to lock other + // attempts to get access while request is working. 
+ ctx->sync.acquire(); + iec.this_request.StartAsync(); } }; @@ -688,44 +839,43 @@ struct InferList: public cv::detail::KernelTag { cv::GMetaArg{cv::empty_array_desc()}); } - static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) { + static void run(IECompiled &iec, std::shared_ptr ctx) { // non-generic version for now: // - assumes zero input is always ROI list // - assumes all inputs/outputs are always Mats - GAPI_Assert(uu.params.num_in == 1); // roi list is not counted in net's inputs + GAPI_Assert(ctx->uu.params.num_in == 1); // roi list is not counted in net's inputs - const auto& in_roi_vec = ctx.inArg(0u).rref(); + const auto& in_roi_vec = ctx->inArg(0u).rref(); - Views views; - IE::Blob::Ptr this_blob = extractBlob(ctx, 1, views); + IE::Blob::Ptr this_blob = extractBlob(*ctx, 1u); // FIXME: This could be done ONCE at graph compile stage! - std::vector< std::vector > cached_dims(uu.params.num_out); - for (auto i : ade::util::iota(uu.params.num_out)) { - const IE::DataPtr& ie_out = uu.outputs.at(uu.params.output_names[i]); + std::vector< std::vector > cached_dims(ctx->uu.params.num_out); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]); cached_dims[i] = toCV(ie_out->getTensorDesc().getDims()); - ctx.outVecR(i).clear(); + ctx->outVecR(i).clear(); // FIXME: Isn't this should be done automatically // by some resetInternalData(), etc? (Probably at the GExecutor level) } - for (const auto &rc : in_roi_vec) { - // FIXME: Assumed only 1 input + for (auto&& rc : in_roi_vec) { + // NB: Only single async request is supported now, + // so need to wait until previos iteration is over. + // However there is no need to wait async request from last iteration, + // this will be done by backend. 
+ ctx->sync.wait(); + IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc)); - iec.this_request.SetBlob(uu.params.input_names[0u], roi_blob); - iec.this_request.Infer(); + iec.this_request.SetBlob(ctx->uu.params.input_names[0u], roi_blob); - // While input is fixed to be 1, - // there may be still multiple outputs - for (auto i : ade::util::iota(uu.params.num_out)) { - std::vector &out_vec = ctx.outVecR(i); + iec.this_request.SetCompletionCallback( + PostOutputsList{iec, ctx, cached_dims, in_roi_vec.size()}); - IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]); - - cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); - copyFromIE(out_blob, out_mat); // FIXME: Avoid data copy. Not sure if it is possible though - out_vec.push_back(std::move(out_mat)); - } + // NB: Since only single async request is supported, need to lock other + // attempts to get access while request is working. + ctx->sync.acquire(); + iec.this_request.StartAsync(); } } }; @@ -813,69 +963,58 @@ struct InferList2: public cv::detail::KernelTag { cv::GMetaArg{cv::empty_array_desc()}); } - static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) { - GAPI_Assert(ctx.args.size() > 1u - && "This operation must have at least two arguments"); + static void run(IECompiled &iec, std::shared_ptr ctx) { + GAPI_Assert(ctx->inArgs().size() > 1u + && "This operation must have at least two arguments"); - Views views; - IE::Blob::Ptr blob_0 = extractBlob(ctx, 0, views); + IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0u); // Take the next argument, which must be vector (of any kind). // Use it only to obtain the ROI list size (sizes of all other // vectors must be equal to this one) - const auto list_size = ctx.inArg(1u).size(); + const auto list_size = ctx->inArg(1u).size(); // FIXME: This could be done ONCE at graph compile stage! 
- std::vector< std::vector > cached_dims(uu.params.num_out); - for (auto i : ade::util::iota(uu.params.num_out)) { - const IE::DataPtr& ie_out = uu.outputs.at(uu.params.output_names[i]); + std::vector< std::vector > cached_dims(ctx->uu.params.num_out); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]); cached_dims[i] = toCV(ie_out->getTensorDesc().getDims()); - ctx.outVecR(i).clear(); + ctx->outVecR(i).clear(); // FIXME: Isn't this should be done automatically // by some resetInternalData(), etc? (Probably at the GExecutor level) } - // For every ROI in the list {{{ for (const auto &list_idx : ade::util::iota(list_size)) { - // For every input of the net {{{ - for (auto in_idx : ade::util::iota(uu.params.num_in)) { - const auto &this_vec = ctx.inArg(in_idx+1u); + // NB: Only single async request is supported now, + // so need to wait until previos iteration is over. + // However there is no need to wait async request from last iteration, + // this will be done by backend. 
+ ctx->sync.wait(); + for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) { + const auto &this_vec = ctx->inArg(in_idx+1u); GAPI_Assert(this_vec.size() == list_size); - // Prepare input {{{ IE::Blob::Ptr this_blob; if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) { - // ROI case - create an ROI blob const auto &vec = this_vec.rref(); this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx])); } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { - // Mat case - create a regular blob - // FIXME: NOW Assume Mats are always BLOBS (not - // images) const auto &vec = this_vec.rref(); const auto &mat = vec[list_idx]; this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR); } else { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - iec.this_request.SetBlob(uu.params.input_names[in_idx], this_blob); - // }}} (Preapre input) - } // }}} (For every input of the net) + iec.this_request.SetBlob(ctx->uu.params.input_names[in_idx], this_blob); + } - // Run infer request {{{ - iec.this_request.Infer(); - // }}} (Run infer request) + iec.this_request.SetCompletionCallback( + PostOutputsList{iec, ctx, cached_dims, list_size}); - // For every output of the net {{{ - for (auto i : ade::util::iota(uu.params.num_out)) { - // Push results to the list {{{ - std::vector &out_vec = ctx.outVecR(i); - IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]); - cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); - copyFromIE(out_blob, out_mat); // FIXME: Avoid data copy. Not sure if it is possible though - out_vec.push_back(std::move(out_mat)); - // }}} (Push results to the list) - } // }}} (For every output of the net) - } // }}} (For every ROI in the list) + // NB: Since only single async request is supported, need to lock other + // attempts to get access while request is working. 
+ ctx->sync.acquire(); + iec.this_request.StartAsync(); + } } }; diff --git a/modules/gapi/src/backends/ie/giebackend.hpp b/modules/gapi/src/backends/ie/giebackend.hpp index 4bac71c378..87d3a99d2f 100644 --- a/modules/gapi/src/backends/ie/giebackend.hpp +++ b/modules/gapi/src/backends/ie/giebackend.hpp @@ -13,6 +13,7 @@ #ifdef HAVE_INF_ENGINE #include // type_list_index +#include #include @@ -37,6 +38,37 @@ struct IECompiled { InferenceEngine::InferRequest this_request; }; +// FIXME: Structure which collect all necessary sync primitives +// will be deleted when the async request pool appears +class SyncPrim { +public: + void wait() { + std::unique_lock l(m_mutex); + m_cv.wait(l, [this]{ return !m_is_busy; }); + } + + void release_and_notify() { + { + std::lock_guard lock(m_mutex); + m_is_busy = false; + } + m_cv.notify_one(); + } + + void acquire() { + std::lock_guard lock(m_mutex); + m_is_busy = true; + } + +private: + // To wait until the async request isn't over + std::condition_variable m_cv; + // To avoid spurious cond var wake up + bool m_is_busy = false; + // To sleep until condition variable wakes up + std::mutex m_mutex; +}; + class GIEExecutable final: public GIslandExecutable { const ade::Graph &m_g; @@ -50,11 +82,8 @@ class GIEExecutable final: public GIslandExecutable // List of all resources in graph (both internal and external) std::vector m_dataNodes; - // Actual data of all resources in graph (both internal and external) - Mag m_res; - - // Execution helpers - GArg packArg(const GArg &arg); + // Sync primitive + SyncPrim m_sync; public: GIEExecutable(const ade::Graph &graph, @@ -65,8 +94,14 @@ public: GAPI_Assert(false); // Not implemented yet } - virtual void run(std::vector &&input_objs, - std::vector &&output_objs) override; + virtual void run(std::vector &&, + std::vector &&) override { + GAPI_Assert(false && "Not implemented"); + } + + virtual void run(GIslandExecutable::IInput &in, + GIslandExecutable::IOutput &out) override; + }; }}} diff 
--git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 4cba82a348..4611c3be9e 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -15,6 +15,7 @@ #include #include +#include #include "backends/ie/util.hpp" #include "backends/ie/giebackend/giewrapper.hpp" @@ -22,6 +23,21 @@ namespace opencv_test { namespace { +void initTestDataPath() +{ +#ifndef WINRT + static bool initialized = false; + if (!initialized) + { + // Since G-API has no own test data (yet), it is taken from the common space + const char* testDataPath = getenv("OPENCV_TEST_DATA_PATH"); + if (testDataPath) { + cvtest::addDataSearchPath(testDataPath); + } + initialized = true; + } +#endif // WINRT +} class TestMediaBGR final: public cv::MediaFrame::IAdapter { cv::Mat m_mat; @@ -937,6 +953,339 @@ TEST_F(ROIListNV12, Infer2MediaInputNV12) validate(); } +TEST(Infer, TestStreamingInfer) +{ + initTestDataPath(); + initDLDTDataPath(); + + std::string filepath = findDataFile("cv/video/768x576.avi"); + + cv::gapi::ie::detail::ParamDesc params; + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.device_id = "CPU"; + + // Load IE network, initialize input data using that. 
+ cv::Mat in_mat; + cv::Mat gapi_age, gapi_gender; + + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "test-age-gender"); + + cv::GMat in; + cv::GMat age, gender; + + std::tie(age, gender) = cv::gapi::infer(in); + cv::GComputation comp(cv::GIn(in), cv::GOut(age, gender)); + + auto pp = cv::gapi::ie::Params { + params.model_path, params.weights_path, params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }); + + + std::size_t num_frames = 0u; + std::size_t max_frames = 10u; + + cv::VideoCapture cap; + cap.open(filepath); + if (!cap.isOpened()) + throw SkipTestException("Video file can not be opened"); + + cap >> in_mat; + auto pipeline = comp.compileStreaming(cv::compile_args(cv::gapi::networks(pp))); + pipeline.setSource(filepath); + + pipeline.start(); + while (num_frames < max_frames && pipeline.pull(cv::gout(gapi_age, gapi_gender))) + { + IE::Blob::Ptr ie_age, ie_gender; + { + auto plugin = cv::gimpl::ie::wrap::getPlugin(params); + auto net = cv::gimpl::ie::wrap::readNetwork(params); + setNetParameters(net); + auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params); + auto infer_request = this_network.CreateInferRequest(); + + infer_request.SetBlob("data", cv::gapi::ie::util::to_ie(in_mat)); + infer_request.Infer(); + ie_age = infer_request.GetBlob("age_conv3"); + ie_gender = infer_request.GetBlob("prob"); + } + // Validate with IE itself (avoid DNN module dependency here) + normAssert(cv::gapi::ie::util::to_ocv(ie_age), gapi_age, "Test age output" ); + normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output"); + ++num_frames; + cap >> in_mat; + } + pipeline.stop(); +} + +TEST(InferROI, TestStreamingInfer) +{ + initTestDataPath(); + initDLDTDataPath(); + + std::string filepath = findDataFile("cv/video/768x576.avi"); + + cv::gapi::ie::detail::ParamDesc params; + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); + params.weights_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.bin"); + params.device_id = "CPU"; + + // Load IE network, initialize input data using that. + cv::Mat in_mat; + cv::Mat gapi_age, gapi_gender; + cv::Rect rect(cv::Point{64, 60}, cv::Size{96, 96}); + + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "test-age-gender"); + + cv::GMat in; + cv::GOpaque roi; + cv::GMat age, gender; + + std::tie(age, gender) = cv::gapi::infer(roi, in); + cv::GComputation comp(cv::GIn(in, roi), cv::GOut(age, gender)); + + auto pp = cv::gapi::ie::Params { + params.model_path, params.weights_path, params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }); + + + std::size_t num_frames = 0u; + std::size_t max_frames = 10u; + + cv::VideoCapture cap; + cap.open(filepath); + if (!cap.isOpened()) + throw SkipTestException("Video file can not be opened"); + + cap >> in_mat; + auto pipeline = comp.compileStreaming(cv::compile_args(cv::gapi::networks(pp))); + pipeline.setSource( + cv::gin(cv::gapi::wip::make_src(filepath), rect)); + + pipeline.start(); + while (num_frames < max_frames && pipeline.pull(cv::gout(gapi_age, gapi_gender))) + { + // Load & run IE network + IE::Blob::Ptr ie_age, ie_gender; + { + auto plugin = cv::gimpl::ie::wrap::getPlugin(params); + auto net = cv::gimpl::ie::wrap::readNetwork(params); + setNetParameters(net); + auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params); + auto infer_request = this_network.CreateInferRequest(); + const auto ie_rc = IE::ROI { + 0u + , static_cast(rect.x) + , static_cast(rect.y) + , static_cast(rect.width) + , static_cast(rect.height) + }; + IE::Blob::Ptr roi_blob = IE::make_shared_blob(cv::gapi::ie::util::to_ie(in_mat), ie_rc); + infer_request.SetBlob("data", roi_blob); + infer_request.Infer(); + ie_age = infer_request.GetBlob("age_conv3"); + ie_gender = infer_request.GetBlob("prob"); + } + // Validate with IE itself (avoid DNN module dependency here) + normAssert(cv::gapi::ie::util::to_ocv(ie_age), gapi_age, "Test age output" ); + 
+        normAssert(cv::gapi::ie::util::to_ocv(ie_gender), gapi_gender, "Test gender output");
+        ++num_frames;
+        cap >> in_mat;
+    }
+    pipeline.stop();
+}
+
+TEST(InferList, TestStreamingInfer)
+{
+    initTestDataPath();
+    initDLDTDataPath();
+
+    std::string filepath = findDataFile("cv/video/768x576.avi");
+
+    cv::gapi::ie::detail::ParamDesc params;
+    params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    params.device_id = "CPU";
+
+    // Load IE network, initialize input data using that.
+    cv::Mat in_mat;
+    std::vector<cv::Mat> ie_ages;
+    std::vector<cv::Mat> ie_genders;
+    std::vector<cv::Mat> gapi_ages;
+    std::vector<cv::Mat> gapi_genders;
+
+    std::vector<cv::Rect> roi_list = {
+        cv::Rect(cv::Point{64, 60}, cv::Size{ 96,  96}),
+        cv::Rect(cv::Point{50, 32}, cv::Size{128, 160}),
+    };
+
+    using AGInfo = std::tuple<cv::GMat, cv::GMat>;
+    G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
+
+    cv::GMat in;
+    cv::GArray<cv::Rect> roi;
+    cv::GArray<cv::GMat> age, gender;
+
+    std::tie(age, gender) = cv::gapi::infer<AgeGender>(roi, in);
+    cv::GComputation comp(cv::GIn(in, roi), cv::GOut(age, gender));
+
+    auto pp = cv::gapi::ie::Params<AgeGender> {
+        params.model_path, params.weights_path, params.device_id
+    }.cfgOutputLayers({ "age_conv3", "prob" });
+
+
+    std::size_t num_frames = 0u;
+    std::size_t max_frames = 10u;
+
+    cv::VideoCapture cap;
+    cap.open(filepath);
+    if (!cap.isOpened())
+        throw SkipTestException("Video file can not be opened");
+
+    cap >> in_mat;
+    auto pipeline = comp.compileStreaming(cv::compile_args(cv::gapi::networks(pp)));
+    pipeline.setSource(
+            cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(filepath), roi_list));
+
+    pipeline.start();
+    while (num_frames < max_frames && pipeline.pull(cv::gout(gapi_ages, gapi_genders)))
+    {
+        {
+            auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
+            auto net = cv::gimpl::ie::wrap::readNetwork(params);
+            setNetParameters(net);
+            auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
+            auto infer_request =
+                this_network.CreateInferRequest();
+            auto frame_blob = cv::gapi::ie::util::to_ie(in_mat);
+
+            for (auto &&rc : roi_list) {
+                const auto ie_rc = IE::ROI {
+                    0u
+                    , static_cast<std::size_t>(rc.x)
+                    , static_cast<std::size_t>(rc.y)
+                    , static_cast<std::size_t>(rc.width)
+                    , static_cast<std::size_t>(rc.height)
+                };
+                infer_request.SetBlob("data", IE::make_shared_blob(frame_blob, ie_rc));
+                infer_request.Infer();
+
+                using namespace cv::gapi::ie::util;
+                ie_ages.push_back(to_ocv(infer_request.GetBlob("age_conv3")).clone());
+                ie_genders.push_back(to_ocv(infer_request.GetBlob("prob")).clone());
+            }
+        } // namespace IE = ..
+        // Validate with IE itself (avoid DNN module dependency here)
+        normAssert(ie_ages [0], gapi_ages [0], "0: Test age output");
+        normAssert(ie_genders[0], gapi_genders[0], "0: Test gender output");
+        normAssert(ie_ages [1], gapi_ages [1], "1: Test age output");
+        normAssert(ie_genders[1], gapi_genders[1], "1: Test gender output");
+
+        ie_ages.clear();
+        ie_genders.clear();
+
+        ++num_frames;
+        cap >> in_mat;
+    }
+    pipeline.stop();
+}
+
+TEST(Infer2, TestStreamingInfer)
+{
+    initTestDataPath();
+    initDLDTDataPath();
+
+    std::string filepath = findDataFile("cv/video/768x576.avi");
+
+    cv::gapi::ie::detail::ParamDesc params;
+    params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml");
+    params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin");
+    params.device_id = "CPU";
+
+    // Load IE network, initialize input data using that.
+    cv::Mat in_mat;
+    std::vector<cv::Mat> ie_ages;
+    std::vector<cv::Mat> ie_genders;
+    std::vector<cv::Mat> gapi_ages;
+    std::vector<cv::Mat> gapi_genders;
+
+    std::vector<cv::Rect> roi_list = {
+        cv::Rect(cv::Point{64, 60}, cv::Size{ 96,  96}),
+        cv::Rect(cv::Point{50, 32}, cv::Size{128, 160}),
+    };
+
+    using AGInfo = std::tuple<cv::GMat, cv::GMat>;
+    G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "test-age-gender");
+
+    cv::GArray<cv::Rect> rr;
+    cv::GMat in;
+    cv::GArray<cv::GMat> age, gender;
+    std::tie(age, gender) = cv::gapi::infer2<AgeGender>(in, rr);
+
+    cv::GComputation comp(cv::GIn(in, rr), cv::GOut(age, gender));
+
+    auto pp = cv::gapi::ie::Params<AgeGender> {
+        params.model_path, params.weights_path, params.device_id
+    }.cfgOutputLayers({ "age_conv3", "prob" });
+
+
+    std::size_t num_frames = 0u;
+    std::size_t max_frames = 10u;
+
+    cv::VideoCapture cap;
+    cap.open(filepath);
+    if (!cap.isOpened())
+        throw SkipTestException("Video file can not be opened");
+
+    cap >> in_mat;
+    auto pipeline = comp.compileStreaming(cv::compile_args(cv::gapi::networks(pp)));
+    pipeline.setSource(
+            cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(filepath), roi_list));
+
+    pipeline.start();
+    while (num_frames < max_frames && pipeline.pull(cv::gout(gapi_ages, gapi_genders)))
+    {
+        {
+            auto plugin = cv::gimpl::ie::wrap::getPlugin(params);
+            auto net = cv::gimpl::ie::wrap::readNetwork(params);
+            setNetParameters(net);
+            auto this_network = cv::gimpl::ie::wrap::loadNetwork(plugin, net, params);
+            auto infer_request = this_network.CreateInferRequest();
+            auto frame_blob = cv::gapi::ie::util::to_ie(in_mat);
+
+            for (auto &&rc : roi_list) {
+                const auto ie_rc = IE::ROI {
+                    0u
+                    , static_cast<std::size_t>(rc.x)
+                    , static_cast<std::size_t>(rc.y)
+                    , static_cast<std::size_t>(rc.width)
+                    , static_cast<std::size_t>(rc.height)
+                };
+                infer_request.SetBlob("data", IE::make_shared_blob(frame_blob, ie_rc));
+                infer_request.Infer();
+
+                using namespace cv::gapi::ie::util;
+                ie_ages.push_back(to_ocv(infer_request.GetBlob("age_conv3")).clone());
+                ie_genders.push_back(to_ocv(infer_request.GetBlob("prob")).clone());
+            }
+        } // namespace IE = ..
+        // Validate with IE itself (avoid DNN module dependency here)
+        normAssert(ie_ages [0], gapi_ages [0], "0: Test age output");
+        normAssert(ie_genders[0], gapi_genders[0], "0: Test gender output");
+        normAssert(ie_ages [1], gapi_ages [1], "1: Test age output");
+        normAssert(ie_genders[1], gapi_genders[1], "1: Test gender output");
+
+        ie_ages.clear();
+        ie_genders.clear();
+
+        ++num_frames;
+        cap >> in_mat;
+    }
+    pipeline.stop();
+}
+
 } // namespace opencv_test

 #endif // HAVE_INF_ENGINE