Merge pull request #19425 from TolyaTalamanov:at/async-infer

[G-API] Implement async infer

* Implement async infer

* Fix typo
This commit is contained in:
Anatoliy Talamanov
2021-02-12 15:28:37 +03:00
committed by GitHub
parent 767127c92e
commit ba8d20e9ae
3 changed files with 740 additions and 217 deletions
+349 -210
View File
@@ -268,46 +268,168 @@ struct IEUnit {
}
};
struct IECallContext
class IECallContext
{
// Input parameters passed to an inference operation.
std::vector<cv::GArg> args;
cv::GShapes in_shapes;
public:
IECallContext(const IEUnit & unit,
cv::gimpl::GIslandExecutable::IOutput & output,
cv::gimpl::ie::SyncPrim & sync,
const cv::GArgs & args,
const std::vector<cv::gimpl::RcDesc> & outs,
std::vector<cv::gimpl::GIslandExecutable::InObj> && input_objs,
std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs);
//FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
//to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
//once on enter for input and output arguments, and once before return for output arguments only
//FIXME: check if the above applies to this backend (taken from CPU)
std::unordered_map<std::size_t, cv::GRunArgP> results;
const cv::GArgs& inArgs() const;
// Generic accessor API
template<typename T>
const T& inArg(std::size_t input) { return args.at(input).get<T>(); }
const T& inArg(std::size_t input) const {
return m_args.at(input).get<T>();
}
const cv::MediaFrame& inFrame(std::size_t input) {
return inArg<cv::MediaFrame>(input);
template<typename T>
std::vector<T>& outVecR(std::size_t output) {
return outVecRef(output).wref<T>();
}
// Syntax sugar
const cv::Mat& inMat(std::size_t input) {
return inArg<cv::Mat>(input);
}
cv::Mat& outMatR(std::size_t output) {
return *cv::util::get<cv::Mat*>(results.at(output));
}
cv::GShape inShape(int i) const;
const cv::Mat& inMat(std::size_t input) const;
const cv::MediaFrame& inFrame(std::size_t input) const;
template<typename T> std::vector<T>& outVecR(std::size_t output) { // FIXME: the same issue
return outVecRef(output).wref<T>();
}
cv::detail::VectorRef& outVecRef(std::size_t output) {
return cv::util::get<cv::detail::VectorRef>(results.at(output));
}
cv::Mat& outMatR(std::size_t idx);
cv::GRunArgP output(int idx);
const IEUnit &uu;
cv::gimpl::GIslandExecutable::IOutput &out;
cv::gimpl::ie::SyncPrim &sync;
// NB: Need to gurantee that MediaFrame::View don't die until request is over.
using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>;
Views views;
private:
cv::detail::VectorRef& outVecRef(std::size_t idx);
cv::GArg packArg(const cv::GArg &arg);
// To store input/output data from frames
std::vector<cv::gimpl::GIslandExecutable::InObj> m_input_objs;
std::vector<cv::gimpl::GIslandExecutable::OutObj> m_output_objs;
// To simplify access to cv::Mat inside cv::RMat
cv::gimpl::Mag m_res;
// FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
//to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
//once on enter for input and output arguments, and once before return for output arguments only
// FIXME: check if the above applies to this backend (taken from CPU)
std::unordered_map<std::size_t, cv::GRunArgP> m_results;
// Input parameters passed to an inference operation.
cv::GArgs m_args;
cv::GShapes m_in_shapes;
};
IECallContext::IECallContext(const IEUnit & unit,
cv::gimpl::GIslandExecutable::IOutput & output,
cv::gimpl::ie::SyncPrim & syncp,
const cv::GArgs & args,
const std::vector<cv::gimpl::RcDesc> & outs,
std::vector<cv::gimpl::GIslandExecutable::InObj> && input_objs,
std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs)
: uu(unit), out(output), sync(syncp), m_input_objs(std::move(input_objs)),
m_output_objs(std::move(output_objs))
{
for (auto& it : m_input_objs) cv::gimpl::magazine::bindInArg (m_res, it.first, it.second);
for (auto& it : m_output_objs) cv::gimpl::magazine::bindOutArg(m_res, it.first, it.second);
m_args.reserve(args.size());
using namespace std::placeholders;
ade::util::transform(args,
std::back_inserter(m_args),
std::bind(&IECallContext::packArg, this, _1));
ade::util::transform(args, std::back_inserter(m_in_shapes),
[](const cv::GArg& arg) {
return arg.get<cv::gimpl::RcDesc>().shape;
});
for (const auto out_it : ade::util::indexed(outs)) {
// FIXME: Can the same GArg type resolution mechanism be reused here?
const auto port = ade::util::index(out_it);
const auto desc = ade::util::value(out_it);
m_results[port] = cv::gimpl::magazine::getObjPtr(m_res, desc);
}
}
const cv::GArgs& IECallContext::inArgs() const {
return m_args;
}
cv::GShape IECallContext::inShape(int i) const {
return m_in_shapes[i];
}
const cv::Mat& IECallContext::inMat(std::size_t input) const {
return inArg<cv::Mat>(input);
}
const cv::MediaFrame& IECallContext::inFrame(std::size_t input) const {
return inArg<cv::MediaFrame>(input);
}
cv::Mat& IECallContext::outMatR(std::size_t idx) {
return *cv::util::get<cv::Mat*>(m_results.at(idx));
}
cv::GRunArgP IECallContext::output(int idx) {
return m_output_objs[idx].second;
};
cv::detail::VectorRef& IECallContext::outVecRef(std::size_t idx) {
return cv::util::get<cv::detail::VectorRef>(m_results.at(idx));
}
cv::GArg IECallContext::packArg(const cv::GArg &arg) {
// No API placeholders allowed at this point
// FIXME: this check has to be done somewhere in compilation stage.
GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT
&& arg.kind != cv::detail::ArgKind::GSCALAR
&& arg.kind != cv::detail::ArgKind::GARRAY);
if (arg.kind != cv::detail::ArgKind::GOBJREF) {
cv::util::throw_error(std::logic_error("Inference supports G-types ONLY!"));
}
GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF);
// Wrap associated CPU object (either host or an internal one)
// FIXME: object can be moved out!!! GExecutor faced that.
const cv::gimpl::RcDesc &ref = arg.get<cv::gimpl::RcDesc>();
switch (ref.shape)
{
case cv::GShape::GMAT: return cv::GArg(m_res.slot<cv::Mat>()[ref.id]);
// Note: .at() is intentional for GArray as object MUST be already there
// (and constructed by either bindIn/Out or resetInternal)
case cv::GShape::GARRAY: return cv::GArg(m_res.slot<cv::detail::VectorRef>().at(ref.id));
// Note: .at() is intentional for GOpaque as object MUST be already there
// (and constructed by either bindIn/Out or resetInternal)
case cv::GShape::GOPAQUE: return cv::GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
case cv::GShape::GFRAME: return cv::GArg(m_res.slot<cv::MediaFrame>().at(ref.id));
default:
cv::util::throw_error(std::logic_error("Unsupported GShape type"));
break;
}
}
struct IECallable {
static const char *name() { return "IERequestCallable"; }
// FIXME: Make IECallContext manage them all? (3->1)
using Run = std::function<void(cv::gimpl::ie::IECompiled &, const IEUnit &, IECallContext &)>;
using Run = std::function<void(cv::gimpl::ie::IECompiled&, std::shared_ptr<IECallContext>)>;
Run run;
};
@@ -341,14 +463,12 @@ using GConstGIEModel = ade::ConstTypedGraph
, IECallable
>;
using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>;
inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, Views& views) {
switch (ctx.in_shapes[i]) {
inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i) {
switch (ctx.inShape(i)) {
case cv::GShape::GFRAME: {
const auto& frame = ctx.inFrame(i);
views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)));
return wrapIE(*views.back(), frame.desc());
ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)));
return wrapIE(*(ctx.views.back()), frame.desc());
}
case cv::GShape::GMAT: {
return wrapIE(ctx.inMat(i), cv::gapi::ie::TraitAs::IMAGE);
@@ -395,90 +515,76 @@ cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g,
}
}
// FIXME: Document what it does
cv::GArg cv::gimpl::ie::GIEExecutable::packArg(const cv::GArg &arg) {
// No API placeholders allowed at this point
// FIXME: this check has to be done somewhere in compilation stage.
GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT
&& arg.kind != cv::detail::ArgKind::GSCALAR
&& arg.kind != cv::detail::ArgKind::GARRAY);
void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in,
cv::gimpl::GIslandExecutable::IOutput &out) {
// General alghoritm:
// 1. Get input message from IInput
// 2. Collect island inputs/outputs.
// 3. Create kernel context. (Every kernel has his own context.)
// 4. Since only single async request is supported
// wait until it is over and run kernel.
// (At this point, an asynchronous request will be started.)
// 5. Without waiting for the completion of the asynchronous request
// started by kernel go to the next frame (1)
//
// 6. If graph is compiled in non-streaming mode, wait until request is over.
if (arg.kind != cv::detail::ArgKind::GOBJREF) {
util::throw_error(std::logic_error("Inference supports G-types ONLY!"));
}
GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF);
std::vector<InObj> input_objs;
std::vector<OutObj> output_objs;
// Wrap associated CPU object (either host or an internal one)
// FIXME: object can be moved out!!! GExecutor faced that.
const cv::gimpl::RcDesc &ref = arg.get<cv::gimpl::RcDesc>();
switch (ref.shape)
const auto &in_desc = in.desc();
const auto &out_desc = out.desc();
const auto in_msg = in.get();
if (cv::util::holds_alternative<cv::gimpl::EndOfStream>(in_msg))
{
case GShape::GMAT: return GArg(m_res.slot<cv::Mat>()[ref.id]);
// Note: .at() is intentional for GArray as object MUST be already there
// (and constructed by either bindIn/Out or resetInternal)
case GShape::GARRAY: return GArg(m_res.slot<cv::detail::VectorRef>().at(ref.id));
// Note: .at() is intentional for GOpaque as object MUST be already there
// (and constructed by either bindIn/Out or resetInternal)
case GShape::GOPAQUE: return GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
case GShape::GFRAME: return GArg(m_res.slot<cv::MediaFrame>().at(ref.id));
default:
util::throw_error(std::logic_error("Unsupported GShape type"));
break;
// (1) Since kernel is executing asynchronously
// need to wait until the previous is over
m_sync.wait();
out.post(cv::gimpl::EndOfStream{});
return;
}
}
void cv::gimpl::ie::GIEExecutable::run(std::vector<InObj> &&input_objs,
std::vector<OutObj> &&output_objs) {
// Update resources with run-time information - what this Island
// has received from user (or from another Island, or mix...)
// FIXME: Check input/output objects against GIsland protocol
GAPI_Assert(cv::util::holds_alternative<cv::GRunArgs>(in_msg));
const auto in_vector = cv::util::get<cv::GRunArgs>(in_msg);
for (auto& it : input_objs) magazine::bindInArg (m_res, it.first, it.second);
for (auto& it : output_objs) magazine::bindOutArg(m_res, it.first, it.second);
// (2) Collect inputs/outputs
input_objs.reserve(in_desc.size());
output_objs.reserve(out_desc.size());
for (auto &&it: ade::util::zip(ade::util::toRange(in_desc),
ade::util::toRange(in_vector)))
{
input_objs.emplace_back(std::get<0>(it), std::get<1>(it));
}
for (auto &&it: ade::util::indexed(ade::util::toRange(out_desc)))
{
output_objs.emplace_back(ade::util::value(it),
out.get(ade::util::checked_cast<int>(ade::util::index(it))));
}
GConstGIEModel giem(m_g);
const auto &uu = giem.metadata(this_nh).get<IEUnit>();
const auto &op = m_gm.metadata(this_nh).get<Op>();
// (3) Create kernel context
auto context = std::make_shared<IECallContext>(uu, out, m_sync, op.args, op.outs,
std::move(input_objs), std::move(output_objs));
// (4) Only single async request is supported now,
// so need to wait until the previous is over.
m_sync.wait();
// (5) Run the kernel and start handle next frame.
const auto &kk = giem.metadata(this_nh).get<IECallable>();
// FIXME: Running just a single node now.
// Not sure if need to support many of them, though
// FIXME: Make this island-unmergeable?
const auto &op = m_gm.metadata(this_nh).get<Op>();
kk.run(this_iec, context);
// Initialize kernel's execution context:
// - Input parameters
IECallContext context;
context.args.reserve(op.args.size());
using namespace std::placeholders;
ade::util::transform(op.args,
std::back_inserter(context.args),
std::bind(&GIEExecutable::packArg, this, _1));
// NB: Need to store inputs shape to recognize GFrame/GMat
ade::util::transform(op.args,
std::back_inserter(context.in_shapes),
[](const cv::GArg& arg) {
return arg.get<cv::gimpl::RcDesc>().shape;
});
// - Output parameters.
for (const auto out_it : ade::util::indexed(op.outs)) {
// FIXME: Can the same GArg type resolution mechanism be reused here?
const auto out_port = ade::util::index(out_it);
const auto out_desc = ade::util::value(out_it);
context.results[out_port] = magazine::getObjPtr(m_res, out_desc);
// (6) In not-streaming mode need to wait until the async request is over
// FIXME: Is there more graceful way to handle this case ?
if (!m_gm.metadata().contains<Streaming>()) {
m_sync.wait();
}
// And now trigger the execution
GConstGIEModel giem(m_g);
const auto &uu = giem.metadata(this_nh).get<IEUnit>();
const auto &kk = giem.metadata(this_nh).get<IECallable>();
kk.run(this_iec, uu, context);
for (auto &it : output_objs) magazine::writeBack(m_res, it.first, it.second);
// In/Out args clean-up is mandatory now with RMat
for (auto &it : input_objs) magazine::unbind(m_res, it.first);
for (auto &it : output_objs) magazine::unbind(m_res, it.first);
}
namespace cv {
@@ -488,31 +594,82 @@ namespace ie {
static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) {
switch (mm.index()) {
case cv::GMetaArg::index_of<cv::GMatDesc>():
{
ii->setPrecision(toIE(util::get<cv::GMatDesc>(mm).depth));
break;
}
{
ii->setPrecision(toIE(util::get<cv::GMatDesc>(mm).depth));
break;
}
case cv::GMetaArg::index_of<cv::GFrameDesc>():
{
const auto &meta = util::get<cv::GFrameDesc>(mm);
switch (meta.fmt) {
case cv::MediaFormat::NV12:
ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12);
break;
case cv::MediaFormat::BGR:
// NB: Do nothing
break;
default:
GAPI_Assert(false && "Unsupported media format for IE backend");
}
ii->setPrecision(toIE(CV_8U));
break;
{
const auto &meta = util::get<cv::GFrameDesc>(mm);
switch (meta.fmt) {
case cv::MediaFormat::NV12:
ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12);
break;
case cv::MediaFormat::BGR:
// NB: Do nothing
break;
default:
GAPI_Assert(false && "Unsupported media format for IE backend");
}
ii->setPrecision(toIE(CV_8U));
break;
}
default:
util::throw_error(std::runtime_error("Unsupported input meta for IE backend"));
}
}
// NB: This is a callback used by async infer
// to post outputs blobs (cv::GMat's).
struct PostOutputs {
// NB: Should be const to pass into SetCompletionCallback
void operator()() const {
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
auto& out_mat = ctx->outMatR(i);
IE::Blob::Ptr this_blob = iec.this_request.GetBlob(ctx->uu.params.output_names[i]);
copyFromIE(this_blob, out_mat);
ctx->out.post(ctx->output(i));
}
ctx->sync.release_and_notify();
}
IECompiled &iec ;
std::shared_ptr<IECallContext> ctx ;
};
// NB: This is a callback used by async infer
// to post output list of blobs (cv::GArray<cv::GMat>).
struct PostOutputsList {
// NB: Should be const to pass into SetCompletionCallback
void operator()() const {
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
IE::Blob::Ptr out_blob = iec.this_request.GetBlob(ctx->uu.params.output_names[i]);
cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
// FIXME: Avoid data copy. Not sure if it is possible though
copyFromIE(out_blob, out_mat);
out_vec.push_back(std::move(out_mat));
}
// NB: Callbacks run synchronously yet, so the lock isn't necessary
auto&& out_vec_size = ctx->outVecR<cv::Mat>(0).size();
// NB: Now output vector is collected and can be posted to output
if (nrequests == out_vec_size) {
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
ctx->out.post(ctx->output(i));
}
}
ctx->sync.release_and_notify();
}
IECompiled &iec ;
std::shared_ptr<IECallContext> ctx ;
std::vector< std::vector<int> > cached_dims;
size_t nrequests;
};
struct Infer: public cv::detail::KernelTag {
using API = cv::GInferBase;
static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); }
@@ -563,29 +720,23 @@ struct Infer: public cv::detail::KernelTag {
return result;
}
static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) {
static void run(IECompiled &iec, std::shared_ptr<IECallContext> ctx) {
// non-generic version for now:
// - assumes all inputs/outputs are always Mats
Views views;
for (auto i : ade::util::iota(uu.params.num_in)) {
for (auto i : ade::util::iota(ctx->uu.params.num_in)) {
// TODO: Ideally we shouldn't do SetBlob() but GetBlob() instead,
// and redirect our data producers to this memory
// (A memory dialog comes to the picture again)
IE::Blob::Ptr this_blob = extractBlob(ctx, i, views);
iec.this_request.SetBlob(uu.params.input_names[i], this_blob);
IE::Blob::Ptr this_blob = extractBlob(*ctx, i);
iec.this_request.SetBlob(ctx->uu.params.input_names[i], this_blob);
}
iec.this_request.Infer();
for (auto i : ade::util::iota(uu.params.num_out)) {
// TODO: Think on avoiding copying here.
// Either we should ask IE to use our memory (what is not always the
// best policy) or use IE-allocated buffer inside (and pass it to the graph).
// Not a <very> big deal for classifiers and detectors,
// but may be critical to segmentation.
cv::Mat& out_mat = ctx.outMatR(i);
IE::Blob::Ptr this_blob = iec.this_request.GetBlob(uu.params.output_names[i]);
copyFromIE(this_blob, out_mat);
}
iec.this_request.SetCompletionCallback(PostOutputs{iec, ctx});
// NB: Since only single async request is supported, need to lock other
// attempts to get access while request is working.
ctx->sync.acquire();
iec.this_request.StartAsync();
}
};
@@ -630,22 +781,22 @@ struct InferROI: public cv::detail::KernelTag {
return result;
}
static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) {
static void run(IECompiled &iec, std::shared_ptr<IECallContext> ctx) {
// non-generic version for now, per the InferROI's definition
GAPI_Assert(uu.params.num_in == 1);
const auto& this_roi = ctx.inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
GAPI_Assert(ctx->uu.params.num_in == 1);
const auto& this_roi = ctx->inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
Views views;
IE::Blob::Ptr this_blob = extractBlob(ctx, 1, views);
IE::Blob::Ptr this_blob = extractBlob(*ctx, 1u);
iec.this_request.SetBlob(*uu.params.input_names.begin(),
IE::make_shared_blob(this_blob, toIE(this_roi)));
iec.this_request.Infer();
for (auto i : ade::util::iota(uu.params.num_out)) {
cv::Mat& out_mat = ctx.outMatR(i);
IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]);
copyFromIE(out_blob, out_mat);
}
iec.this_request.SetBlob(*(ctx->uu.params.input_names.begin()),
IE::make_shared_blob(this_blob, toIE(this_roi)));
iec.this_request.SetCompletionCallback(PostOutputs{iec, ctx});
// NB: Since only single async request is supported, need to lock other
// attempts to get access while request is working.
ctx->sync.acquire();
iec.this_request.StartAsync();
}
};
@@ -688,44 +839,43 @@ struct InferList: public cv::detail::KernelTag {
cv::GMetaArg{cv::empty_array_desc()});
}
static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) {
static void run(IECompiled &iec, std::shared_ptr<IECallContext> ctx) {
// non-generic version for now:
// - assumes zero input is always ROI list
// - assumes all inputs/outputs are always Mats
GAPI_Assert(uu.params.num_in == 1); // roi list is not counted in net's inputs
GAPI_Assert(ctx->uu.params.num_in == 1); // roi list is not counted in net's inputs
const auto& in_roi_vec = ctx.inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
Views views;
IE::Blob::Ptr this_blob = extractBlob(ctx, 1, views);
IE::Blob::Ptr this_blob = extractBlob(*ctx, 1u);
// FIXME: This could be done ONCE at graph compile stage!
std::vector< std::vector<int> > cached_dims(uu.params.num_out);
for (auto i : ade::util::iota(uu.params.num_out)) {
const IE::DataPtr& ie_out = uu.outputs.at(uu.params.output_names[i]);
std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
ctx.outVecR<cv::Mat>(i).clear();
ctx->outVecR<cv::Mat>(i).clear();
// FIXME: Isn't this should be done automatically
// by some resetInternalData(), etc? (Probably at the GExecutor level)
}
for (const auto &rc : in_roi_vec) {
// FIXME: Assumed only 1 input
for (auto&& rc : in_roi_vec) {
// NB: Only single async request is supported now,
// so need to wait until previos iteration is over.
// However there is no need to wait async request from last iteration,
// this will be done by backend.
ctx->sync.wait();
IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc));
iec.this_request.SetBlob(uu.params.input_names[0u], roi_blob);
iec.this_request.Infer();
iec.this_request.SetBlob(ctx->uu.params.input_names[0u], roi_blob);
// While input is fixed to be 1,
// there may be still multiple outputs
for (auto i : ade::util::iota(uu.params.num_out)) {
std::vector<cv::Mat> &out_vec = ctx.outVecR<cv::Mat>(i);
iec.this_request.SetCompletionCallback(
PostOutputsList{iec, ctx, cached_dims, in_roi_vec.size()});
IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]);
cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
copyFromIE(out_blob, out_mat); // FIXME: Avoid data copy. Not sure if it is possible though
out_vec.push_back(std::move(out_mat));
}
// NB: Since only single async request is supported, need to lock other
// attempts to get access while request is working.
ctx->sync.acquire();
iec.this_request.StartAsync();
}
}
};
@@ -813,69 +963,58 @@ struct InferList2: public cv::detail::KernelTag {
cv::GMetaArg{cv::empty_array_desc()});
}
static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) {
GAPI_Assert(ctx.args.size() > 1u
&& "This operation must have at least two arguments");
static void run(IECompiled &iec, std::shared_ptr<IECallContext> ctx) {
GAPI_Assert(ctx->inArgs().size() > 1u
&& "This operation must have at least two arguments");
Views views;
IE::Blob::Ptr blob_0 = extractBlob(ctx, 0, views);
IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0u);
// Take the next argument, which must be vector (of any kind).
// Use it only to obtain the ROI list size (sizes of all other
// vectors must be equal to this one)
const auto list_size = ctx.inArg<cv::detail::VectorRef>(1u).size();
const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size();
// FIXME: This could be done ONCE at graph compile stage!
std::vector< std::vector<int> > cached_dims(uu.params.num_out);
for (auto i : ade::util::iota(uu.params.num_out)) {
const IE::DataPtr& ie_out = uu.outputs.at(uu.params.output_names[i]);
std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
ctx.outVecR<cv::Mat>(i).clear();
ctx->outVecR<cv::Mat>(i).clear();
// FIXME: Isn't this should be done automatically
// by some resetInternalData(), etc? (Probably at the GExecutor level)
}
// For every ROI in the list {{{
for (const auto &list_idx : ade::util::iota(list_size)) {
// For every input of the net {{{
for (auto in_idx : ade::util::iota(uu.params.num_in)) {
const auto &this_vec = ctx.inArg<cv::detail::VectorRef>(in_idx+1u);
// NB: Only single async request is supported now,
// so need to wait until previos iteration is over.
// However there is no need to wait async request from last iteration,
// this will be done by backend.
ctx->sync.wait();
for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
GAPI_Assert(this_vec.size() == list_size);
// Prepare input {{{
IE::Blob::Ptr this_blob;
if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) {
// ROI case - create an ROI blob
const auto &vec = this_vec.rref<cv::Rect>();
this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx]));
} else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) {
// Mat case - create a regular blob
// FIXME: NOW Assume Mats are always BLOBS (not
// images)
const auto &vec = this_vec.rref<cv::Mat>();
const auto &mat = vec[list_idx];
this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR);
} else {
GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!");
}
iec.this_request.SetBlob(uu.params.input_names[in_idx], this_blob);
// }}} (Preapre input)
} // }}} (For every input of the net)
iec.this_request.SetBlob(ctx->uu.params.input_names[in_idx], this_blob);
}
// Run infer request {{{
iec.this_request.Infer();
// }}} (Run infer request)
iec.this_request.SetCompletionCallback(
PostOutputsList{iec, ctx, cached_dims, list_size});
// For every output of the net {{{
for (auto i : ade::util::iota(uu.params.num_out)) {
// Push results to the list {{{
std::vector<cv::Mat> &out_vec = ctx.outVecR<cv::Mat>(i);
IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]);
cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
copyFromIE(out_blob, out_mat); // FIXME: Avoid data copy. Not sure if it is possible though
out_vec.push_back(std::move(out_mat));
// }}} (Push results to the list)
} // }}} (For every output of the net)
} // }}} (For every ROI in the list)
// NB: Since only single async request is supported, need to lock other
// attempts to get access while request is working.
ctx->sync.acquire();
iec.this_request.StartAsync();
}
}
};
+42 -7
View File
@@ -13,6 +13,7 @@
#ifdef HAVE_INF_ENGINE
#include <ade/util/algorithm.hpp> // type_list_index
#include <condition_variable>
#include <inference_engine.hpp>
@@ -37,6 +38,37 @@ struct IECompiled {
InferenceEngine::InferRequest this_request;
};
// FIXME: Structure which collect all necessary sync primitives
// will be deleted when the async request pool appears
class SyncPrim {
public:
void wait() {
std::unique_lock<std::mutex> l(m_mutex);
m_cv.wait(l, [this]{ return !m_is_busy; });
}
void release_and_notify() {
{
std::lock_guard<std::mutex> lock(m_mutex);
m_is_busy = false;
}
m_cv.notify_one();
}
void acquire() {
std::lock_guard<std::mutex> lock(m_mutex);
m_is_busy = true;
}
private:
// To wait until the async request isn't over
std::condition_variable m_cv;
// To avoid spurious cond var wake up
bool m_is_busy = false;
// To sleep until condition variable wakes up
std::mutex m_mutex;
};
class GIEExecutable final: public GIslandExecutable
{
const ade::Graph &m_g;
@@ -50,11 +82,8 @@ class GIEExecutable final: public GIslandExecutable
// List of all resources in graph (both internal and external)
std::vector<ade::NodeHandle> m_dataNodes;
// Actual data of all resources in graph (both internal and external)
Mag m_res;
// Execution helpers
GArg packArg(const GArg &arg);
// Sync primitive
SyncPrim m_sync;
public:
GIEExecutable(const ade::Graph &graph,
@@ -65,8 +94,14 @@ public:
GAPI_Assert(false); // Not implemented yet
}
virtual void run(std::vector<InObj> &&input_objs,
std::vector<OutObj> &&output_objs) override;
virtual void run(std::vector<InObj> &&,
std::vector<OutObj> &&) override {
GAPI_Assert(false && "Not implemented");
}
virtual void run(GIslandExecutable::IInput &in,
GIslandExecutable::IOutput &out) override;
};
}}}