diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index a1314e2b1e..c5046e8be6 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -162,6 +162,9 @@ set(gapi_srcs # Python bridge src/backends/ie/bindings_ie.cpp src/backends/python/gpythonbackend.cpp + + # Utils (ITT tracing) + src/utils/itt.cpp ) ocv_add_dispatched_file(backends/fluid/gfluidimgproc_func SSE4_1 AVX2) @@ -178,13 +181,22 @@ ocv_module_include_directories("${CMAKE_CURRENT_LIST_DIR}/src") ocv_create_module() ocv_target_link_libraries(${the_module} PRIVATE ade) + if(OPENCV_GAPI_INF_ENGINE) ocv_target_link_libraries(${the_module} PRIVATE ${INF_ENGINE_TARGET}) endif() + if(HAVE_TBB) ocv_target_link_libraries(${the_module} PRIVATE tbb) endif() +# TODO: Consider support of ITT in G-API standalone mode. +if(CV_TRACE AND HAVE_ITT) + ocv_target_compile_definitions(${the_module} PRIVATE -DOPENCV_WITH_ITT=1) + ocv_module_include_directories(${ITT_INCLUDE_DIRS}) + ocv_target_link_libraries(${the_module} PRIVATE ${ITT_LIBRARIES}) +endif() + set(__test_extra_deps "") if(OPENCV_GAPI_INF_ENGINE) list(APPEND __test_extra_deps ${INF_ENGINE_TARGET}) diff --git a/modules/gapi/src/backends/common/gmetabackend.cpp b/modules/gapi/src/backends/common/gmetabackend.cpp index 5364152b65..c535569b0c 100644 --- a/modules/gapi/src/backends/common/gmetabackend.cpp +++ b/modules/gapi/src/backends/common/gmetabackend.cpp @@ -71,7 +71,7 @@ void GraphMetaExecutable::run(std::vector &&input_objs, cv::util::get(out_arg) = it->second; } -class GraphMetaBackendImpl final: public cv::gapi::GBackend::Priv { +class GGraphMetaBackendImpl final: public cv::gapi::GBackend::Priv { virtual void unpackKernel(ade::Graph &, const ade::NodeHandle &, const cv::GKernelImpl &) override { @@ -88,7 +88,7 @@ class GraphMetaBackendImpl final: public cv::gapi::GBackend::Priv { }; cv::gapi::GBackend graph_meta_backend() { - static cv::gapi::GBackend this_backend(std::make_shared()); + static cv::gapi::GBackend 
this_backend(std::make_shared()); return this_backend; } diff --git a/modules/gapi/src/backends/cpu/gcpubackend.cpp b/modules/gapi/src/backends/cpu/gcpubackend.cpp index cf4b087f92..dfcaf3d478 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.cpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #include "precomp.hpp" @@ -26,6 +26,8 @@ #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! +#include "utils/itt.hpp" + // FIXME: Is there a way to take a typed graph (our GModel), // and create a new typed graph _ATOP_ of that (by extending with a couple of // new types?). @@ -251,8 +253,13 @@ void cv::gimpl::GCPUExecutable::run(std::vector &&input_objs, context.m_state = m_nodesToStates.at(op_info.nh); } - // Now trigger the executable unit - k.m_runF(context); + { + GAPI_ITT_DYNAMIC_LOCAL_HANDLE(op_hndl, op.k.name.c_str()); + GAPI_ITT_AUTO_TRACE_GUARD(op_hndl); + + // Now trigger the executable unit + k.m_runF(context); + } //As Kernels are forbidden to allocate memory for (Mat) outputs, //this code seems redundant, at least for Mats diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 820ee8e11c..1cd3285441 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #include "precomp.hpp" @@ -60,6 +60,8 @@ template using QueueClass = tbb::concurrent_bounded_queue; template using QueueClass = cv::gapi::own::concurrent_bounded_queue; #endif // TBB +#include "utils/itt.hpp" + namespace IE = InferenceEngine; namespace { @@ -757,6 +759,9 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg // to post outputs blobs (cv::GMat's). static void PostOutputs(InferenceEngine::InferRequest &request, std::shared_ptr ctx) { + GAPI_ITT_STATIC_LOCAL_HANDLE(ie_cb_post_outputs_hndl, "IE_async_callback_PostOutputs"); + GAPI_ITT_AUTO_TRACE_GUARD(ie_cb_post_outputs_hndl); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { auto& out_mat = ctx->outMatR(i); diff --git a/modules/gapi/src/compiler/gislandmodel.cpp b/modules/gapi/src/compiler/gislandmodel.cpp index fb2457a191..1a8e0939e2 100644 --- a/modules/gapi/src/compiler/gislandmodel.cpp +++ b/modules/gapi/src/compiler/gislandmodel.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #include "precomp.hpp" @@ -10,6 +10,8 @@ #include #include #include +#include // typeid +#include // std::isdigit #include #include // zip_range, indexed @@ -335,6 +337,53 @@ ade::NodeHandle GIslandModel::producerOf(const ConstGraph &g, ade::NodeHandle &d return ade::NodeHandle(); } +std::string GIslandModel::traceIslandName(const ade::NodeHandle& island_nh, const Graph& g) { + auto island_ptr = g.metadata(island_nh).get().object; + std::string island_name = island_ptr->name(); + + std::string backend_name = ""; + + auto& backend_impl = island_ptr->backend().priv(); + std::string backend_impl_type_name = typeid(backend_impl).name(); + + // NOTE: Major part of already existing backends implementaion classes are called using + // "*G[Name]BackendImpl*" pattern. + // We are trying to match against this pattern and retrive just [Name] part. + // If matching isn't successful, full mangled class name will be used. + // + // To match we use following algorithm: + // 1) Find "BackendImpl" substring, if it doesn't exist, go to step 5. + // 2) Let from_pos be second character in a string. + // 3) Starting from from_pos, seek for "G" symbol in a string. + // If it doesn't exist or exists after "BackendImpl" position, go to step 5. + // 4) Check that previous character before found "G" is digit, means that this is + // part of characters number in a new word in a string (previous words may be + // namespaces). + // If it is so, match is found. Return name between found "G" and "BackendImpl". + // If it isn't so, assign from_pos to found "G" position + 1 and loop to step 3. + // 5) Matching is not successful, return full class name. 
+ bool matched = false; + bool stop = false; + auto to_pos = backend_impl_type_name.find("BackendImpl"); + std::size_t from_pos = 0UL; + if (to_pos != std::string::npos) { + while (!matched && !stop) { + from_pos = backend_impl_type_name.find("G", from_pos + 1); + stop = from_pos == std::string::npos || from_pos >= to_pos; + matched = !stop && std::isdigit(backend_impl_type_name[from_pos - 1]); + } + } + + if (matched) { + backend_name = backend_impl_type_name.substr(from_pos + 1, to_pos - from_pos - 1); + } + else { + backend_name = backend_impl_type_name; + } + + return island_name + "_" + backend_name; +} + void GIslandExecutable::run(GIslandExecutable::IInput &in, GIslandExecutable::IOutput &out) { // Default implementation: just reuse the existing old-fashioned run diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index e8eb73692b..2cdd10346c 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #ifndef OPENCV_GAPI_GISLANDMODEL_HPP @@ -290,7 +290,11 @@ namespace GIslandModel // from the original model (! don't mix with DataSlot) // FIXME: GAPI_EXPORTS because of tests only! ade::NodeHandle GAPI_EXPORTS producerOf(const ConstGraph &g, ade::NodeHandle &data_nh); - + // traceIslandName - returns pretty island name for passed island node. + // Function uses RTTI to assembly name. + // In case if name of backend implementation class doesn't fit *G[Name]BackendImpl* pattern, + // raw mangled name of class will be used. 
+ std::string traceIslandName(const ade::NodeHandle& op_nh, const Graph& g); } // namespace GIslandModel }} // namespace cv::gimpl diff --git a/modules/gapi/src/executor/gapi_itt.hpp b/modules/gapi/src/executor/gapi_itt.hpp deleted file mode 100644 index 2ab3237e7f..0000000000 --- a/modules/gapi/src/executor/gapi_itt.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2020 Intel Corporation - -#ifndef OPENCV_GAPI_GAPI_ITT_HPP -#define OPENCV_GAPI_GAPI_ITT_HPP - -//for ITT_NAMED_TRACE_GUARD -#include -#include - -// FIXME: It seems that this macro is not propagated here by the OpenCV cmake (as this is not core module). -// (Consider using OpenCV's trace.hpp ) -#ifdef OPENCV_WITH_ITT -#include -#endif - -#include -namespace cv { -namespace util { - template< class T > - using remove_reference_t = typename std::remove_reference::type; - - // Home brew ScopeGuard - // D will be called automatically with p as argument when ScopeGuard goes out of scope. 
- // call release() on the ScopeGuard object to revoke guard action - template - auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr> { - return {p, std::forward(d)}; - } -} // namespace util - -// FIXME: make it more reusable (and move to other place and other namespace) -namespace gimpl { namespace parallel { - #ifdef OPENCV_WITH_ITT - extern const __itt_domain* gapi_itt_domain; - - namespace { - auto make_itt_guard = [](__itt_string_handle* h) { - __itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h)); - return util::make_ptr_guard(reinterpret_cast(1), [](int* ) { __itt_task_end(gapi_itt_domain); }); - }; - } // namespace - - #define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::parallel::make_itt_guard(h); cv::util::suppress_unused_warning(name) - #else - struct dumb_guard {void reset(){}}; - #define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::parallel::dumb_guard name; cv::util::suppress_unused_warning(name) - #endif - - #define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD(itt_trace_guard_##LINE, h) - #define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) - #define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h) -}} //gimpl::parallel -} //namespace cv - -#endif /* OPENCV_GAPI_GAPI_ITT_HPP */ diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 2a06873fee..bb1c33860b 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019-2020 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation #include "precomp.hpp" @@ -16,6 +16,8 @@ #include // GCopy -- FIXME - to be removed! 
#endif // GAPI_STANDALONE +#include "utils/itt.hpp" + #include "api/gproto_priv.hpp" // ptr(GRunArgP) #include "compiler/passes/passes.hpp" #include "backends/common/gbackend.hpp" // createMat @@ -492,9 +494,15 @@ void emitterActorThread(std::shared_ptr emitter, return; } + GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_hndl, "emitter"); + GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_pull_hndl, "emitter_pull"); + GAPI_ITT_STATIC_LOCAL_HANDLE(emitter_push_hndl, "emitter_push"); + // Now start emitting the data from the source to the pipeline. while (true) { + GAPI_ITT_AUTO_TRACE_GUARD(emitter_hndl); + Cmd cancel; if (in_queue.try_pop(cancel)) { @@ -507,8 +515,15 @@ void emitterActorThread(std::shared_ptr emitter, // Try to obtain next data chunk from the source cv::GRunArg data; - if (emitter->pull(data)) + + const bool result = [&](){ + GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl); + return emitter->pull(data); + }(); + + if (result) { + GAPI_ITT_AUTO_TRACE_GUARD(emitter_push_hndl); // // On success, broadcast it to our readers for (auto &&oq : out_queues) { @@ -539,7 +554,11 @@ void syncActorThread(std::vector in_queues, std::vector pop_nexts(in_queues.size()); std::vector cmds(in_queues.size()); + GAPI_ITT_STATIC_LOCAL_HANDLE(sync_hndl, "sync_actor"); + GAPI_ITT_STATIC_LOCAL_HANDLE(sync_pull_1_queue_hndl, "sync_actor_pull_from_1_queue"); + GAPI_ITT_STATIC_LOCAL_HANDLE(sync_push_hndl, "sync_actor_push"); while (true) { + GAPI_ITT_AUTO_TRACE_GUARD(sync_hndl); // pop_nexts indicates which queue still contains earlier timestamps and // needs to be popped at least one more time. // For each iteration (frame) we need to pull from each input queue at least once, @@ -562,7 +581,10 @@ void syncActorThread(std::vector in_queues, auto& q = std::get<1>(val); auto& cmd = std::get<2>(val); - q->pop(cmd); + { + GAPI_ITT_AUTO_TRACE_GUARD(sync_pull_1_queue_hndl); + q->pop(cmd); + } if (cv::util::holds_alternative(cmd)) { // We got a stop command from one of the input queues. 
// Rewind all input queues till Stop command, @@ -603,9 +625,12 @@ void syncActorThread(std::vector in_queues, } while (ade::util::any_of(pop_nexts, [](bool v){ return v; })); // Finally we got all our inputs synchronized, push them further down the graph - for (auto &&it : ade::util::zip(out_queues, cmds)) { - for (auto &&q : std::get<0>(it)) { - q->push(std::get<1>(it)); + { + GAPI_ITT_AUTO_TRACE_GUARD(sync_push_hndl); + for (auto &&it : ade::util::zip(out_queues, cmds)) { + for (auto &&q : std::get<0>(it)) { + q->push(std::get<1>(it)); + } } } } @@ -619,7 +644,11 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput virtual cv::gimpl::StreamMsg get() override { + GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get"); + GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + cv::GRunArgs isl_input_args; + if (!qr.getInputVector(in_queues, in_constants, isl_input_args)) { // Stop case @@ -680,6 +709,9 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput // Prepare this object for posting virtual cv::GRunArgP get(int idx) override { + GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_get_hndl, "StreamingOutput::get (alloc)"); + GAPI_ITT_AUTO_TRACE_GUARD(outputs_get_hndl); + std::lock_guard lock{m_mutex}; using MatType = cv::Mat; @@ -756,8 +788,12 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput m_postIdx[cv::gimpl::proto::ptr(ret_val)] = std::make_pair(idx, iter); return ret_val; } + virtual void post(cv::GRunArgP&& argp) override { + GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_post_hndl, "StreamingOutput::post"); + GAPI_ITT_AUTO_TRACE_GUARD(outputs_post_hndl); + std::lock_guard lock{m_mutex}; // Mark the output ready for posting. 
If it is the first in the line, @@ -795,6 +831,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput post_iter = m_postings[out_idx].erase(post_iter); } } + virtual void post(cv::gimpl::EndOfStream&&) override { std::lock_guard lock{m_mutex}; @@ -859,23 +896,27 @@ public: // executable for processing. // - Pushes processing results down to consumers - to the subsequent queues. // Note: Every data object consumer has its own queue. -void islandActorThread(std::vector in_rcs, // FIXME: this is... - std::vector out_rcs, // FIXME: ...basically just... - cv::GMetaArgs out_metas, // ... - std::shared_ptr island, // FIXME: ...a copy of OpDesc{}. +void islandActorThread(std::vector in_rcs, // FIXME: this is... + std::vector out_rcs, // FIXME: ...basically just... + cv::GMetaArgs out_metas, // ... + std::shared_ptr island_exec, // FIXME: ...a copy of OpDesc{}. std::vector in_queues, cv::GRunArgs in_constants, - std::vector< std::vector > out_queues) + std::vector< std::vector > out_queues, + const std::string& island_meta_info) { GAPI_Assert(in_queues.size() == in_rcs.size()); GAPI_Assert(out_queues.size() == out_rcs.size()); GAPI_Assert(out_queues.size() == out_metas.size()); QueueReader qr; StreamingInput input(qr, in_queues, in_constants, in_rcs); - StreamingOutput output(out_metas, out_queues, out_rcs, island); + StreamingOutput output(out_metas, out_queues, out_rcs, island_exec); + + GAPI_ITT_DYNAMIC_LOCAL_HANDLE(island_hndl, island_meta_info.c_str()); while (!output.done()) { - island->run(input, output); + GAPI_ITT_AUTO_TRACE_GUARD(island_hndl); + island_exec->run(input, output); } } @@ -904,11 +945,21 @@ void collectorThread(std::vector in_queues, flags[idx] = true; } + GAPI_ITT_STATIC_LOCAL_HANDLE(collector_hndl, "collector"); + GAPI_ITT_STATIC_LOCAL_HANDLE(collector_get_results_hndl, "collector_get_results"); + GAPI_ITT_STATIC_LOCAL_HANDLE(collector_push_hndl, "collector_push"); + QueueReader qr; while (true) { + 
GAPI_ITT_AUTO_TRACE_GUARD(collector_hndl); cv::GRunArgs this_result(out_size); - const bool ok = qr.getResultsVector(in_queues, in_mapping, out_size, this_result); + + const bool ok = [&](){ + GAPI_ITT_AUTO_TRACE_GUARD(collector_get_results_hndl); + return qr.getResultsVector(in_queues, in_mapping, out_size, this_result); + }(); + if (!ok) { if (handle_stop) @@ -918,7 +969,11 @@ void collectorThread(std::vector in_queues, // Terminate the thread anyway return; } - out_queue.push(Cmd{Result{std::move(this_result), flags}}); + + { + GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl); + out_queue.push(Cmd{Result{std::move(this_result), flags}}); + } } } @@ -1379,11 +1434,8 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) m_sync->registerVideoEmitters(std::move(video_emitters)); - // FIXME: The below code assumes our graph may have only one - // real video source (and so, only one stream which may really end) - // all other inputs are "constant" generators. // Craft here a completion callback to notify Const emitters that - // a video source is over + // any of video sources is over GAPI_Assert(m_const_emitter_queues.size() == m_const_vals.size()); auto real_video_completion_cb = [this]() { @@ -1431,7 +1483,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) for (auto &&op : m_ops) { // Prepare island thread parameters - auto island = m_gim.metadata(op.nh).get().object; + auto island_exec = m_gim.metadata(op.nh).get().object; // Collect actor's input queues auto in_queues = input_queues(*m_island_graph, op.nh); @@ -1443,6 +1495,13 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) out_queues.push_back(reader_queues(*m_island_graph, out_eh)); } + // Create just empty island meta information + std::string island_meta_info { }; +#if defined(OPENCV_WITH_ITT) + // In case if ITT tracing is enabled fill meta information with the built island name + island_meta_info = GIslandModel::traceIslandName(op.nh, m_gim); +#endif // 
OPENCV_WITH_ITT + // If Island Executable is recompiled, all its stuff including internal kernel states // are recreated and re-initialized automatically. // But if not, we should notify Island Executable about new started stream to let it update @@ -1456,10 +1515,11 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) op.in_objects, op.out_objects, op.out_metas, - island, + island_exec, in_queues, op.in_constants, - out_queues); + out_queues, + island_meta_info); } // Finally, start collector thread(s). @@ -1536,6 +1596,9 @@ void cv::gimpl::GStreamingExecutor::wait_shutdown() bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs) { + GAPI_ITT_STATIC_LOCAL_HANDLE(pull_hndl, "GStreamingExecutor::pull"); + GAPI_ITT_AUTO_TRACE_GUARD(pull_hndl); + // This pull() can only be called when there's no desynchronized // parts in the graph. GAPI_Assert(!m_desync && diff --git a/modules/gapi/src/executor/gtbbexecutor.cpp b/modules/gapi/src/executor/gtbbexecutor.cpp index 4966ba114b..cc6ccf9ef4 100644 --- a/modules/gapi/src/executor/gtbbexecutor.cpp +++ b/modules/gapi/src/executor/gtbbexecutor.cpp @@ -2,14 +2,14 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation #include "gtbbexecutor.hpp" #if defined(HAVE_TBB) && (TBB_INTERFACE_VERSION < 12000) // TODO: TBB task API has been deprecated and removed in 12000 -#include "gapi_itt.hpp" +#include "utils/itt.hpp" #include #include @@ -30,10 +30,6 @@ #define LOG_DEBUG(tag, ...) 
GAPI_LOG_DEBUG(tag, __VA_ARGS__) -#ifdef OPENCV_WITH_ITT -const __itt_domain* cv::gimpl::parallel::gapi_itt_domain = __itt_domain_create("GAPI Context"); -#endif - namespace cv { namespace gimpl { namespace parallel { namespace detail { @@ -82,18 +78,9 @@ void spawn_no_assert(tbb::task* root, body_t const& body) { tbb::task::spawn(* allocate_task(root, body)); } -#ifdef OPENCV_WITH_ITT -namespace { - static __itt_string_handle* ittTbbAddReadyBlocksToQueue = __itt_string_handle_create("add ready blocks to queue"); - static __itt_string_handle* ittTbbSpawnReadyBlocks = __itt_string_handle_create("spawn ready blocks"); - static __itt_string_handle* ittTbbEnqueueSpawnReadyBlocks = __itt_string_handle_create("enqueueing a spawn of ready blocks"); - static __itt_string_handle* ittTbbUnlockMasterThread = __itt_string_handle_create("Unlocking master thread"); -} -#endif // OPENCV_WITH_ITT - - template void batch_spawn(size_t count, tbb::task* root, body_t const& body, bool do_assert_graph_is_running = true) { + GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbSpawnReadyBlocks, "spawn ready blocks"); GAPI_ITT_AUTO_TRACE_GUARD(ittTbbSpawnReadyBlocks); if (do_assert_graph_is_running) { assert_graph_is_running(root); @@ -143,6 +130,7 @@ void inline wake_master(async_tasks_t& async_tasks, wake_tbb_master wake_master) if ((active_async_tasks == 0) || (wake_master == wake_tbb_master::YES)) { // Was the last async task or asked to wake TBB master up(e.g. there are new TBB tasks to execute) + GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbUnlockMasterThread, "Unlocking master thread"); GAPI_ITT_AUTO_TRACE_GUARD(ittTbbUnlockMasterThread); // While decrement of async_tasks_t::count is atomic, it might occur after the waiting // thread has read its value but _before_ it actually starts waiting on the condition variable. 
@@ -228,6 +216,7 @@ inline tile_node* pop(prio_items_queue_t& q) { namespace graph { // Returns : number of items actually pushed into the q std::size_t inline push_ready_dependants(prio_items_queue_t& q, tile_node* node) { + GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbAddReadyBlocksToQueue, "add ready blocks to queue"); GAPI_ITT_AUTO_TRACE_GUARD(ittTbbAddReadyBlocksToQueue); std::size_t ready_items = 0; // enable dependent tasks @@ -330,6 +319,7 @@ namespace graph { if (ready_items > 0) { auto master_was_active = is_tbb_work_present::NO; { + GAPI_ITT_STATIC_LOCAL_HANDLE(ittTbbEnqueueSpawnReadyBlocks, "enqueueing a spawn of ready blocks"); GAPI_ITT_AUTO_TRACE_GUARD(ittTbbEnqueueSpawnReadyBlocks); // Force master thread (one that does wait_for_all()) to (actively) wait for enqueued tasks // and unlock it right after all dependent tasks are spawned. diff --git a/modules/gapi/src/utils/itt.cpp b/modules/gapi/src/utils/itt.cpp new file mode 100644 index 0000000000..e92defa19c --- /dev/null +++ b/modules/gapi/src/utils/itt.cpp @@ -0,0 +1,17 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +// TODO: Consider using OpenCV's trace.hpp +#if defined(OPENCV_WITH_ITT) +#include +#include + +namespace cv { +namespace gimpl { + GAPI_EXPORTS __itt_domain* gapi_itt_domain = __itt_domain_create("GAPI Context"); +} // namespace gimpl +} // namespace cv +#endif // OPENCV_WITH_ITT diff --git a/modules/gapi/src/utils/itt.hpp b/modules/gapi/src/utils/itt.hpp new file mode 100644 index 0000000000..0b49af7226 --- /dev/null +++ b/modules/gapi/src/utils/itt.hpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_ITT_HPP +#define OPENCV_GAPI_ITT_HPP + +// for GAPI_ITT_NAMED_TRACE_GUARD +#include +#include + +#include + +// NOTE: OPENCV_WITH_ITT is only defined if ITT dependency is built by OpenCV infrastructure. +// There will not be such define in G-API standalone mode. +// TODO: Consider using OpenCV's trace.hpp +#if defined(OPENCV_WITH_ITT) +#include + +namespace cv { +namespace util { + template< class T > + using remove_reference_t = typename std::remove_reference::type; + + // Home brew ScopeGuard + // D will be called automatically with p as argument when ScopeGuard goes out of scope. + // call release() on the ScopeGuard object to revoke guard action + template + auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr> { + return {p, std::forward(d)}; + } +} // namespace util + +namespace gimpl { + extern __itt_domain* gapi_itt_domain; + namespace { + auto make_itt_guard = [](__itt_string_handle* h) { + __itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h)); + return util::make_ptr_guard(reinterpret_cast(1), + [](int* ){ __itt_task_end(gapi_itt_domain); }); + }; + } // namespace +} // namespace gimpl +} // namespace cv + +#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::make_itt_guard(h); \ + cv::util::suppress_unused_warning(name) +#define GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h) static __itt_string_handle* n = \ + __itt_string_handle_create(h) +#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h) __itt_string_handle* n = \ + __itt_string_handle_create(h) +#else // OPENCV_WITH_ITT + +namespace cv { +namespace gimpl { +struct dumb_guard { void reset() { } }; +} // namespace gimpl +} // namespace cv + +#define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::dumb_guard name; \ + cv::util::suppress_unused_warning(name); \ + cv::util::suppress_unused_warning(h) +#define GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h) static auto n = h +#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h) auto n = h +
+#endif // OPENCV_WITH_ITT + +#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD( \ + itt_trace_guard_##LINE, h) +#define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) +#define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h) + +#define GAPI_ITT_STATIC_LOCAL_HANDLE(n, h) GAPI_ITT_STATIC_LOCAL_HANDLE_IMPL(n, h) +#define GAPI_ITT_DYNAMIC_LOCAL_HANDLE(n, h) GAPI_ITT_DYNAMIC_LOCAL_HANDLE_IMPL(n, h) + +#endif // OPENCV_GAPI_ITT_HPP