From a462d76a955a6e4a82f2e0f6adad8ac91afe2656 Mon Sep 17 00:00:00 2001 From: Pavel Vlasov Date: Mon, 15 Aug 2016 17:12:45 +0300 Subject: [PATCH] Implementations instrumentation framework for OpenCV performance system; --- CMakeLists.txt | 1 + cmake/templates/cvconfig.h.in | 3 + modules/core/include/opencv2/core/private.hpp | 135 ++++++++- modules/core/include/opencv2/core/utility.hpp | 131 +++++++++ modules/core/src/parallel.cpp | 17 ++ modules/core/src/stat.cpp | 41 +-- modules/core/src/system.cpp | 204 ++++++++++++++ modules/ts/include/opencv2/ts/ocl_perf.hpp | 6 +- modules/ts/include/opencv2/ts/ts_perf.hpp | 21 +- modules/ts/src/ts_perf.cpp | 263 +++++++++++++++++- 10 files changed, 790 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 05d88544c8..c9568546f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -286,6 +286,7 @@ OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too no OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF ) OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF IF ANDROID ) OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF ) +OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF ) OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF ) diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 4012f69426..56d415b400 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -212,3 +212,6 @@ /* FP16 */ #cmakedefine HAVE_FP16 + +/* Library was compiled with functions instrumentation */ +#cmakedefine ENABLE_INSTRUMENTATION diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index c884815d2e..6fcd1713c2 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -344,7 +344,7 @@ static struct __IppInitializer__ __ipp_initializer__; #ifdef CV_IPP_RUN_VERBOSE #define CV_IPP_RUN_(condition, func, ...) \ { \ - if (cv::ipp::useIPP() && (condition) && func) \ + if (cv::ipp::useIPP() && (condition) && (func)) \ { \ printf("%s: IPP implementation is running\n", CV_Func); \ fflush(stdout); \ @@ -376,7 +376,7 @@ static struct __IppInitializer__ __ipp_initializer__; } #else #define CV_IPP_RUN_(condition, func, ...) \ - if (cv::ipp::useIPP() && (condition) && func) \ + if (cv::ipp::useIPP() && (condition) && (func)) \ { \ CV_IMPL_ADD(CV_IMPL_IPP); \ return __VA_ARGS__; \ @@ -393,7 +393,7 @@ static struct __IppInitializer__ __ipp_initializer__; #define CV_IPP_RUN_FAST(func, ...) #endif -#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__) +#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_((condition), (func), __VA_ARGS__) #ifndef IPPI_CALL @@ -449,6 +449,135 @@ CV_EXPORTS void setUseTegra(bool flag); } #endif +#ifdef ENABLE_INSTRUMENTATION +namespace cv +{ +namespace instr +{ +struct InstrTLSStruct +{ + InstrTLSStruct() + { + pCurrentNode = NULL; + } + InstrNode* pCurrentNode; +}; + +class InstrStruct +{ +public: + InstrStruct() + { + useInstr = false; + enableMapping = true; + + rootNode.m_payload = NodeData("ROOT", NULL, 0, TYPE_GENERAL, IMPL_PLAIN); + tlsStruct.get()->pCurrentNode = &rootNode; + } + + Mutex mutexCreate; + Mutex mutexCount; + + bool useInstr; + bool enableMapping; + InstrNode rootNode; + TLSData tlsStruct; +}; + +class CV_EXPORTS IntrumentationRegion +{ +public: + IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN); + ~IntrumentationRegion(); + +private: + bool m_disabled; // region status + uint64 m_regionTicks; +}; + +InstrStruct& getInstrumentStruct(); +InstrTLSStruct& getInstrumentTLSStruct(); +CV_EXPORTS InstrNode* getCurrentNode(); +} +} + +///// General instrumentation +// Instrument region +#define CV_INSTRUMENT_REGION_META(TYPE, IMPL, ...) ::cv::instr::IntrumentationRegion __instr_region__(__FUNCTION__, __FILE__, __LINE__, TYPE, IMPL); +// Instrument functions with non-void return type +#define CV_INSTRUMENT_FUN_RT_META(TYPE, IMPL, ERROR_COND, FUN, ...) ([&]()\ +{\ + if(::cv::instr::useInstrumentation()){\ + ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ + try{\ + auto status = ((FUN)(__VA_ARGS__));\ + if(ERROR_COND){\ + ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\ + CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\ + }\ + return status;\ + }catch(...){\ + ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\ + CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\ + throw;\ + }\ + }else{\ + return ((FUN)(__VA_ARGS__));\ + }\ +}()) +// Instrument functions with void return type +#define CV_INSTRUMENT_FUN_RV_META(TYPE, IMPL, FUN, ...) ([&]()\ +{\ + if(::cv::instr::useInstrumentation()){\ + ::cv::instr::IntrumentationRegion __instr__(#FUN, __FILE__, __LINE__, TYPE, IMPL);\ + try{\ + (FUN)(__VA_ARGS__);\ + }catch(...){\ + ::cv::instr::getCurrentNode()->m_payload.m_funError = true;\ + CV_INSTRUMENT_MARK_META(IMPL, ##FUN - BadExit);\ + throw;\ + }\ + }else{\ + (FUN)(__VA_ARGS__);\ + }\ +}()) +// Instrumentation information marker +#define CV_INSTRUMENT_MARK_META(IMPL, NAME, ...) {::cv::instr::IntrumentationRegion __instr_mark__(#NAME, __FILE__, __LINE__, ::cv::instr::TYPE_MARKER, IMPL);} + +///// General instrumentation +// General OpenCV region instrumentation macro +#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) +// Parallel OpenCV region instrumentation macro +#define CV_INSTRUMENT_REGION_MT() CV_INSTRUMENT_REGION_MT_META(cv::instr::TYPE_GENERAL, cv::instr::IMPL_PLAIN) + +///// IPP instrumentation +// Wrapper region instrumentation macro +#define CV_INSTRUMENT_REGION_IPP() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_IPP) +// Function instrumentation macro +#define CV_INSTRUMENT_FUN_IPP(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_IPP, status < 0, FUN, __VA_ARGS__) +// Diagnostic markers +#define CV_INSTRUMENT_MARK_IPP(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_IPP, NAME) + +///// OpenCL instrumentation +// Wrapper region instrumentation macro +#define CV_INSTRUMENT_REGION_OPENCL() CV_INSTRUMENT_REGION_META(::cv::instr::TYPE_WRAPPER, ::cv::instr::IMPL_OPENCL) +// Function instrumentation macro +#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) CV_INSTRUMENT_FUN_RT_META(::cv::instr::TYPE_FUN, ::cv::instr::IMPL_OPENCL, status == 0, FUN, __VA_ARGS__) +// Diagnostic markers +#define CV_INSTRUMENT_MARK_OPENCL(NAME) CV_INSTRUMENT_MARK_META(::cv::instr::IMPL_OPENCL, NAME) +#else +#define CV_INSTRUMENT_REGION() +#define CV_INSTRUMENT_REGION_MT() + +#define CV_INSTRUMENT_REGION_IPP() +#define CV_INSTRUMENT_FUN_IPP(FUN, ...) ((FUN)(__VA_ARGS__)) +#define CV_INSTRUMENT_MARK_IPP(NAME) + +#define CV_INSTRUMENT_REGION_OPENCL() +#define CV_INSTRUMENT_FUN_OPENCL_KERNEL(FUN, ...) ((FUN)(__VA_ARGS__)) +#define CV_INSTRUMENT_MARK_OPENCL(NAME) +#endif + //! @endcond #endif // __OPENCV_CORE_PRIVATE_HPP__ diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 768c4d5223..911f779f38 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -1002,6 +1002,137 @@ template<> inline std::string CommandLineParser::get(const String& //! @endcond + +// Basic Node class for tree building +template +class CV_EXPORTS Node +{ +public: + Node() + { + m_pParent = 0; + } + Node(OBJECT& payload) : m_payload(payload) + { + m_pParent = 0; + } + ~Node() + { + removeChilds(); + if (m_pParent) + { + int idx = m_pParent->findChild(this); + if (idx >= 0) + m_pParent->m_childs.erase(m_pParent->m_childs.begin() + idx); + } + } + + Node* findChild(OBJECT& payload) const + { + for(int i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i]->m_payload == payload) + return this->m_childs[i]; + } + return NULL; + } + + int findChild(Node *pNode) const + { + for (int i = 0; i < this->m_childs.size(); i++) + { + if(this->m_childs[i] == pNode) + return i; + } + return -1; + } + + void addChild(Node *pNode) + { + if(!pNode) + return; + + CV_Assert(pNode->m_pParent == 0); + pNode->m_pParent = this; + this->m_childs.push_back(pNode); + } + + void removeChilds() + { + for(int i = 0; i < m_childs.size(); i++) + { + m_childs[i]->m_pParent = 0; // avoid excessive parent vector trimming + delete m_childs[i]; + } + m_childs.clear(); + } + +public: + OBJECT m_payload; + Node* m_pParent; + std::vector*> m_childs; +}; + +// Instrumentation external interface +namespace instr +{ +enum +{ + TYPE_GENERAL = 0, // OpenCV API function, e.g. exported function + TYPE_MARKER, // Information marker + TYPE_WRAPPER, // Wrapper function for implementation + TYPE_FUN, // Simple function call +}; + +enum +{ + IMPL_PLAIN = 0, + IMPL_IPP, + IMPL_OPENCL, +}; + +enum +{ + FLAGS_MAPPING = 0x01, +}; + +enum +{ + COMMAND_RESET = 0x01, +}; + +class CV_EXPORTS NodeData +{ +public: + NodeData(const char* funName = 0, const char* fileName = NULL, int lineNum = 0, int instrType = TYPE_GENERAL, int implType = IMPL_PLAIN); + NodeData(NodeData &ref); + ~NodeData(); + NodeData& operator=(const NodeData&); + + cv::String m_funName; + int m_instrType; + int m_implType; + cv::String m_fileName; + int m_lineNum; + + bool m_funError; + volatile int m_counter; + bool m_stopPoint; + uint64 m_ticksMean; + +}; +bool operator==(const NodeData& lhs, const NodeData& rhs); + +typedef Node InstrNode; + +CV_EXPORTS bool useInstrumentation(); +CV_EXPORTS void setUseInstrumentation(bool flag); +CV_EXPORTS InstrNode* getTrace(); +CV_EXPORTS void resetTrace(); +CV_EXPORTS void setFlags(int modeFlags); +CV_EXPORTS int getFlags(); +}; + } //namespace cv #ifndef DISABLE_OPENCV_24_COMPATIBILITY diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index b805ab46b0..d4968652a3 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -149,13 +149,25 @@ namespace public: ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes) { + body = &_body; wholeRange = _r; double len = wholeRange.end - wholeRange.start; nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len)); + +#ifdef ENABLE_INSTRUMENTATION + pThreadRoot = cv::instr::getInstrumentTLSStruct().pCurrentNode; +#endif } void operator()(const cv::Range& sr) const { +#ifdef ENABLE_INSTRUMENTATION + { + cv::instr::InstrTLSStruct *pInstrTLS = &cv::instr::getInstrumentTLSStruct(); + pInstrTLS->pCurrentNode = pThreadRoot; // Initialize TLS node for thread + } +#endif + cv::Range r; r.start = (int)(wholeRange.start + ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes); @@ -169,6 +181,9 @@ namespace const cv::ParallelLoopBody* body; cv::Range wholeRange; int nstripes; +#ifdef ENABLE_INSTRUMENTATION + cv::instr::InstrNode *pThreadRoot; +#endif }; #if defined HAVE_TBB @@ -252,6 +267,8 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { + CV_INSTRUMENT_REGION() + #ifdef CV_PARALLEL_FRAMEWORK if(numThreads != 0) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index be0f89e1a2..727d89554d 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1522,6 +1522,8 @@ namespace cv { static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask ) { + CV_INSTRUMENT_REGION_OPENCL() + bool haveMask = _mask.kind() != _InputArray::NONE; int nz = haveMask ? -1 : (int)_src.total(); Scalar mean, stddev; @@ -1585,7 +1587,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv k.args(srcarg, src.cols, (int)src.total(), groups, dbarg); size_t globalsize = groups * wgs; - if (!k.run(1, &globalsize, &wgs, false)) + + if(!CV_INSTRUMENT_FUN_OPENCL_KERNEL(k.run, 1, &globalsize, &wgs, false)) return false; typedef Scalar (* part_sum)(Mat m); @@ -1641,6 +1644,8 @@ namespace cv { static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask) { + CV_INSTRUMENT_REGION_IPP() + #if IPP_VERSION_X100 >= 700 int cn = src.channels(); size_t total_size = src.total(); @@ -1679,29 +1684,29 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m if( !mask.empty() ) { typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *); - ippiMaskMeanStdDevFuncC1 ippFuncC1 = + ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR = type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR : type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR : type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR : 0; - if( ippFuncC1 ) + if( ippiMean_StdDev_C1MR ) { - if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 ) + if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 ) { return true; } } typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *); - ippiMaskMeanStdDevFuncC3 ippFuncC3 = + ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR = type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR : type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR : type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR : 0; - if( ippFuncC3 ) + if( ippiMean_StdDev_C3CMR ) { - if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && - ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && - ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) + if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) { return true; } @@ -1710,31 +1715,31 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m else { typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *); - ippiMeanStdDevFuncC1 ippFuncC1 = + ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R = type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R : type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R : #if (IPP_VERSION_X100 >= 810) type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0 #endif 0; - if( ippFuncC1 ) + if( ippiMean_StdDev_C1R ) { - if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 ) + if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 ) { return true; } } typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *); - ippiMeanStdDevFuncC3 ippFuncC3 = + ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR = type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR : type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR : type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR : 0; - if( ippFuncC3 ) + if( ippiMean_StdDev_C3CR ) { - if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && - ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && - ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) + if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && + CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) { return true; } @@ -1751,6 +1756,8 @@ static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& m void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask ) { + CV_INSTRUMENT_REGION() + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, ocl_meanStdDev(_src, _mean, _sdv, _mask)) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index edf9e0ebd7..cc60cf792c 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1300,6 +1300,210 @@ void setUseCollection(bool flag) } #endif +namespace instr +{ +bool useInstrumentation() +{ +#ifdef ENABLE_INSTRUMENTATION + return getInstrumentStruct().useInstr; +#else + return false; +#endif +} + +void setUseInstrumentation(bool flag) +{ +#ifdef ENABLE_INSTRUMENTATION + getInstrumentStruct().useInstr = flag; +#else + CV_UNUSED(flag); +#endif +} + +InstrNode* getTrace() +{ +#ifdef ENABLE_INSTRUMENTATION + return &getInstrumentStruct().rootNode; +#else + return NULL; +#endif +} + +void resetTrace() +{ +#ifdef ENABLE_INSTRUMENTATION + getInstrumentStruct().rootNode.removeChilds(); + getInstrumentTLSStruct().pCurrentNode = &getInstrumentStruct().rootNode; +#endif +} + +void setFlags(int modeFlags) +{ +#ifdef ENABLE_INSTRUMENTATION + getInstrumentStruct().enableMapping = (modeFlags & FLAGS_MAPPING); +#else + CV_UNUSED(modeFlags); +#endif +} +int getFlags() +{ +#ifdef ENABLE_INSTRUMENTATION + int flags = 0; + if(getInstrumentStruct().enableMapping) + flags |= FLAGS_MAPPING; + return flags; +#else + return 0; +#endif +} + +NodeData::NodeData(const char* funName, const char* fileName, int lineNum, int instrType, int implType) +{ + m_instrType = TYPE_GENERAL; + m_implType = IMPL_PLAIN; + + m_funError = false; + m_counter = 0; + m_stopPoint = false; + m_ticksMean = 0; + + m_funName = funName; + m_instrType = instrType; + m_implType = implType; + m_fileName = fileName; + m_lineNum = lineNum; +} +NodeData::NodeData(NodeData &ref) +{ + *this = ref; +} +NodeData& NodeData::operator=(const NodeData &right) +{ + this->m_funName = right.m_funName; + this->m_instrType = right.m_instrType; + this->m_implType = right.m_implType; + this->m_funError = right.m_funError; + this->m_counter = right.m_counter; + this->m_stopPoint = right.m_stopPoint; + this->m_ticksMean = right.m_ticksMean; + this->m_fileName = right.m_fileName; + this->m_lineNum = right.m_lineNum; + + return *this; +} +NodeData::~NodeData() +{ +} +bool operator==(const NodeData& left, const NodeData& right) +{ + if(left.m_lineNum == right.m_lineNum && left.m_funName == right.m_funName && left.m_fileName == right.m_fileName) + return true; + return false; +} + +#ifdef ENABLE_INSTRUMENTATION +InstrStruct& getInstrumentStruct() +{ + static InstrStruct instr; + return instr; +} + +InstrTLSStruct& getInstrumentTLSStruct() +{ + return *getInstrumentStruct().tlsStruct.get(); +} + +InstrNode* getCurrentNode() +{ + return getInstrumentTLSStruct().pCurrentNode; +} + +IntrumentationRegion::IntrumentationRegion(const char* funName, const char* fileName, int lineNum, int instrType, int implType) +{ + m_disabled = false; + m_regionTicks = 0; + + InstrStruct *pStruct = &getInstrumentStruct(); + if(pStruct->useInstr) + { + InstrTLSStruct *pTLS = &getInstrumentTLSStruct(); + + // Disable in case of failure + if(!pTLS->pCurrentNode) + { + m_disabled = true; + return; + } + + m_disabled = pTLS->pCurrentNode->m_payload.m_stopPoint; + if(m_disabled) + return; + + NodeData payload(funName, fileName, lineNum, instrType, implType); + Node* pChild = NULL; + + if(pStruct->enableMapping) + { + // Critical section + cv::AutoLock guard(pStruct->mutexCreate); // Guard from concurrent child creation + pChild = pTLS->pCurrentNode->findChild(payload); + if(!pChild) + { + pChild = new Node(payload); + pTLS->pCurrentNode->addChild(pChild); + } + } + else + { + pChild = pTLS->pCurrentNode->findChild(payload); + if(!pChild) + { + pTLS->pCurrentNode->m_payload.m_stopPoint = true; + return; + } + } + pTLS->pCurrentNode = pChild; + + m_regionTicks = getTickCount(); + } +} + +IntrumentationRegion::~IntrumentationRegion() +{ + InstrStruct *pStruct = &getInstrumentStruct(); + if(pStruct->useInstr) + { + if(!m_disabled) + { + InstrTLSStruct *pTLS = &getInstrumentTLSStruct(); + if(pTLS->pCurrentNode->m_payload.m_stopPoint) + { + pTLS->pCurrentNode->m_payload.m_stopPoint = false; + } + else + { + if(pTLS->pCurrentNode->m_payload.m_implType == cv::instr::IMPL_OPENCL && + pTLS->pCurrentNode->m_payload.m_instrType == cv::instr::TYPE_FUN) + cv::ocl::finish(); + + uint64 ticks = (getTickCount() - m_regionTicks); + { + cv::AutoLock guard(pStruct->mutexCount); // Concurrent ticks accumulation + pTLS->pCurrentNode->m_payload.m_counter++; + if(pTLS->pCurrentNode->m_payload.m_counter <= 1) + pTLS->pCurrentNode->m_payload.m_ticksMean = ticks; + else + pTLS->pCurrentNode->m_payload.m_ticksMean = (pTLS->pCurrentNode->m_payload.m_ticksMean*(pTLS->pCurrentNode->m_payload.m_counter-1) + ticks)/pTLS->pCurrentNode->m_payload.m_counter; + } + + pTLS->pCurrentNode = pTLS->pCurrentNode->m_pParent; + } + } + } +} +#endif +} + namespace ipp { diff --git a/modules/ts/include/opencv2/ts/ocl_perf.hpp b/modules/ts/include/opencv2/ts/ocl_perf.hpp index c2e860067b..2f2f55ccaf 100644 --- a/modules/ts/include/opencv2/ts/ocl_perf.hpp +++ b/modules/ts/include/opencv2/ts/ocl_perf.hpp @@ -97,13 +97,13 @@ using std::tr1::tuple; // TODO Replace finish call to dstUMat.wait() #define OCL_TEST_CYCLE() \ - for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) + for (cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) #define OCL_TEST_CYCLE_N(n) \ - for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) + for (declare.iterations(n), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) #define OCL_TEST_CYCLE_MULTIRUN(runsNum) \ - for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \ + for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); next() && startTimer(); cvtest::ocl::perf::safeFinish(), stopTimer()) \ for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r) diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index 09998998b5..d1a726a486 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -354,6 +354,15 @@ typedef struct ImplData } ImplData; #endif +#ifdef ENABLE_INSTRUMENTATION +class InstumentData +{ +public: + static ::cv::String treeToString(); + static void printTree(); +}; +#endif + class CV_EXPORTS TestBase: public ::testing::Test { public: @@ -382,7 +391,7 @@ protected: virtual void SetUp(); virtual void TearDown(); - void startTimer(); + bool startTimer(); // bool is dummy for conditional loop void stopTimer(); bool next(); @@ -406,6 +415,10 @@ protected: #ifdef CV_COLLECT_IMPL_DATA ImplData implConf; #endif +#ifdef ENABLE_INSTRUMENTATION + InstumentData instrConf; +#endif + private: typedef std::vector > SizeVector; typedef std::vector TimeVector; @@ -645,9 +658,9 @@ int main(int argc, char **argv)\ CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\ } -#define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer()) -#define TEST_CYCLE() for(; startTimer(), next(); stopTimer()) -#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r) +#define TEST_CYCLE_N(n) for(declare.iterations(n); next() && startTimer(); stopTimer()) +#define TEST_CYCLE() for(; next() && startTimer(); stopTimer()) +#define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); next() && startTimer(); stopTimer()) for(int r = 0; r < runsNum; ++r) namespace perf { diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 1a959f176d..50e23548fe 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -45,6 +45,9 @@ static bool param_verify_sanity; #ifdef CV_COLLECT_IMPL_DATA static bool param_collect_impl; #endif +#ifdef ENABLE_INSTRUMENTATION +static bool param_instrument; +#endif extern bool test_ipp_check; #ifdef HAVE_CUDA @@ -727,7 +730,219 @@ public: } }; +#ifdef ENABLE_INSTRUMENTATION +static void printShift(cv::instr::InstrNode *pNode, cv::instr::InstrNode* pRoot) +{ + // Print empty line for a big tree nodes + if(pNode->m_pParent) + { + int parendIdx = pNode->m_pParent->findChild(pNode); + if(parendIdx > 0 && pNode->m_pParent->m_childs[parendIdx-1]->m_childs.size()) + { + printShift(pNode->m_pParent->m_childs[parendIdx-1]->m_childs[0], pRoot); + printf("\n"); + } + } + // Check if parents have more childs + std::vector cache; + cv::instr::InstrNode *pTmpNode = pNode; + while(pTmpNode->m_pParent && pTmpNode->m_pParent != pRoot) + { + cache.push_back(pTmpNode->m_pParent); + pTmpNode = pTmpNode->m_pParent; + } + for(int i = (int)cache.size()-1; i >= 0; i--) + { + if(cache[i]->m_pParent) + { + if(cache[i]->m_pParent->findChild(cache[i]) == cache[i]->m_pParent->m_childs.size()-1) + printf(" "); + else + printf("| "); + } + } +} + +static double calcLocalWeight(cv::instr::InstrNode *pNode) +{ + if(pNode->m_pParent && pNode->m_pParent->m_pParent) + return ((double)pNode->m_payload.m_ticksMean*100/pNode->m_pParent->m_payload.m_ticksMean); + else + return 100; +} + +static double calcGlobalWeight(cv::instr::InstrNode *pNode) +{ + cv::instr::InstrNode* globNode = pNode; + + while(globNode->m_pParent && globNode->m_pParent->m_pParent) + globNode = globNode->m_pParent; + + return ((double)pNode->m_payload.m_ticksMean*100/(double)globNode->m_payload.m_ticksMean); +} + +static void printNodeRec(cv::instr::InstrNode *pNode, cv::instr::InstrNode *pRoot) +{ + printf("%s", (pNode->m_payload.m_funName.substr(0, 40) + ((pNode->m_payload.m_funName.size()>40)?"...":"")).c_str()); + + // Write instrumentation flags + if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL || pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN) + { + printf("<"); + if(pNode->m_payload.m_instrType == cv::instr::TYPE_WRAPPER) + printf("W"); + else if(pNode->m_payload.m_instrType == cv::instr::TYPE_FUN) + printf("F"); + else if(pNode->m_payload.m_instrType == cv::instr::TYPE_MARKER) + printf("MARK"); + + if(pNode->m_payload.m_instrType != cv::instr::TYPE_GENERAL && pNode->m_payload.m_implType != cv::instr::IMPL_PLAIN) + printf("_"); + + if(pNode->m_payload.m_implType == cv::instr::IMPL_IPP) + printf("IPP"); + else if(pNode->m_payload.m_implType == cv::instr::IMPL_OPENCL) + printf("OCL"); + + printf(">"); + } + + if(pNode->m_pParent) + { + printf(" - C:%d", pNode->m_payload.m_counter); + printf(" T:%.4fms", (double)pNode->m_payload.m_ticksMean/cv::getTickFrequency()*1000); + if(pNode->m_pParent->m_pParent) + printf(" L:%.0f%% G:%.0f%%", calcLocalWeight(pNode), calcGlobalWeight(pNode)); + } + printf("\n"); + + // Group childes + std::vector groups; + { + bool bFound = false; + for(size_t i = 0; i < pNode->m_childs.size(); i++) + { + bFound = false; + for(size_t j = 0; j < groups.size(); j++) + { + if(groups[j] == pNode->m_childs[i]->m_payload.m_funName) + { + bFound = true; + break; + } + } + if(!bFound) + groups.push_back(pNode->m_childs[i]->m_payload.m_funName); + } + } + + for(size_t g = 0; g < groups.size(); g++) + { + for(size_t i = 0; i < pNode->m_childs.size(); i++) + { + if(pNode->m_childs[i]->m_payload.m_funName == groups[g]) + { + printShift(pNode->m_childs[i], pRoot); + + if(pNode->m_childs.size()-1 == pNode->m_childs[i]->m_pParent->findChild(pNode->m_childs[i])) + printf("\\---"); + else + printf("|---"); + printNodeRec(pNode->m_childs[i], pRoot); + } + } + } +} + +template +std::string to_string_with_precision(const T value, const int p = 3) +{ + std::ostringstream out; + out << std::fixed << std::setprecision(p) << value; + return out.str(); +} + +static cv::String nodeToString(cv::instr::InstrNode *pNode) +{ + cv::String string; + if (pNode->m_payload.m_funName == "ROOT") + string = pNode->m_payload.m_funName; + else + { + string = "#"; + string += std::to_string(pNode->m_payload.m_instrType); + string += pNode->m_payload.m_funName; + string += " - L:"; + string += to_string_with_precision(calcLocalWeight(pNode)); + string += ", G:"; + string += to_string_with_precision(calcGlobalWeight(pNode)); + } + string += "("; + for(size_t i = 0; i < pNode->m_childs.size(); i++) + string += nodeToString(pNode->m_childs[i]); + string += ")"; + + return string; +} + +static uint64 getNodeTimeRec(cv::instr::InstrNode *pNode, int type, int impl) +{ + uint64 ticks = 0; + + if(pNode->m_pParent && pNode->m_payload.m_instrType == type && pNode->m_payload.m_implType == impl) + ticks = pNode->m_payload.m_ticksMean; + + for(size_t i = 0; i < pNode->m_childs.size(); i++) + ticks += getNodeTimeRec(pNode->m_childs[i], type, impl); + + return ticks; +} + +static uint64 getImplTime(int impl) +{ + uint64 ticks = 0; + cv::instr::InstrNode *pRoot = cv::instr::getTrace(); + + ticks = getNodeTimeRec(pRoot, cv::instr::TYPE_FUN, impl); + + return ticks; +} + +static uint64 getTotalTime() +{ + uint64 ticks = 0; + cv::instr::InstrNode *pRoot = cv::instr::getTrace(); + + for(size_t i = 0; i < pRoot->m_childs.size(); i++) + ticks += pRoot->m_childs[i]->m_payload.m_ticksMean; + + return ticks; +} + +::cv::String InstumentData::treeToString() +{ + cv::String string = nodeToString(cv::instr::getTrace()); + return string; +} + +void InstumentData::printTree() +{ + if(cv::instr::getTrace()->m_childs.size()) + { + printf("[ TRACE ]\n"); + printNodeRec(cv::instr::getTrace(), cv::instr::getTrace()); +#ifdef HAVE_IPP + printf("\nIPP weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime())); +#endif +#ifdef HAVE_OPENCL + printf("\nOPENCL weight: %.1f%%", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime())); +#endif + printf("\n[/TRACE ]\n"); + fflush(stdout); + } +} +#endif /*****************************************************************************************\ * ::perf::TestBase @@ -774,6 +989,9 @@ void TestBase::Init(const std::vector & availableImpls, #endif #ifdef CV_COLLECT_IMPL_DATA "{ perf_collect_impl |false |collect info about executed implementations}" +#endif +#ifdef ENABLE_INSTRUMENTATION + "{ perf_instrument |false |instrument code to collect implementations trace}" #endif "{ help h |false |print help info}" #ifdef HAVE_CUDA @@ -826,6 +1044,9 @@ void TestBase::Init(const std::vector & availableImpls, #ifdef CV_COLLECT_IMPL_DATA param_collect_impl = args.get("perf_collect_impl"); #endif +#ifdef ENABLE_INSTRUMENTATION + param_instrument = args.get("perf_instrument"); +#endif #ifdef ANDROID param_affinity_mask = args.get("perf_affinity_mask"); log_power_checkpoints = args.has("perf_log_power_checkpoints"); @@ -856,6 +1077,12 @@ void TestBase::Init(const std::vector & availableImpls, else cv::setUseCollection(0); #endif +#ifdef ENABLE_INSTRUMENTATION + if(param_instrument) + cv::instr::setUseInstrumentation(true); + else + cv::instr::setUseInstrumentation(false); +#endif #ifdef HAVE_CUDA @@ -969,13 +1196,13 @@ int64 TestBase::_calibrate() cv::Mat b(2048, 2048, CV_32S, cv::Scalar(2)); declare.time(30); double s = 0; - for(declare.iterations(20); startTimer(), next(); stopTimer()) + for(declare.iterations(20); next() && startTimer(); stopTimer()) s+=a.dot(b); declare.time(s); //self calibration SetUp(); - for(declare.iterations(1000); startTimer(), next(); stopTimer()){} + for(declare.iterations(1000); next() && startTimer(); stopTimer()){} } }; @@ -1242,8 +1469,6 @@ bool TestBase::next() } #endif - if (has_next) - startTimer(); // really we should measure activity from this moment, so reset start time return has_next; } @@ -1281,9 +1506,17 @@ unsigned int TestBase::getTotalOutputSize() const return res; } -void TestBase::startTimer() +bool TestBase::startTimer() { +#ifdef ENABLE_INSTRUMENTATION + if(currentIter == 0) + { + cv::instr::setFlags(cv::instr::getFlags()|cv::instr::FLAGS_MAPPING); // enable mapping for the first run + cv::instr::resetTrace(); + } +#endif lastTime = cv::getTickCount(); + return true; // dummy true for conditional loop } void TestBase::stopTimer() @@ -1297,6 +1530,10 @@ void TestBase::stopTimer() if (lastTime < 0) lastTime = 0; times.push_back(lastTime); lastTime = 0; + +#ifdef ENABLE_INSTRUMENTATION + cv::instr::setFlags(cv::instr::getFlags()&~cv::instr::FLAGS_MAPPING); // disable mapping to decrease overhead for +1 run +#endif } performance_metrics& TestBase::calcMetrics() @@ -1469,6 +1706,16 @@ void TestBase::reportMetrics(bool toJUnitXML) RecordProperty("gstddev", cv::format("%.6f", m.gstddev).c_str()); RecordProperty("mean", cv::format("%.0f", m.mean).c_str()); RecordProperty("stddev", cv::format("%.0f", m.stddev).c_str()); +#ifdef ENABLE_INSTRUMENTATION + if(cv::instr::useInstrumentation()) + { + cv::String tree = InstumentData::treeToString(); + RecordProperty("functions_hierarchy", tree.c_str()); + RecordProperty("total_ipp_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_IPP)*100/(double)getTotalTime()))); + RecordProperty("total_opencl_weight", cv::format("%.1f", ((double)getImplTime(cv::instr::IMPL_OPENCL)*100/(double)getTotalTime()))); + cv::instr::resetTrace(); + } +#endif #ifdef CV_COLLECT_IMPL_DATA if(param_collect_impl) { @@ -1639,6 +1886,12 @@ void TestBase::TearDown() fflush(stdout); } #endif + +#ifdef ENABLE_INSTRUMENTATION + if(cv::instr::useInstrumentation()) + InstumentData::printTree(); +#endif + reportMetrics(true); }