diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 6f2258d97b..c07a2b8ddf 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -499,11 +499,12 @@ macro(ocv_glob_module_sources) source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) file(GLOB cl_kernels "src/opencl/*.cl") - if(HAVE_opencv_ocl AND cl_kernels) + if(cl_kernels) ocv_include_directories(${OPENCL_INCLUDE_DIRS}) + string(REGEX REPLACE "opencv_" "" the_module_barename "${the_module}") add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp" - COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" + COMMAND ${CMAKE_COMMAND} -DMODULE_NAME="${the_module_barename}" -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake") source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 1916c3ee5b..24d3eb2dc5 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -4,6 +4,15 @@ list(SORT cl_list) string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}") get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME) +if("${MODULE_NAME}" STREQUAL "ocl") + set(nested_namespace_start "") + set(nested_namespace_end "") +else() + set(new_mode ON) + set(nested_namespace_start "namespace ${MODULE_NAME}\n{") + set(nested_namespace_end "}") +endif() + set(STR_CPP "// This file is auto-generated. Do not edit! #include \"precomp.hpp\" @@ -13,16 +22,19 @@ namespace cv { namespace ocl { +${nested_namespace_start} + ") set(STR_HPP "// This file is auto-generated. Do not edit! -#include \"opencv2/ocl/private/util.hpp\" +#include \"opencv2/core/ocl_genbase.hpp\" namespace cv { namespace ocl { +${nested_namespace_start} ") @@ -49,12 +61,19 @@ foreach(cl ${cl_list}) string(MD5 hash "${lines}") - set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n") - set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n") + set(STR_CPP_DECL "const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n") + set(STR_HPP_DECL "extern const struct ProgramEntry ${cl_filename};\n") + if(new_mode) + set(STR_CPP_DECL "${STR_CPP_DECL}ProgramSource2 ${cl_filename}_oclsrc(${cl_filename}.programStr);\n") + set(STR_HPP_DECL "${STR_HPP_DECL}extern ProgramSource2 ${cl_filename}_oclsrc;\n") + endif() + + set(STR_CPP "${STR_CPP}${STR_CPP_DECL}") + set(STR_HPP "${STR_HPP}${STR_HPP_DECL}") endforeach() -set(STR_CPP "${STR_CPP}}\n}\n") -set(STR_HPP "${STR_HPP}}\n}\n") +set(STR_CPP "${STR_CPP}}\n${nested_namespace_end}}\n") +set(STR_HPP "${STR_HPP}}\n${nested_namespace_end}}\n") file(WRITE "${OUTPUT}" "${STR_CPP}") diff --git a/modules/bioinspired/src/precomp.hpp b/modules/bioinspired/src/precomp.hpp index 541b970325..61aeb5409c 100644 --- a/modules/bioinspired/src/precomp.hpp +++ b/modules/bioinspired/src/precomp.hpp @@ -47,6 +47,7 @@ #include "opencv2/bioinspired.hpp" #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" +#include "opencv2/core/ocl.hpp" #include diff --git a/modules/bioinspired/src/retina_ocl.cpp b/modules/bioinspired/src/retina_ocl.cpp index a365ab0971..5d2b4bd15e 100644 --- a/modules/bioinspired/src/retina_ocl.cpp +++ b/modules/bioinspired/src/retina_ocl.cpp @@ -56,6 +56,8 @@ namespace cv { +static ocl::ProgramEntry retina_kernel = ocl::bioinspired::retina_kernel; + namespace bioinspired { namespace ocl diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 1fce576d6c..87263fa7d0 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -347,6 +347,10 @@ CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst); CV_EXPORTS void min(const Mat& src1, const Mat& src2, Mat& dst); //! computes per-element maximum of two arrays (dst = max(src1, src2)) CV_EXPORTS void max(const Mat& src1, const Mat& src2, Mat& dst); +//! computes per-element minimum of two arrays (dst = min(src1, src2)) +CV_EXPORTS void min(const UMat& src1, const UMat& src2, UMat& dst); +//! computes per-element maximum of two arrays (dst = max(src1, src2)) +CV_EXPORTS void max(const UMat& src1, const UMat& src2, UMat& dst); //! computes square root of each matrix element (dst = src**0.5) CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst); diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 4df2432aeb..2f38f8bbb8 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -58,6 +58,8 @@ namespace cv enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25, ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 }; +class CV_EXPORTS _OutputArray; + //////////////////////// Input/Output Array Arguments ///////////////////////////////// /*! @@ -116,12 +118,22 @@ public: void* getObj() const; virtual int kind() const; + virtual int dims(int i=-1) const; virtual Size size(int i=-1) const; + virtual int sizend(int* sz, int i=-1) const; + virtual bool sameSize(const _InputArray& arr) const; virtual size_t total(int i=-1) const; virtual int type(int i=-1) const; virtual int depth(int i=-1) const; virtual int channels(int i=-1) const; + virtual bool isContinuous(int i=-1) const; virtual bool empty() const; + virtual void copyTo(const _OutputArray& arr) const; + bool isMat() const; + bool isUMat() const; + bool isMatVectot() const; + bool isUMatVector() const; + bool isMatx(); virtual ~_InputArray(); @@ -197,8 +209,10 @@ public: virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const; + virtual void createSameSize(const _InputArray& arr, int mtype) const; virtual void release() const; virtual void clear() const; + virtual void setTo(const _InputArray& value) const; }; diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 3c49984e7f..5e8e6ee600 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -108,6 +108,12 @@ inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem) inline _InputArray::~_InputArray() {} +inline bool _InputArray::isMat() const { return kind() == _InputArray::MAT; } +inline bool _InputArray::isUMat() const { return kind() == _InputArray::UMAT; } +inline bool _InputArray::isMatVectot() const { return kind() == _InputArray::STD_VECTOR_MAT; } +inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::STD_VECTOR_UMAT; } +inline bool _InputArray::isMatx() { return kind() == _InputArray::MATX; } + //////////////////////////////////////////////////////////////////////////////////////// inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); } diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 419ccffd5b..9a30962061 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -49,13 +49,13 @@ namespace cv { namespace ocl { CV_EXPORTS bool haveOpenCL(); CV_EXPORTS bool useOpenCL(); CV_EXPORTS void setUseOpenCL(bool flag); -CV_EXPORTS void finish(); +CV_EXPORTS void finish2(); -class CV_EXPORTS Context; +class CV_EXPORTS Context2; class CV_EXPORTS Device; class CV_EXPORTS Kernel; class CV_EXPORTS Program; -class CV_EXPORTS ProgramSource; +class CV_EXPORTS ProgramSource2; class CV_EXPORTS Queue; class CV_EXPORTS Device @@ -199,22 +199,22 @@ protected: }; -class CV_EXPORTS Context +class CV_EXPORTS Context2 { public: - Context(); - explicit Context(int dtype); - ~Context(); - Context(const Context& c); - Context& operator = (const Context& c); + Context2(); + explicit Context2(int dtype); + ~Context2(); + Context2(const Context2& c); + Context2& operator = (const Context2& c); bool create(int dtype); size_t ndevices() const; const Device& device(size_t idx) const; - Program getProg(const ProgramSource& prog, + Program getProg(const ProgramSource2& prog, const String& buildopt, String& errmsg); - static Context& getDefault(); + static Context2& getDefault(); void* ptr() const; protected: struct Impl; @@ -226,12 +226,12 @@ class CV_EXPORTS Queue { public: Queue(); - explicit Queue(const Context& c, const Device& d=Device()); + explicit Queue(const Context2& c, const Device& d=Device()); ~Queue(); Queue(const Queue& q); Queue& operator = (const Queue& q); - bool create(const Context& c=Context(), const Device& d=Device()); + bool create(const Context2& c=Context2(), const Device& d=Device()); void finish(); void* ptr() const; static Queue& getDefault(); @@ -245,41 +245,55 @@ protected: class CV_EXPORTS KernelArg { public: - enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 }; - KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0); + enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, NO_SIZE=256 }; + KernelArg(int _flags, UMat* _m, int wscale=1, const void* _obj=0, size_t _sz=0); + KernelArg(); static KernelArg Local() { return KernelArg(LOCAL, 0); } - static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); } - static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); } + static KernelArg ReadWrite(const UMat& m, int wscale=1) + { return KernelArg(READ_WRITE, (UMat*)&m, wscale); } + static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1) + { return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale); } + static KernelArg ReadOnly(const UMat& m, int wscale=1) + { return KernelArg(READ_ONLY, (UMat*)&m, wscale); } + static KernelArg WriteOnly(const UMat& m, int wscale=1) + { return KernelArg(WRITE_ONLY, (UMat*)&m, wscale); } + static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1) + { return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale); } + static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1) + { return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale); } static KernelArg Constant(const Mat& m); template static KernelArg Constant(const _Tp* arr, size_t n) - { return KernelArg(CONSTANT, 0, (void*)arr, n); } + { return KernelArg(CONSTANT, 0, 1, (void*)arr, n); } int flags; UMat* m; - void* obj; + const void* obj; size_t sz; + int wscale; }; + class CV_EXPORTS Kernel { public: Kernel(); Kernel(const char* kname, const Program& prog); - Kernel(const char* kname, const ProgramSource& prog, - const String& buildopts, String& errmsg); + Kernel(const char* kname, const ProgramSource2& prog, + const String& buildopts, String* errmsg=0); ~Kernel(); Kernel(const Kernel& k); Kernel& operator = (const Kernel& k); + bool empty() const; bool create(const char* kname, const Program& prog); - bool create(const char* kname, const ProgramSource& prog, - const String& buildopts, String& errmsg); + bool create(const char* kname, const ProgramSource2& prog, + const String& buildopts, String* errmsg=0); - void set(int i, const void* value, size_t sz); - void set(int i, const UMat& m); - void set(int i, const KernelArg& arg); - template void set(int i, const _Tp& value) + int set(int i, const void* value, size_t sz); + int set(int i, const UMat& m); + int set(int i, const KernelArg& arg); + template int set(int i, const _Tp& value) { return set(i, &value, sizeof(value)); } template @@ -291,26 +305,27 @@ public: template Kernel& args(const _Tp0& a0, const _Tp1& a1) { - set(0, a0); set(1, a1); return *this; + int i = set(0, a0); set(i, a1); return *this; } template Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2) { - set(0, a0); set(1, a1); set(2, a2); return *this; + int i = set(0, a0); i = set(i, a1); set(i, a2); return *this; } template Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3) { - set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this; + int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this; } template Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, const _Tp4& a4) { - set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this; + int i = set(0, a0); i = set(i, a1); i = set(i, a2); + i = set(i, a3); set(i, a4); return *this; } template 4) ) + return false; + + UMat src1 = _src1.getUMat(), src2; + UMat dst = _dst.getUMat(), mask = _mask.getUMat(); + + char opts[1024]; + int kercn = haveMask || haveScalar ? cn : 1; + sprintf(opts, "-D %s%s -D %s -D dstT=%s", + (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop], + bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : + ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn))); + + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); + if( k.empty() ) + return false; + + int cscale = cn/kercn; + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale); + ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) : + ocl::KernelArg::WriteOnly(dst, cscale); + ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1); + + if( haveScalar ) + { + size_t esz = CV_ELEM_SIZE(srctype); + double buf[4] = {0,0,0,0}; + + if( oclop != OCL_OP_NOT ) + { + Mat src2sc = _src2.getMat(); + convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1); + } + + ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz); + + if( !haveMask ) + k.args(src1arg, dstarg, scalararg); + else + k.args(src1arg, maskarg, dstarg, scalararg); + } + else + { + src2 = _src2.getUMat(); + ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale); + + if( !haveMask ) + k.args(src1arg, src2arg, dstarg); + else + k.args(src1arg, src2arg, maskarg, dstarg); + } + + size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows }; + return k.run(2, globalsize, 0, false); +} + + +static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst, + InputArray _mask, const BinaryFunc* tab, + bool bitwise, int oclop ) +{ + const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; + int kind1 = psrc1->kind(), kind2 = psrc2->kind(); + int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); + int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2); + int dims1 = psrc1->dims(), dims2 = psrc2->dims(); + Size sz1 = dims1 <= 2 ? psrc1->size() : Size(); + Size sz2 = dims2 <= 2 ? psrc2->size() : Size(); + bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) && + ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2; bool haveMask = !_mask.empty(), haveScalar = false; BinaryFunc func; - int c; - if( src1.dims <= 2 && src2.dims <= 2 && kind1 == kind2 && - src1.size() == src2.size() && src1.type() == src2.type() && !haveMask ) + if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask ) { - _dst.create(src1.size(), src1.type()); - Mat dst = _dst.getMat(); + _dst.create(sz1, type1); + if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false) ) + return; if( bitwise ) { func = *tab; - c = (int)src1.elemSize(); + cn = (int)CV_ELEM_SIZE(type1); } else - { - func = tab[src1.depth()]; - c = src1.channels(); - } + func = tab[depth1]; + Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize(src1, src2, dst); - size_t len = sz.width*(size_t)c; + size_t len = sz.width*(size_t)cn; if( len == (size_t)(int)len ) { sz.width = (int)len; @@ -946,56 +1026,67 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, } } - if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 || - src1.size != src2.size || src1.type() != src2.type() ) + if( oclop == OCL_OP_NOT ) + haveScalar = true; + else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 || + !psrc1->sameSize(*psrc2) || type1 != type2 ) { - if( checkScalar(src1, src2.type(), kind1, kind2) ) + if( checkScalar(*psrc1, type2, kind1, kind2) ) + { // src1 is a scalar; swap it with src2 - swap(src1, src2); - else if( !checkScalar(src2, src1.type(), kind2, kind1) ) + swap(psrc1, psrc2); + swap(type1, type2); + swap(depth1, depth2); + swap(cn, cn2); + swap(sz1, sz2); + } + else if( !checkScalar(*psrc2, type1, kind2, kind1) ) CV_Error( CV_StsUnmatchedSizes, "The operation is neither 'array op array' (where arrays have the same size and type), " "nor 'array op scalar', nor 'scalar op array'" ); haveScalar = true; } + else + { + CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 ); + } - size_t esz = src1.elemSize(); + size_t esz = CV_ELEM_SIZE(type1); size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz; - int cn = src1.channels(); BinaryFunc copymask = 0; - Mat mask; bool reallocate = false; if( haveMask ) { - mask = _mask.getMat(); - CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) ); - CV_Assert( mask.size == src1.size ); + int mtype = _mask.type(); + CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1)); copymask = getCopyMaskFunc(esz); - Mat tdst = _dst.getMat(); - reallocate = tdst.size != src1.size || tdst.type() != src1.type(); + reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1; } AutoBuffer _buf; uchar *scbuf = 0, *maskbuf = 0; - _dst.create(src1.dims, src1.size, src1.type()); - Mat dst = _dst.getMat(); - + _dst.createSameSize(*psrc1, type1); // if this is mask operation and dst has been reallocated, - // we have to + // we have to clear the destination if( haveMask && reallocate ) - dst = Scalar::all(0); + _dst.setTo(0.); + + if( use_opencl && ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar )) + return; + + Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(); + Mat dst = _dst.getMat(), mask = _mask.getMat(); if( bitwise ) { func = *tab; - c = (int)esz; + cn = (int)esz; } else { - func = tab[src1.depth()]; - c = cn; + func = tab[depth1]; } if( !haveScalar ) @@ -1006,8 +1097,8 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, NAryMatIterator it(arrays, ptrs); size_t total = it.size, blocksize = total; - if( blocksize*c > INT_MAX ) - blocksize = INT_MAX/c; + if( blocksize*cn > INT_MAX ) + blocksize = INT_MAX/cn; if( haveMask ) { @@ -1022,7 +1113,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, { int bsz = (int)MIN(total - j, blocksize); - func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*c, 1), 0 ); + func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*cn, 1), 0 ); if( haveMask ) { copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz ); @@ -1054,7 +1145,7 @@ static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, { int bsz = (int)MIN(total - j, blocksize); - func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*c, 1), 0 ); + func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*cn, 1), 0 ); if( haveMask ) { copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz ); @@ -1101,47 +1192,59 @@ static BinaryFunc* getMinTab() void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask) { BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(and8u); - binary_op(a, b, c, mask, &f, true); + binary_op(a, b, c, mask, &f, true, OCL_OP_AND); } void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask) { BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(or8u); - binary_op(a, b, c, mask, &f, true); + binary_op(a, b, c, mask, &f, true, OCL_OP_OR); } void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask) { BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(xor8u); - binary_op(a, b, c, mask, &f, true); + binary_op(a, b, c, mask, &f, true, OCL_OP_XOR); } void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask) { BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(not8u); - binary_op(a, a, c, mask, &f, true); + binary_op(a, a, c, mask, &f, true, OCL_OP_NOT); } void cv::max( InputArray src1, InputArray src2, OutputArray dst ) { - binary_op(src1, src2, dst, noArray(), getMaxTab(), false ); + binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX ); } void cv::min( InputArray src1, InputArray src2, OutputArray dst ) { - binary_op(src1, src2, dst, noArray(), getMinTab(), false ); + binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN ); } void cv::max(const Mat& src1, const Mat& src2, Mat& dst) { OutputArray _dst(dst); - binary_op(src1, src2, _dst, noArray(), getMaxTab(), false ); + binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX ); } void cv::min(const Mat& src1, const Mat& src2, Mat& dst) { OutputArray _dst(dst); - binary_op(src1, src2, _dst, noArray(), getMinTab(), false ); + binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN ); +} + +void cv::max(const UMat& src1, const UMat& src2, UMat& dst) +{ + OutputArray _dst(dst); + binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX ); +} + +void cv::min(const UMat& src1, const UMat& src2, UMat& dst) +{ + OutputArray _dst(dst); + binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN ); } @@ -1171,73 +1274,213 @@ static int actualScalarDepth(const double* data, int len) CV_32S; } -static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, - InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false, void* usrdata=0) + +static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, + InputArray _mask, int wtype, + void* usrdata, int oclop, + bool haveScalar ) { - int kind1 = _src1.kind(), kind2 = _src2.kind(); - Mat src1 = _src1.getMat(), src2 = _src2.getMat(); + int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); + bool haveMask = !_mask.empty(); + + if( (haveMask || haveScalar) && cn > 4 ) + return false; + + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = CV_MAT_DEPTH(wtype); + wtype = CV_MAKETYPE(wdepth, cn); + int type2 = haveScalar ? _src2.type() : wtype, depth2 = CV_MAT_DEPTH(type2); + + UMat src1 = _src1.getUMat(), src2; + UMat dst = _dst.getUMat(), mask = _mask.getUMat(); + + char opts[1024]; + int kercn = haveMask || haveScalar ? cn : 1; + + if( (depth1 == depth2 || haveScalar) && ddepth == depth1 && wdepth == depth1 ) + { + const char* oclopstr = oclop2str[oclop]; + if( wdepth <= CV_16S ) + { + oclopstr = oclop == OCL_OP_ADD ? "OCL_OP_ADD_SAT" : + oclop == OCL_OP_SUB ? "OCL_OP_SUB_SAT" : + oclop == OCL_OP_RSUB ? "OCL_OP_RSUB_SAT" : oclopstr; + } + sprintf(opts, "-D %s%s -D %s -D dstT=%s", + (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), + oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(ddepth, kercn))); + } + else + { + char cvtstr[3][32]; + sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s " + "-D dstT=%s -D workT=%s -D convertToWT1=%s " + "-D convertToWT2=%s -D convertToDT=%s", + (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), + oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), + ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), + ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), + ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), + ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), + ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), + ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2])); + } + + const uchar* usrdata_p = (const uchar*)usrdata; + const double* usrdata_d = (const double*)usrdata; + float usrdata_f[3]; + int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE || + oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0; + if( n > 0 && wdepth == CV_32F ) + { + for( i = 0; i < n; i++ ) + usrdata_f[i] = (float)usrdata_d[i]; + usrdata_p = (const uchar*)usrdata_f; + } + size_t usrdata_esz = CV_ELEM_SIZE(wdepth); + + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); + if( k.empty() ) + return false; + + int cscale = cn/kercn; + + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cscale); + ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cscale) : + ocl::KernelArg::WriteOnly(dst, cscale); + ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1); + + if( haveScalar ) + { + size_t esz = CV_ELEM_SIZE(wtype); + double buf[4]={0,0,0,0}; + Mat src2sc = _src2.getMat(); + + if( !src2sc.empty() ) + { + convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1); + } + ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz); + + if( !haveMask ) + k.args(src1arg, dstarg, scalararg); + else + k.args(src1arg, maskarg, dstarg, scalararg); + } + else + { + src2 = _src2.getUMat(); + ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale); + + if( !haveMask ) + { + if(n == 0) + k.args(src1arg, src2arg, dstarg); + else if(n == 1) + k.args(src1arg, src2arg, dstarg, + ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz)); + else if(n == 3) + k.args(src1arg, src2arg, dstarg, + ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz), + ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz), + ocl::KernelArg(0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz)); + else + CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters"); + } + else + { + k.args(src1arg, src2arg, maskarg, dstarg); + } + } + + size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows }; + return k.run(2, globalsize, 0, false); +} + + +static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, + InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false, + void* usrdata=0, int oclop=-1 ) +{ + const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; + int kind1 = psrc1->kind(), kind2 = psrc2->kind(); bool haveMask = !_mask.empty(); bool reallocate = false; + int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); + int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2); + int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims(); + Size sz1 = dims1 <= 2 ? psrc1->size() : Size(); + Size sz2 = dims2 <= 2 ? psrc2->size() : Size(); + bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) && + ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2; + bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2); + bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1); - bool src1Scalar = checkScalar(src1, src2.type(), kind1, kind2); - bool src2Scalar = checkScalar(src2, src1.type(), kind2, kind1); - - if( (kind1 == kind2 || src1.channels() == 1) && src1.dims <= 2 && src2.dims <= 2 && - src1.size() == src2.size() && src1.type() == src2.type() && - !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == src1.depth())) || - (_dst.fixedType() && _dst.type() == _src1.type())) && + if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 && + !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) || + (_dst.fixedType() && _dst.type() == type1)) && ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) ) { - _dst.create(src1.size(), src1.type()); - Mat dst = _dst.getMat(); + _dst.createSameSize(*psrc1, type1); + if( use_opencl && + ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, + (!usrdata ? type1 : std::max(depth1, CV_32F)), + usrdata, oclop, false)) + return; + Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize(src1, src2, dst, src1.channels()); - tab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata); + tab[depth1](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata); return; } bool haveScalar = false, swapped12 = false; - int depth2 = src2.depth(); - if( src1.size != src2.size || src1.channels() != src2.channels() || + + if( dims1 != dims2 || sz1 != sz2 || cn != cn2 || ((kind1 == _InputArray::MATX || kind2 == _InputArray::MATX) && - src1.cols == 1 && src2.rows == 4) ) + (sz1 == Size(1,4) || sz2 == Size(1,4))) ) { - if( checkScalar(src1, src2.type(), kind1, kind2) ) + if( checkScalar(*psrc1, type2, kind1, kind2) ) { // src1 is a scalar; swap it with src2 - swap(src1, src2); + swap(psrc1, psrc2); + swap(sz1, sz2); + swap(type1, type2); + swap(depth1, depth2); + swap(cn, cn2); + swap(dims1, dims2); swapped12 = true; + if( oclop == OCL_OP_SUB ) + oclop = OCL_OP_RSUB; } - else if( !checkScalar(src2, src1.type(), kind2, kind1) ) + else if( !checkScalar(*psrc2, type1, kind2, kind1) ) CV_Error( CV_StsUnmatchedSizes, - "The operation is neither 'array op array' (where arrays have the same size and the same number of channels), " + "The operation is neither 'array op array' " + "(where arrays have the same size and the same number of channels), " "nor 'array op scalar', nor 'scalar op array'" ); haveScalar = true; - CV_Assert(src2.type() == CV_64F && (src2.rows == 4 || src2.rows == 1)); + CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4)); if (!muldiv) { - depth2 = actualScalarDepth(src2.ptr(), src1.channels()); - if( depth2 == CV_64F && (src1.depth() < CV_32S || src1.depth() == CV_32F) ) + Mat sc = psrc2->getMat(); + depth2 = actualScalarDepth(sc.ptr(), cn); + if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) ) depth2 = CV_32F; } else depth2 = CV_64F; } - int cn = src1.channels(), depth1 = src1.depth(), wtype; - BinaryFunc cvtsrc1 = 0, cvtsrc2 = 0, cvtdst = 0; - if( dtype < 0 ) { if( _dst.fixedType() ) dtype = _dst.type(); else { - if( !haveScalar && src1.type() != src2.type() ) + if( !haveScalar && type1 != type2 ) CV_Error(CV_StsBadArg, "When the input arrays in add/subtract/multiply/divide functions have different types, " "the output array type must be explicitly specified"); - dtype = src1.type(); + dtype = type1; } } dtype = CV_MAT_DEPTH(dtype); @@ -1262,39 +1505,41 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, wtype = std::max(wtype, dtype); } - cvtsrc1 = depth1 == wtype ? 0 : getConvertFunc(depth1, wtype); - cvtsrc2 = depth2 == depth1 ? cvtsrc1 : depth2 == wtype ? 0 : getConvertFunc(depth2, wtype); - cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype); - dtype = CV_MAKETYPE(dtype, cn); wtype = CV_MAKETYPE(wtype, cn); - size_t esz1 = src1.elemSize(), esz2 = src2.elemSize(); - size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype); - size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz; - BinaryFunc copymask = 0; - Mat mask; - if( haveMask ) { - mask = _mask.getMat(); - CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) ); - CV_Assert( mask.size == src1.size ); - copymask = getCopyMaskFunc(dsz); - Mat tdst = _dst.getMat(); - reallocate = tdst.size != src1.size || tdst.type() != dtype; + int mtype = _mask.type(); + CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) ); + reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype; } + _dst.createSameSize(*psrc1, dtype); + if( reallocate ) + _dst.setTo(0.); + + if( use_opencl && + ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype, + usrdata, oclop, haveScalar)) + return; + + BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype); + BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype); + BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype); + + size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2); + size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype); + size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz; + BinaryFunc copymask = getCopyMaskFunc(dsz); + Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat(); + AutoBuffer _buf; uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0; - size_t bufesz = (cvtsrc1 ? wsz : 0) + (cvtsrc2 || haveScalar ? wsz : 0) + (cvtdst ? wsz : 0) + (haveMask ? dsz : 0); - - _dst.create(src1.dims, src1.size, dtype); - Mat dst = _dst.getMat(); - - if( haveMask && reallocate ) - dst = Scalar::all(0); - + size_t bufesz = (cvtsrc1 ? wsz : 0) + + (cvtsrc2 || haveScalar ? wsz : 0) + + (cvtdst ? wsz : 0) + + (haveMask ? dsz : 0); BinaryFunc func = tab[CV_MAT_DEPTH(wtype)]; if( !haveScalar ) @@ -1476,7 +1721,7 @@ static BinaryFunc* getAbsDiffTab() void cv::add( InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype ) { - arithm_op(src1, src2, dst, mask, dtype, getAddTab() ); + arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD ); } void cv::subtract( InputArray src1, InputArray src2, OutputArray dst, @@ -1511,12 +1756,12 @@ void cv::subtract( InputArray src1, InputArray src2, OutputArray dst, } } #endif - arithm_op(src1, src2, dst, mask, dtype, getSubTab() ); + arithm_op(src1, src2, dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB ); } void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst ) { - arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab()); + arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF); } /****************************************************************************************\ @@ -1847,19 +2092,20 @@ static BinaryFunc* getRecipTab() void cv::multiply(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { - arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), true, &scale); + arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), + true, &scale, scale == 1. ? OCL_OP_MUL : OCL_OP_MUL_SCALE); } void cv::divide(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { - arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale); + arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE); } void cv::divide(double scale, InputArray src2, OutputArray dst, int dtype) { - arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale); + arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE); } /****************************************************************************************\ @@ -2020,7 +2266,7 @@ void cv::addWeighted( InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype ) { double scalars[] = {alpha, beta, gamma}; - arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars); + arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW); } diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 1fb448f2a6..ee34ef4516 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -220,6 +220,21 @@ void Mat::copyTo( OutputArray _dst ) const return; } + if( _dst.isUMat() ) + { + _dst.create( dims, size.p, type() ); + UMat dst = _dst.getUMat(); + + size_t i, sz[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize(); + for( i = 0; i < (size_t)dims; i++ ) + sz[i] = size.p[i]; + sz[dims-1] *= esz; + dst.ndoffset(dstofs); + dstofs[dims-1] *= esz; + dst.u->currAllocator->upload(dst.u, data, dims, sz, dstofs, dst.step.p, step.p); + return; + } + if( dims <= 2 ) { _dst.create( rows, cols, type() ); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index e64bae42c9..cb5d7e4cb5 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1436,6 +1436,181 @@ Size _InputArray::size(int i) const } } + +int _InputArray::sizend(int* arrsz, int i) const +{ + int j, d=0, k = kind(); + + if( k == NONE ) + ; + else if( k == MAT ) + { + CV_Assert( i < 0 ); + const Mat& m = *(const Mat*)obj; + d = m.dims; + if(arrsz) + for(j = 0; j < d; j++) + arrsz[j] = m.size.p[j]; + } + else if( k == UMAT ) + { + CV_Assert( i < 0 ); + const UMat& m = *(const UMat*)obj; + d = m.dims; + if(arrsz) + for(j = 0; j < d; j++) + arrsz[j] = m.size.p[j]; + } + else if( k == STD_VECTOR_MAT && i >= 0 ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert( i < (int)vv.size() ); + const Mat& m = vv[i]; + d = m.dims; + if(arrsz) + for(j = 0; j < d; j++) + arrsz[j] = m.size.p[j]; + } + else if( k == STD_VECTOR_UMAT && i >= 0 ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert( i < (int)vv.size() ); + const UMat& m = vv[i]; + d = m.dims; + if(arrsz) + for(j = 0; j < d; j++) + arrsz[j] = m.size.p[j]; + } + else + { + Size sz2d = size(i); + d = 2; + if(arrsz) + { + arrsz[0] = sz2d.height; + arrsz[1] = sz2d.width; + } + } + + return d; +} + + +bool _InputArray::sameSize(const _InputArray& arr) const +{ + int k1 = kind(), k2 = arr.kind(); + Size sz1; + + if( k1 == MAT ) + { + const Mat* m = ((const Mat*)obj); + if( k2 == MAT ) + return m->size == ((const Mat*)arr.obj)->size; + if( k2 == UMAT ) + return m->size == ((const UMat*)arr.obj)->size; + if( m->dims > 2 ) + return false; + sz1 = m->size(); + } + else if( k1 == UMAT ) + { + const UMat* m = ((const UMat*)obj); + if( k2 == MAT ) + return m->size == ((const Mat*)arr.obj)->size; + if( k2 == UMAT ) + return m->size == ((const UMat*)arr.obj)->size; + if( m->dims > 2 ) + return false; + sz1 = m->size(); + } + else + sz1 = size(); + if( arr.dims() > 2 ) + return false; + return sz1 == arr.size(); +} + +int _InputArray::dims(int i) const +{ + int k = kind(); + + if( k == MAT ) + { + CV_Assert( i < 0 ); + return ((const Mat*)obj)->dims; + } + + if( k == EXPR ) + { + CV_Assert( i < 0 ); + return ((const MatExpr*)obj)->a.dims; + } + + if( k == UMAT ) + { + CV_Assert( i < 0 ); + return ((const UMat*)obj)->dims; + } + + if( k == MATX ) + { + CV_Assert( i < 0 ); + return 2; + } + + if( k == STD_VECTOR ) + { + CV_Assert( i < 0 ); + return 2; + } + + if( k == NONE ) + return 0; + + if( k == STD_VECTOR_VECTOR ) + { + const std::vector >& vv = *(const std::vector >*)obj; + if( i < 0 ) + return 1; + CV_Assert( i < (int)vv.size() ); + return 2; + } + + if( k == STD_VECTOR_MAT ) + { + const std::vector& vv = *(const std::vector*)obj; + if( i < 0 ) + return 1; + CV_Assert( i < (int)vv.size() ); + + return vv[i].dims; + } + + if( k == OPENGL_BUFFER ) + { + CV_Assert( i < 0 ); + return 2; + } + + if( k == GPU_MAT ) + { + CV_Assert( i < 0 ); + return 2; + } + + if( k == OCL_MAT ) + { + return 2; + } + + CV_Assert( k == CUDA_MEM ); + //if( k == CUDA_MEM ) + { + CV_Assert( i < 0 ); + return 2; + } +} + size_t _InputArray::total(int i) const { int k = kind(); @@ -1570,6 +1745,61 @@ bool _InputArray::empty() const return ((const cuda::CudaMem*)obj)->empty(); } +bool _InputArray::isContinuous(int i) const +{ + int k = kind(); + + if( k == MAT ) + return i < 0 ? ((const Mat*)obj)->isContinuous() : true; + + if( k == UMAT ) + return i < 0 ? ((const UMat*)obj)->isContinuous() : true; + + if( k == EXPR || k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR) + return true; + + if( k == STD_VECTOR_MAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].isContinuous(); + } + + if( k == STD_VECTOR_UMAT ) + { + const std::vector& vv = *(const std::vector*)obj; + CV_Assert((size_t)i < vv.size()); + return vv[i].isContinuous(); + } + + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + return false; +} + +void _InputArray::copyTo(const _OutputArray& arr) const +{ + int k = kind(); + + if( k == NONE ) + arr.release(); + else if( k == MAT || k == MATX || k == STD_VECTOR ) + { + Mat m = getMat(); + m.copyTo(arr); + } + else if( k == EXPR ) + { + const MatExpr& e = *((MatExpr*)obj); + if( arr.kind() == MAT ) + arr.getMatRef() = e; + else + Mat(e).copyTo(arr); + } + else if( k == UMAT ) + ((UMat*)obj)->copyTo(arr); + else + CV_Error(Error::StsNotImplemented, ""); +} bool _OutputArray::fixedSize() const { @@ -1665,7 +1895,7 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); } -void _OutputArray::create(int dims, const int* sizes, int mtype, int i, +void _OutputArray::create(int d, const int* sizes, int mtype, int i, bool allowTransposed, int fixedDepthMask) const { int k = kind(); @@ -1683,7 +1913,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, m.release(); } - if( dims == 2 && m.dims == 2 && m.data && + if( d == 2 && m.dims == 2 && m.data && m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) return; } @@ -1697,11 +1927,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, } if(fixedSize()) { - CV_Assert(m.dims == dims); - for(int j = 0; j < dims; ++j) + CV_Assert(m.dims == d); + for(int j = 0; j < d; ++j) CV_Assert(m.size[j] == sizes[j]); } - m.create(dims, sizes, mtype); + m.create(d, sizes, mtype); return; } @@ -1717,7 +1947,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, m.release(); } - if( dims == 2 && m.dims == 2 && !m.empty() && + if( d == 2 && m.dims == 2 && !m.empty() && m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) return; } @@ -1731,11 +1961,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, } if(fixedSize()) { - CV_Assert(m.dims == dims); - for(int j = 0; j < dims; ++j) + CV_Assert(m.dims == d); + for(int j = 0; j < d; ++j) CV_Assert(m.size[j] == sizes[j]); } - m.create(dims, sizes, mtype); + m.create(d, sizes, mtype); return; } @@ -1744,14 +1974,14 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, CV_Assert( i < 0 ); int type0 = CV_MAT_TYPE(flags); CV_Assert( mtype == type0 || (CV_MAT_CN(mtype) == 1 && ((1 << type0) & fixedDepthMask) != 0) ); - CV_Assert( dims == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) || + CV_Assert( d == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) || (allowTransposed && sizes[0] == sz.width && sizes[1] == sz.height))); return; } if( k == STD_VECTOR || k == STD_VECTOR_VECTOR ) { - CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) ); + CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) ); size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0; std::vector* v = (std::vector*)obj; @@ -1843,7 +2073,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, if( i < 0 ) { - CV_Assert( dims == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) ); + CV_Assert( d == 2 && (sizes[0] == 1 || sizes[1] == 1 || sizes[0]*sizes[1] == 0) ); size_t len = sizes[0]*sizes[1] > 0 ? sizes[0] + sizes[1] - 1 : 0, len0 = v.size(); CV_Assert(!fixedSize() || len == len0); @@ -1873,7 +2103,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, m.release(); } - if( dims == 2 && m.dims == 2 && m.data && + if( d == 2 && m.dims == 2 && m.data && m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] ) return; } @@ -1887,18 +2117,24 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, } if(fixedSize()) { - CV_Assert(m.dims == dims); - for(int j = 0; j < dims; ++j) + CV_Assert(m.dims == d); + for(int j = 0; j < d; ++j) CV_Assert(m.size[j] == sizes[j]); } - m.create(dims, sizes, mtype); + m.create(d, sizes, mtype); return; } CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } +void _OutputArray::createSameSize(const _InputArray& arr, int mtype) const +{ + int arrsz[CV_MAX_DIM], d = arr.sizend(arrsz); + create(d, arrsz, mtype); +} + void _OutputArray::release() const { CV_Assert(!fixedSize()); @@ -2010,6 +2246,23 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const return *(cuda::CudaMem*)obj; } +void _OutputArray::setTo(const _InputArray& arr) const +{ + int k = kind(); + + if( k == NONE ) + ; + else if( k == MAT || k == MATX || k == STD_VECTOR ) + { + Mat m = getMat(); + m.setTo(arr); + } + else if( k == UMAT ) + ((UMat*)obj)->setTo(arr); + else + CV_Error(Error::StsNotImplemented, ""); +} + static _InputOutputArray _none; InputOutputArray noArray() { return _none; } diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 094a80d974..f706487ac8 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -114,8 +114,13 @@ typedef struct _cl_sampler * cl_sampler; typedef int cl_int; typedef unsigned cl_uint; -typedef long cl_long; -typedef unsigned long cl_ulong; +#if defined (_WIN32) && defined(_MSC_VER) + typedef __int64 cl_long; + typedef unsigned __int64 cl_ulong; +#else + typedef long cl_long; + typedef unsigned long cl_ulong; +#endif typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ typedef cl_ulong cl_bitfield; @@ -592,9 +597,16 @@ static void* initOpenCLAndLoad(const char* funcname) { if(!initialized) { - handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY); + const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME"); + oclpath = oclpath && strlen(oclpath) > 0 ? oclpath : + "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"; + handle = dlopen(oclpath, RTLD_LAZY); initialized = true; g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0; + if( g_haveOpenCL ) + fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath); + else + fprintf(stderr, "Failed to load OpenCL runtime\n"); } if(!handle) return 0; @@ -1212,16 +1224,13 @@ namespace cv { namespace ocl { struct UMat2D { - UMat2D(const UMat& m, int accessFlags) + UMat2D(const UMat& m) { - CV_Assert(m.dims == 2); - data = (cl_mem)m.handle(accessFlags); offset = m.offset; step = m.step; rows = m.rows; cols = m.cols; } - cl_mem data; size_t offset; size_t step; int rows; @@ -1230,10 +1239,8 @@ struct UMat2D struct UMat3D { - UMat3D(const UMat& m, int accessFlags) + UMat3D(const UMat& m) { - CV_Assert(m.dims == 3); - data = (cl_mem)m.handle(accessFlags); offset = m.offset; step = m.step.p[1]; slicestep = m.step.p[0]; @@ -1241,7 +1248,6 @@ struct UMat3D rows = m.size.p[1]; cols = m.size.p[2]; } - cl_mem data; size_t offset; size_t slicestep; size_t step; @@ -1315,7 +1321,7 @@ void setUseOpenCL(bool flag) } } -void finish() +void finish2() { Queue::getDefault().finish(); } @@ -1528,7 +1534,7 @@ String Device::OpenCLVersion() const { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); } String Device::driverVersion() const -{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); } +{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); } int Device::type() const { return p ? p->getProp(CL_DEVICE_TYPE) : 0; } @@ -1705,14 +1711,14 @@ size_t Device::profilingTimerResolution() const const Device& Device::getDefault() { - const Context& ctx = Context::getDefault(); + const Context2& ctx = Context2::getDefault(); int idx = TLSData::get()->device; return ctx.device(idx); } ///////////////////////////////////////////////////////////////////////////////////////// -struct Context::Impl +struct Context2::Impl { Impl(int dtype0) { @@ -1777,7 +1783,7 @@ struct Context::Impl devices.clear(); } - Program getProg(const ProgramSource& src, + Program getProg(const ProgramSource2& src, const String& buildflags, String& errmsg) { String prefix = Program::getPrefix(buildflags); @@ -1787,7 +1793,8 @@ struct Context::Impl return it->second; //String filename = format("%08x%08x_%08x%08x.clb2", Program prog(src, buildflags, errmsg); - phash.insert(std::pair(k, prog)); + if(prog.ptr()) + phash.insert(std::pair(k, prog)); return prog; } @@ -1797,7 +1804,7 @@ struct Context::Impl std::vector devices; bool initialized; - typedef ProgramSource::hash_t hash_t; + typedef ProgramSource2::hash_t hash_t; struct HashKey { @@ -1812,18 +1819,18 @@ struct Context::Impl }; -Context::Context() +Context2::Context2() { p = 0; } -Context::Context(int dtype) +Context2::Context2(int dtype) { p = 0; create(dtype); } -bool Context::create(int dtype0) +bool Context2::create(int dtype0) { if( !haveOpenCL() ) return false; @@ -1838,19 +1845,19 @@ bool Context::create(int dtype0) return p != 0; } -Context::~Context() +Context2::~Context2() { p->release(); } -Context::Context(const Context& c) +Context2::Context2(const Context2& c) { p = (Impl*)c.p; if(p) p->addref(); } -Context& Context::operator = (const Context& c) +Context2& Context2::operator = (const Context2& c) { Impl* newp = (Impl*)c.p; if(newp) @@ -1861,30 +1868,30 @@ Context& Context::operator = (const Context& c) return *this; } -void* Context::ptr() const +void* Context2::ptr() const { return p->handle; } -size_t Context::ndevices() const +size_t Context2::ndevices() const { return p ? p->devices.size() : 0; } -const Device& Context::device(size_t idx) const +const Device& Context2::device(size_t idx) const { static Device dummy; return !p || idx >= p->devices.size() ? dummy : p->devices[idx]; } -Context& Context::getDefault() +Context2& Context2::getDefault() { - static Context ctx; + static Context2 ctx; if( !ctx.p && haveOpenCL() ) { - // do not create new Context right away. + // do not create new Context2 right away. // First, try to retrieve existing context of the same type. - // In its turn, Platform::getContext() may call Context::create() + // In its turn, Platform::getContext() may call Context2::create() // if there is no such context. ctx.create(Device::TYPE_ACCELERATOR); if(!ctx.p) @@ -1898,7 +1905,7 @@ Context& Context::getDefault() return ctx; } -Program Context::getProg(const ProgramSource& prog, +Program Context2::getProg(const ProgramSource2& prog, const String& buildopts, String& errmsg) { return p ? p->getProg(prog, buildopts, errmsg) : Program(); @@ -1906,14 +1913,14 @@ Program Context::getProg(const ProgramSource& prog, struct Queue::Impl { - Impl(const Context& c, const Device& d) + Impl(const Context2& c, const Device& d) { refcount = 1; - const Context* pc = &c; + const Context2* pc = &c; cl_context ch = (cl_context)pc->ptr(); if( !ch ) { - pc = &Context::getDefault(); + pc = &Context2::getDefault(); ch = (cl_context)pc->ptr(); } cl_device_id dh = (cl_device_id)d.ptr(); @@ -1943,7 +1950,7 @@ Queue::Queue() p = 0; } -Queue::Queue(const Context& c, const Device& d) +Queue::Queue(const Context2& c, const Device& d) { p = 0; create(c, d); @@ -1973,7 +1980,7 @@ Queue::~Queue() p->release(); } -bool Queue::create(const Context& c, const Device& d) +bool Queue::create(const Context2& c, const Device& d) { if(p) p->release(); @@ -1996,7 +2003,7 @@ Queue& Queue::getDefault() { Queue& q = TLSData::get()->oclQueue; if( !q.p ) - q.create(Context::getDefault()); + q.create(Context2::getDefault()); return q; } @@ -2008,15 +2015,20 @@ static cl_command_queue getQueue(const Queue& q) return qq; } -KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz) - : flags(_flags), m(_m), obj(_obj), sz(_sz) +KernelArg::KernelArg() + : flags(0), m(0), obj(0), sz(0), wscale(1) +{ +} + +KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, const void* _obj, size_t _sz) + : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale) { } KernelArg KernelArg::Constant(const Mat& m) { CV_Assert(m.isContinuous()); - return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize()); + return KernelArg(CONSTANT, 0, 1, m.data, m.total()*m.elemSize()); } @@ -2099,8 +2111,8 @@ Kernel::Kernel(const char* kname, const Program& prog) create(kname, prog); } -Kernel::Kernel(const char* kname, const ProgramSource& src, - const String& buildopts, String& errmsg) +Kernel::Kernel(const char* kname, const ProgramSource2& src, + const String& buildopts, String* errmsg) { p = 0; create(kname, src, buildopts, errmsg); @@ -2143,15 +2155,17 @@ bool Kernel::create(const char* kname, const Program& prog) return p != 0; } -bool Kernel::create(const char* kname, const ProgramSource& src, - const String& buildopts, String& errmsg) +bool Kernel::create(const char* kname, const ProgramSource2& src, + const String& buildopts, String* errmsg) { if(p) { p->release(); p = 0; } - const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg); + String tempmsg; + if( !errmsg ) errmsg = &tempmsg; + const Program& prog = Context2::getDefault().getProg(src, buildopts, *errmsg); return create(kname, prog); } @@ -2160,55 +2174,91 @@ void* Kernel::ptr() const return p ? p->handle : 0; } -void Kernel::set(int i, const void* value, size_t sz) +bool Kernel::empty() const { - CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 ); - if( i == 0 ) - p->cleanupUMats(); + return ptr() == 0; } -void Kernel::set(int i, const UMat& m) +int Kernel::set(int i, const void* value, size_t sz) { - set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); -} - -void Kernel::set(int i, const KernelArg& arg) -{ - CV_Assert( p && p->handle ); + CV_Assert(i >= 0); if( i == 0 ) p->cleanupUMats(); + if( !p || !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 ) + return -1; + return i+1; +} + +int Kernel::set(int i, const UMat& m) +{ + return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); +} + +int Kernel::set(int i, const KernelArg& arg) +{ + CV_Assert( i >= 0 ); + if( i == 0 ) + p->cleanupUMats(); + if( !p || !p->handle ) + return -1; if( arg.m ) { int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0); + cl_mem h = (cl_mem)arg.m->handle(accessFlags); + if( arg.m->dims <= 2 ) { - UMat2D u2d(*arg.m, accessFlags); - clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d); + UMat2D u2d(*arg.m); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step); + clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset); + i += 3; + + if( !(arg.flags & KernelArg::NO_SIZE) ) + { + int cols = u2d.cols*arg.wscale; + clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.cols), &cols); + i += 2; + } } else { - UMat3D u3d(*arg.m, accessFlags); - clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d); + UMat3D u3d(*arg.m); + clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep); + clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step); + clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset); + i += 4; + if( !(arg.flags & KernelArg::NO_SIZE) ) + { + int cols = u3d.cols*arg.wscale; + clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows); + clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols); + i += 3; + } } p->addUMat(*arg.m); + return i; } - else - { - clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj); - } + clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj); + return i+1; } -void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[], +bool Kernel::run(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q) { - CV_Assert(p && p->handle && p->e == 0); + if(!p || !p->handle || p->e != 0) + return false; cl_command_queue qq = getQueue(q); - clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims, - offset, globalsize, localsize, 0, 0, - sync ? 0 : &p->e); - if( sync ) + size_t offset[CV_MAX_DIM] = {0}; + cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims, + offset, globalsize, localsize, 0, 0, + sync ? 0 : &p->e); + if( sync || retval < 0 ) { clFinish(qq); p->cleanupUMats(); @@ -2218,14 +2268,17 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz p->addref(); clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); } + return retval >= 0; } -void Kernel::runTask(bool sync, const Queue& q) +bool Kernel::runTask(bool sync, const Queue& q) { - CV_Assert(p && p->handle && p->e == 0); + if(!p || !p->handle || p->e != 0) + return false; + cl_command_queue qq = getQueue(q); - clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e); - if( sync ) + cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e); + if( sync || retval < 0 ) { clFinish(qq); p->cleanupUMats(); @@ -2235,6 +2288,7 @@ void Kernel::runTask(bool sync, const Queue& q) p->addref(); clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); } + return retval >= 0; } @@ -2273,11 +2327,11 @@ size_t Kernel::localMemSize() const struct Program::Impl { - Impl(const ProgramSource& _src, + Impl(const ProgramSource2& _src, const String& _buildflags, String& errmsg) { refcount = 1; - const Context& ctx = Context::getDefault(); + const Context2& ctx = Context2::getDefault(); src = _src; buildflags = _buildflags; const String& srcstr = src.source(); @@ -2293,17 +2347,20 @@ struct Program::Impl void** deviceList = deviceListBuf; for( i = 0; i < n; i++ ) deviceList[i] = ctx.device(i).ptr(); + printf("Building the OpenCL program ...\n"); retval = clBuildProgram(handle, n, (const cl_device_id*)deviceList, buildflags.c_str(), 0, 0); if( retval == CL_BUILD_PROGRAM_FAILURE ) { - char buf[1024]; + char buf[1<<16]; size_t retsz = 0; clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG, sizeof(buf)-16, buf, &retsz); errmsg = String(buf); + CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str())); } + CV_Assert(retval >= 0); } } @@ -2315,7 +2372,7 @@ struct Program::Impl if(_buf.empty()) return; String prefix0 = Program::getPrefix(buildflags); - const Context& ctx = Context::getDefault(); + const Context2& ctx = Context2::getDefault(); const Device& dev = Device::getDefault(); const char* pos0 = _buf.c_str(); const char* pos1 = strchr(pos0, '\n'); @@ -2366,7 +2423,7 @@ struct Program::Impl IMPLEMENT_REFCOUNTABLE(); - ProgramSource src; + ProgramSource2 src; String buildflags; cl_program handle; }; @@ -2374,7 +2431,7 @@ struct Program::Impl Program::Program() { p = 0; } -Program::Program(const ProgramSource& src, +Program::Program(const ProgramSource2& src, const String& buildflags, String& errmsg) { p = 0; @@ -2405,7 +2462,7 @@ Program::~Program() p->release(); } -bool Program::create(const ProgramSource& src, +bool Program::create(const ProgramSource2& src, const String& buildflags, String& errmsg) { if(p) @@ -2419,9 +2476,9 @@ bool Program::create(const ProgramSource& src, return p != 0; } -const ProgramSource& Program::source() const +const ProgramSource2& Program::source() const { - static ProgramSource dummy; + static ProgramSource2 dummy; return p ? p->src : dummy; } @@ -2455,7 +2512,7 @@ String Program::getPrefix() const String Program::getPrefix(const String& buildflags) { - const Context& ctx = Context::getDefault(); + const Context2& ctx = Context2::getDefault(); const Device& dev = ctx.device(0); return format("name=%s\ndriver=%s\nbuildflags=%s\n", dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str()); @@ -2463,7 +2520,7 @@ String Program::getPrefix(const String& buildflags) //////////////////////////////////////////////////////////////////////////////////////// -struct ProgramSource::Impl +struct ProgramSource2::Impl { Impl(const char* _src) { @@ -2482,39 +2539,39 @@ struct ProgramSource::Impl IMPLEMENT_REFCOUNTABLE(); String src; - ProgramSource::hash_t h; + ProgramSource2::hash_t h; }; -ProgramSource::ProgramSource() +ProgramSource2::ProgramSource2() { p = 0; } -ProgramSource::ProgramSource(const char* prog) +ProgramSource2::ProgramSource2(const char* prog) { p = new Impl(prog); } -ProgramSource::ProgramSource(const String& prog) +ProgramSource2::ProgramSource2(const String& prog) { p = new Impl(prog); } -ProgramSource::~ProgramSource() +ProgramSource2::~ProgramSource2() { if(p) p->release(); } -ProgramSource::ProgramSource(const ProgramSource& prog) +ProgramSource2::ProgramSource2(const ProgramSource2& prog) { p = prog.p; if(p) p->addref(); } -ProgramSource& ProgramSource::operator = (const ProgramSource& prog) +ProgramSource2& ProgramSource2::operator = (const ProgramSource2& prog) { Impl* newp = (Impl*)prog.p; if(newp) @@ -2525,13 +2582,13 @@ ProgramSource& ProgramSource::operator = (const ProgramSource& prog) return *this; } -const String& ProgramSource::source() const +const String& ProgramSource2::source() const { static String dummy; return p ? p->src : dummy; } -ProgramSource::hash_t ProgramSource::hash() const +ProgramSource2::hash_t ProgramSource2::hash() const { return p ? p->h : 0; } @@ -2551,7 +2608,7 @@ public: return u; } - void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const + void getBestFlags(const Context2& ctx, int& createFlags, int& flags0) const { const Device& dev = ctx.device(0); createFlags = CL_MEM_READ_WRITE; @@ -2574,7 +2631,7 @@ public: total *= sizes[i]; } - Context& ctx = Context::getDefault(); + Context2& ctx = Context2::getDefault(); int createFlags = 0, flags0 = 0; getBestFlags(ctx, createFlags, flags0); @@ -2603,7 +2660,7 @@ public: if(u->handle == 0) { CV_Assert(u->origdata != 0); - Context& ctx = Context::getDefault(); + Context2& ctx = Context2::getDefault(); int createFlags = 0, flags0 = 0; getBestFlags(ctx, createFlags, flags0); @@ -2848,7 +2905,6 @@ public: new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 ); } - clFinish(q); } void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[], @@ -2890,6 +2946,9 @@ public: if( iscontinuous ) { + int crc = 0; + for( size_t i = 0; i < total; i++ ) + crc ^= ((uchar*)srcptr)[i]; CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 ); } @@ -2949,10 +3008,11 @@ public: } else { - CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, + cl_int retval; + CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1], - 0, 0, 0) >= 0 ); + 0, 0, 0)) >= 0 ); } dst->markHostCopyObsolete(true); @@ -2969,4 +3029,61 @@ MatAllocator* getOpenCLAllocator() return &allocator; } +const char* typeToStr(int t) +{ + static const char* tab[]= + { + "uchar", "uchar2", "uchar3", "uchar4", + "char", "char2", "char3", "char4", + "ushort", "ushort2", "ushort3", "ushort4", + "short", "short2", "short3", "short4", + "int", "int2", "int3", "int4", + "float", "float2", "float3", "float4", + "double", "double2", "double3", "double4", + "?", "?", "?", "?" + }; + int cn = CV_MAT_CN(t); + return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1]; +} + +const char* memopTypeToStr(int t) +{ + static const char* tab[]= + { + "uchar", "uchar2", "uchar3", "uchar4", + "uchar", "uchar2", "uchar3", "uchar4", + "ushort", "ushort2", "ushort3", "ushort4", + "ushort", "ushort2", "ushort3", "ushort4", + "int", "int2", "int3", "int4", + "int", "int2", "int3", "int4", + "long", "long2", "long3", "long4", + "?", "?", "?", "?" + }; + int cn = CV_MAT_CN(t); + return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1]; +} + +const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf) +{ + if( sdepth == ddepth ) + return "noconvert"; + const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn)); + if( ddepth >= CV_32F || + (ddepth == CV_32S && sdepth < CV_32S) || + (ddepth == CV_16S && sdepth <= CV_8S) || + (ddepth == CV_16U && sdepth == CV_8U)) + { + sprintf(buf, "convert_%s", typestr); + } + else if( sdepth >= CV_32F ) + { + sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : "")); + } + else + { + sprintf(buf, "convert_%s_sat", typestr); + } + return buf; +} + }} diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl new file mode 100644 index 0000000000..baba41a01b --- /dev/null +++ b/modules/core/src/opencl/arithm.cl @@ -0,0 +1,303 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + Usage: + after compiling this program user gets a single kernel called KF. + the following flags should be passed: + 1) one of "-D BINARY_OP", "-D UNARY_OP", "-D MASK_BINARY_OP" or "-D MASK_UNARY_OP" + 2) the actual operation performed, one of "-D OP_...", see below the list of operations. + 2a) "-D dstDepth= [-D cn= -D srcDepth2= -D dstDepth= + -D workDepth= [-D cn=]" - for mixed-type operations +*/ + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + +#define CV_32S 4 +#define CV_32F 5 + +#define dstelem *(dstT*)(dstptr + dst_index) +#define noconvert(x) x + +#ifndef workT + + #define srcT1 dstT + #define srcT2 dstT + #define workT dstT + #define srcelem1 *(dstT*)(srcptr1 + src1_index) + #define srcelem2 *(dstT*)(srcptr2 + src2_index) + #define convertToDT noconvert + +#else + + #define srcelem1 convertToWT1(*(srcT1*)(srcptr1 + src1_index)) + #define srcelem2 convertToWT2(*(srcT2*)(srcptr2 + src2_index)) + +#endif + +#define EXTRA_PARAMS + +#if defined OP_ADD_SAT +#define PROCESS_ELEM dstelem = add_sat(srcelem1, srcelem2) + +#elif defined OP_ADD +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2) + +#elif defined OP_SUB_SAT +#define PROCESS_ELEM dstelem = sub_sat(srcelem1, srcelem2) + +#elif defined OP_SUB +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2) + +#elif defined OP_RSUB_SAT +#define PROCESS_ELEM dstelem = sub_sat(srcelem2, srcelem1) + +#elif defined OP_RSUB +#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1) + +#elif defined OP_ABSDIFF +#define PROCESS_ELEM dstelem = abs_diff(srcelem1, srcelem2) + +#elif defined OP_AND +#define PROCESS_ELEM dstelem = srcelem1 & srcelem2 + +#elif defined OP_OR +#define PROCESS_ELEM dstelem = srcelem1 | srcelem2 + +#elif defined OP_XOR +#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2 + +#elif defined OP_NOT +#define PROCESS_ELEM dstelem = ~srcelem1 + +#elif defined OP_MIN +#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2) + +#elif defined OP_MAX +#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2) + +#elif defined OP_MUL +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2) + +#elif defined OP_MUL_SCALE +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT scale +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale) + +#elif defined OP_DIV +#define PROCESS_ELEM \ + workT e2 = srcelem2, zero = (workT)(0); \ + dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero) + +#elif defined OP_DIV_SCALE +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT scale +#define PROCESS_ELEM \ + workT e2 = srcelem2, zero = (workT)(0); \ + dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero) + +#elif defined OP_RECIP_SCALE +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT scale +#define PROCESS_ELEM \ + workT e1 = srcelem1, zero = (workT)(0); \ + dstelem = convertToDT(e1 != zero ? scale / e1 : zero) + +#elif defined OP_ADDW +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma +#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma) + +#elif defined OP_MAG +#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2) + +#elif defined OP_PHASE_RADIANS +#define PROCESS_ELEM \ + workT tmp = atan2(srcelem2, srcelem1); \ + if(tmp < 0) tmp += 6.283185307179586232; \ + dstelem = tmp + +#elif defined OP_PHASE_DEGREES + #define PROCESS_ELEM \ + workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465; \ + if(tmp < 0) tmp += 360; \ + dstelem = tmp + +#elif defined OP_EXP +#define PROCESS_ELEM dstelem = exp(srcelem1) + +#elif defined OP_SQRT +#define PROCESS_ELEM dstelem = sqrt(srcelem1) + +#elif defined OP_LOG +#define PROCESS_ELEM dstelem = log(abs(srcelem1)) + +#elif defined OP_CMP +#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0) + +#elif defined OP_CONVERT +#define PROCESS_ELEM dstelem = convertToDT(srcelem1) + +#elif defined OP_CONVERT_SCALE +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT alpha, workT beta +#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta) + +#else +#error "unknown op type" +#endif + +#if defined UNARY_OP || defined MASK_UNARY_OP +#undef srcelem2 +#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ + defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ + defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX + #undef EXTRA_PARAMS + #define EXTRA_PARAMS , workT srcelem2 +#endif +#endif + +#if defined BINARY_OP + +__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, + __global const uchar* srcptr2, int srcstep2, int srcoffset2, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols EXTRA_PARAMS ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1); + int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2); + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + + PROCESS_ELEM; + //printf("(x=%d, y=%d). %d, %d, %d\n", x, y, (int)srcelem1, (int)srcelem2, (int)dstelem); + } +} + +#elif defined MASK_BINARY_OP + +__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, + __global const uchar* srcptr2, int srcstep2, int srcoffset2, + __global const uchar* mask, int maskstep, int maskoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols EXTRA_PARAMS ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int mask_index = mad24(y, maskstep, x + maskoffset); + if( mask[mask_index] ) + { + int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1); + int src2_index = mad24(y, srcstep2, x*sizeof(srcT2) + srcoffset2); + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + + PROCESS_ELEM; + } + } +} + +#elif defined UNARY_OP + +__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols EXTRA_PARAMS ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1); + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + + PROCESS_ELEM; + } +} + +#elif defined MASK_UNARY_OP + +__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, + __global const uchar* mask, int maskstep, int maskoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols EXTRA_PARAMS ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int mask_index = mad24(y, maskstep, x + maskoffset); + if( mask[mask_index] ) + { + int src1_index = mad24(y, srcstep1, x*sizeof(srcT1) + srcoffset1); + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + + PROCESS_ELEM; + } + } +} + +#else + +#error "Unknown operation type" + +#endif diff --git a/modules/core/src/opencl/copyset.cl b/modules/core/src/opencl/copyset.cl new file mode 100644 index 0000000000..df5bdf5883 --- /dev/null +++ b/modules/core/src/opencl/copyset.cl @@ -0,0 +1,73 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols, dstT value ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int mask_index = mad24(y, maskstep, x + maskoffset); + if( mask[mask_index] ) + { + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + *(dstT*)(dstptr + dst_index) = value; + } + } +} + +__kernel void set(__global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols, dstT value ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int dst_index = mad24(y, dststep, x*sizeof(dstT) + dstoffset); + *(dstT*)(dstptr + dst_index) = value; + } +} diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 073a54e034..7465685fd6 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -205,13 +205,30 @@ enum { BLOCK_SIZE = 1024 }; inline bool checkScalar(const Mat& sc, int atype, int sckind, int akind) { - if( sc.dims > 2 || (sc.cols != 1 && sc.rows != 1) || !sc.isContinuous() ) + if( sc.dims > 2 || !sc.isContinuous() ) + return false; + Size sz = sc.size(); + if(sz.width != 1 && sz.height != 1) return false; int cn = CV_MAT_CN(atype); if( akind == _InputArray::MATX && sckind != _InputArray::MATX ) return false; - return sc.size() == Size(1, 1) || sc.size() == Size(1, cn) || sc.size() == Size(cn, 1) || - (sc.size() == Size(1, 4) && sc.type() == CV_64F && cn <= 4); + return sz == Size(1, 1) || sz == Size(1, cn) || sz == Size(cn, 1) || + (sz == Size(1, 4) && sc.type() == CV_64F && cn <= 4); +} + +inline bool checkScalar(InputArray sc, int atype, int sckind, int akind) +{ + if( sc.dims() > 2 || !sc.isContinuous() ) + return false; + Size sz = sc.size(); + if(sz.width != 1 && sz.height != 1) + return false; + int cn = CV_MAT_CN(atype); + if( akind == _InputArray::MATX && sckind != _InputArray::MATX ) + return false; + return sz == Size(1, 1) || sz == Size(1, cn) || sz == Size(cn, 1) || + (sz == Size(1, 4) && sc.type() == CV_64F && cn <= 4); } void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ); @@ -227,7 +244,10 @@ struct TLSData static TLSData* get(); }; -namespace ocl { MatAllocator* getOpenCLAllocator(); } +namespace ocl +{ + MatAllocator* getOpenCLAllocator(); +} } diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 2ea71acc8b..2b659fb0a9 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" ///////////////////////////////// UMat implementation /////////////////////////////// @@ -174,8 +175,8 @@ static void updateContinuityFlag(UMat& m) break; } - uint64 t = (uint64)m.step[0]*m.size[0]; - if( j <= i && t == (size_t)t ) + uint64 total = (uint64)m.step[0]*m.size[0]; + if( j <= i && total == (size_t)total ) m.flags |= UMat::CONTINUOUS_FLAG; else m.flags &= ~UMat::CONTINUOUS_FLAG; @@ -197,6 +198,7 @@ UMat Mat::getUMat(int accessFlags) const if(!u) return hdr; UMat::getStdAllocator()->allocate(u, accessFlags); + hdr.flags = flags; setSize(hdr, dims, size.p, step.p); finalizeHdr(hdr); hdr.u = u; @@ -548,7 +550,8 @@ Mat UMat::getMat(int accessFlags) const CV_Assert(u->data != 0); Mat hdr(dims, size.p, type(), u->data + offset, step.p); hdr.u = u; - hdr.datastart = hdr.data = u->data; + hdr.datastart = u->data; + hdr.data = hdr.datastart + offset; hdr.datalimit = hdr.dataend = u->data + u->size; CV_XADD(&hdr.u->refcount, 1); return hdr; @@ -617,7 +620,7 @@ void UMat::copyTo(OutputArray _dst) const void* dsthandle = dst.handle(ACCESS_WRITE); if( srchandle == dsthandle && dst.offset == offset ) return; - ndoffset(dstofs); + dst.ndoffset(dstofs); CV_Assert(u->currAllocator == dst.u->currAllocator); u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false); } @@ -633,6 +636,50 @@ void UMat::convertTo(OutputArray, int, double, double) const CV_Error(Error::StsNotImplemented, ""); } +UMat& UMat::setTo(InputArray _value, InputArray _mask) +{ + bool haveMask = !_mask.empty(); + int tp = type(), cn = CV_MAT_CN(tp); + if( dims <= 2 && cn <= 4 && ocl::useOpenCL() ) + { + Mat value = _value.getMat(); + CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) ); + double buf[4]; + convertAndUnrollScalar(value, tp, (uchar*)buf, 1); + + char opts[1024]; + sprintf(opts, "-D dstT=%s", ocl::memopTypeToStr(tp)); + + ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts); + if( !setK.empty() ) + { + ocl::KernelArg scalararg(0, 0, 0, buf, CV_ELEM_SIZE(tp)); + UMat mask; + + if( haveMask ) + { + mask = _mask.getUMat(); + CV_Assert( mask.size() == size() && mask.type() == CV_8U ); + ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); + ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this); + setK.args(maskarg, dstarg, scalararg); + } + else + { + ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this); + setK.args(dstarg, scalararg); + } + + size_t globalsize[] = { cols, rows }; + if( setK.run(2, globalsize, 0, false) ) + return *this; + } + } + Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE); + m.setTo(_value, _mask); + return *this; +} + UMat& UMat::operator = (const Scalar&) { CV_Error(Error::StsNotImplemented, ""); diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp index 56ec72c7a2..0b011209a9 100644 --- a/modules/core/test/test_umat.cpp +++ b/modules/core/test/test_umat.cpp @@ -91,11 +91,11 @@ bool CV_UMatTest::TestUMat() { try { - Mat a(100, 100, CV_16S), b; + Mat a(100, 100, CV_16SC2), b, c; randu(a, Scalar::all(-100), Scalar::all(100)); - Rect roi(1, 3, 10, 20); - Mat ra(a, roi), rb; - UMat ua, ura; + Rect roi(1, 3, 5, 4); + Mat ra(a, roi), rb, rc, rc0; + UMat ua, ura, ub, urb, uc, urc; a.copyTo(ua); ua.copyTo(b); CHECK_DIFF(a, b); @@ -112,6 +112,71 @@ bool CV_UMatTest::TestUMat() } ra.copyTo(rb); CHECK_DIFF(ra, rb); + + b = a.clone(); + ra = a(roi); + rb = b(roi); + randu(b, Scalar::all(-100), Scalar::all(100)); + b.copyTo(ub); + urb = ub(roi); + + /*std::cout << "==============================================\nbefore op (CPU):\n"; + std::cout << "ra: " << ra << std::endl; + std::cout << "rb: " << rb << std::endl;*/ + + ra.copyTo(ura); + rb.copyTo(urb); + ra.release(); + rb.release(); + ura.copyTo(ra); + urb.copyTo(rb); + + /*std::cout << "==============================================\nbefore op (GPU):\n"; + std::cout << "ra: " << ra << std::endl; + std::cout << "rb: " << rb << std::endl;*/ + + cv::max(ra, rb, rc); + cv::max(ura, urb, urc); + urc.copyTo(rc0); + + /*std::cout << "==============================================\nafter op:\n"; + std::cout << "rc: " << rc << std::endl; + std::cout << "rc0: " << rc0 << std::endl;*/ + + CHECK_DIFF(rc0, rc); + + { + UMat tmp = rc0.getUMat(ACCESS_WRITE); + cv::max(ura, urb, tmp); + } + CHECK_DIFF(rc0, rc); + + ura.copyTo(urc); + cv::max(urc, urb, urc); + urc.copyTo(rc0); + CHECK_DIFF(rc0, rc); + + rc = ra ^ rb; + cv::bitwise_xor(ura, urb, urc); + urc.copyTo(rc0); + + /*std::cout << "==============================================\nafter op:\n"; + std::cout << "ra: " << rc0 << std::endl; + std::cout << "rc: " << rc << std::endl;*/ + + CHECK_DIFF(rc0, rc); + + rc = ra + rb; + cv::add(ura, urb, urc); + urc.copyTo(rc0); + + CHECK_DIFF(rc0, rc); + + cv::subtract(ra, Scalar::all(5), rc); + cv::subtract(ura, Scalar::all(5), urc); + urc.copyTo(rc0); + + CHECK_DIFF(rc0, rc); } catch (const test_excep& e) { diff --git a/modules/highgui/doc/reading_and_writing_images_and_video.rst b/modules/highgui/doc/reading_and_writing_images_and_video.rst index 6b956b40dd..95d3e05c12 100644 --- a/modules/highgui/doc/reading_and_writing_images_and_video.rst +++ b/modules/highgui/doc/reading_and_writing_images_and_video.rst @@ -320,7 +320,7 @@ VideoCapture::retrieve ---------------------- Decodes and returns the grabbed video frame. -.. ocv:function:: bool VideoCapture::retrieve( Mat& image, int flag=0 ) +.. ocv:function:: bool VideoCapture::retrieve( OutputArray image, int flag=0 ) .. ocv:pyfunction:: cv2.VideoCapture.retrieve([image[, flag]]) -> retval, image @@ -337,7 +337,9 @@ Grabs, decodes and returns the next video frame. .. ocv:function:: VideoCapture& VideoCapture::operator >> (Mat& image) -.. ocv:function:: bool VideoCapture::read(Mat& image) +.. ocv:function:: VideoCapture& VideoCapture::operator >> (UMat& image) + +.. ocv:function:: bool VideoCapture::read(OutputArray image) .. ocv:pyfunction:: cv2.VideoCapture.read([image]) -> retval, image diff --git a/modules/highgui/include/opencv2/highgui.hpp b/modules/highgui/include/opencv2/highgui.hpp index c4fc73a81f..cebf8fe22a 100644 --- a/modules/highgui/include/opencv2/highgui.hpp +++ b/modules/highgui/include/opencv2/highgui.hpp @@ -511,9 +511,10 @@ public: CV_WRAP virtual void release(); CV_WRAP virtual bool grab(); - CV_WRAP virtual bool retrieve(CV_OUT Mat& image, int flag = 0); + CV_WRAP virtual bool retrieve(OutputArray image, int flag = 0); virtual VideoCapture& operator >> (CV_OUT Mat& image); - CV_WRAP virtual bool read(CV_OUT Mat& image); + virtual VideoCapture& operator >> (CV_OUT UMat& image); + CV_WRAP virtual bool read(OutputArray image); CV_WRAP virtual bool set(int propId, double value); CV_WRAP virtual double get(int propId); diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 04da481d01..0f4e6afb89 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -515,7 +515,7 @@ bool VideoCapture::grab() return cvGrabFrame(cap) != 0; } -bool VideoCapture::retrieve(Mat& image, int channel) +bool VideoCapture::retrieve(OutputArray image, int channel) { IplImage* _img = cvRetrieveFrame(cap, channel); if( !_img ) @@ -533,7 +533,7 @@ bool VideoCapture::retrieve(Mat& image, int channel) return true; } -bool VideoCapture::read(Mat& image) +bool VideoCapture::read(OutputArray image) { if(grab()) retrieve(image); @@ -548,6 +548,12 @@ VideoCapture& VideoCapture::operator >> (Mat& image) return *this; } +VideoCapture& VideoCapture::operator >> (UMat& image) +{ + read(image); + return *this; +} + bool VideoCapture::set(int propId, double value) { return cvSetCaptureProperty(cap, propId, value) != 0; diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 49312ba09b..0d0cf82306 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -90,6 +90,7 @@ \**********************************************************************************/ #include "precomp.hpp" +#include "opencl_kernels.hpp" #include #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) @@ -2687,6 +2688,125 @@ struct mRGBA2RGBA } }; + +static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) +{ + bool ok = true; + UMat src = _src.getUMat(), dst; + Size sz = src.size(), dstSz = sz; + int scn = src.channels(), depth = src.depth(), bidx; + size_t globalsize[] = { src.cols, src.rows }; + ocl::Kernel k; + + if(depth != CV_8U && depth != CV_16U && depth != CV_32F) + return false; + + switch (code) + { + /* + case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR: + case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA: + case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555: + case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555: + case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB: + case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA: + */ + case COLOR_BGR2GRAY: + case COLOR_BGRA2GRAY: + case COLOR_RGB2GRAY: + case COLOR_RGBA2GRAY: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2; + dcn = 1; + k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d", depth, scn, bidx)); + break; + } + case COLOR_GRAY2BGR: + case COLOR_GRAY2BGRA: + { + CV_Assert(scn == 1); + dcn = code == COLOR_GRAY2BGRA ? 4 : 3; + k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=%d -D scn=1 -D dcn=%d", depth, dcn)); + break; + } + case COLOR_BGR2YUV: + case COLOR_RGB2YUV: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == COLOR_RGB2YUV ? 0 : 2; + dcn = 3; + k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx)); + break; + } + case COLOR_YUV2BGR: + case COLOR_YUV2RGB: + { + if(dcn < 0) dcn = 3; + CV_Assert(dcn == 3 || dcn == 4); + bidx = code == COLOR_YUV2RGB ? 0 : 2; + k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d", depth, dcn, bidx)); + break; + } + case COLOR_YUV2RGB_NV12: + case COLOR_YUV2BGR_NV12: + case COLOR_YUV2RGBA_NV12: + case COLOR_YUV2BGRA_NV12: + { + CV_Assert( scn == 1 ); + CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); + dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3; + bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2; + + dstSz = Size(sz.width, sz.height * 2 / 3); + globalsize[0] = dstSz.height/2; + globalsize[1] = dstSz.width/2; + k.create("YUV2RGBA_NV12", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=0 -D scn=1 -D dcn=%d -D bidx=%d", dcn, bidx)); + break; + } + case COLOR_BGR2YCrCb: + case COLOR_RGB2YCrCb: + { + CV_Assert(scn == 3 || scn == 4); + bidx = code == COLOR_BGR2YCrCb ? 0 : 2; + dcn = 3; + k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc, + format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx)); + break; + } + case COLOR_YCrCb2BGR: + case COLOR_YCrCb2RGB: + break; + /* + case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY: + case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555: + case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: + case COLOR_BGR2XYZ: case COLOR_RGB2XYZ: + case COLOR_XYZ2BGR: case COLOR_XYZ2RGB: + case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL: + case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL: + case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: + case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL: + */ + default: + ; + } + + if( !k.empty() ) + { + _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); + dst = _dst.getUMat(); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); + ok = k.run(2, globalsize, 0, false); + } + return ok; +} + }//namespace cv ////////////////////////////////////////////////////////////////////////////////////////// @@ -2695,9 +2815,15 @@ struct mRGBA2RGBA void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { + bool use_opencl = ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT; + int stype = _src.type(); + int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx; + + if( use_opencl && ocl_cvtColor(_src, _dst, code, dcn) ) + return; + Mat src = _src.getMat(), dst; Size sz = src.size(); - int scn = src.channels(), depth = src.depth(), bidx; CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F ); diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index cf8c43cf3e..ab134fd3c8 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -47,6 +47,7 @@ // */ #include "precomp.hpp" +#include "opencl_kernels.hpp" #include #include @@ -1901,8 +1902,45 @@ private: }; #endif +static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, + double fx, double fy, int interpolation) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + if( !(cn <= 4 && + (interpolation == INTER_NEAREST || + (interpolation == INTER_LINEAR && (depth == CV_8U || depth == CV_32F)))) ) + return false; + UMat src = _src.getUMat(); + _dst.create(dsize, type); + UMat dst = _dst.getUMat(); + ocl::Kernel k; + + if (interpolation == INTER_LINEAR) + { + int wdepth = depth == CV_8U ? CV_32S : CV_32F; + int wtype = CV_MAKETYPE(wdepth, cn); + char buf[2][32]; + k.create("resizeLN", ocl::imgproc::resize_oclsrc, + format("-D INTER_LINEAR -D depth=%s -D PIXTYPE=%s -D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s", + depth, ocl::typeToStr(type), ocl::typeToStr(wtype), + ocl::convertTypeStr(depth, wdepth, cn, buf[0]), + ocl::convertTypeStr(wdepth, depth, cn, buf[1]))); + } + else if (interpolation == INTER_NEAREST) + { + k.create("resizeNN", ocl::imgproc::resize_oclsrc, + format("-D INTER_NEAREST -D PIXTYPE=%s", ocl::memopTypeToStr(type) )); + } + + if( k.empty() ) + return false; + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + (float)(1./fx), (float)(1./fy)); + size_t globalsize[] = { dst.cols, dst.rows }; + return k.run(2, globalsize, 0, false); } +} ////////////////////////////////////////////////////////////////////////////////////////// @@ -2013,26 +2051,30 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, resizeArea_, 0 }; - Mat src = _src.getMat(); - Size ssize = src.size(); + Size ssize = _src.size(); CV_Assert( ssize.area() > 0 ); - CV_Assert( dsize.area() || (inv_scale_x > 0 && inv_scale_y > 0) ); - if( !dsize.area() ) + CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) ); + if( dsize.area() == 0 ) { - dsize = Size(saturate_cast(src.cols*inv_scale_x), - saturate_cast(src.rows*inv_scale_y)); - CV_Assert( dsize.area() ); + dsize = Size(saturate_cast(ssize.width*inv_scale_x), + saturate_cast(ssize.height*inv_scale_y)); + CV_Assert( dsize.area() > 0 ); } else { - inv_scale_x = (double)dsize.width/src.cols; - inv_scale_y = (double)dsize.height/src.rows; + inv_scale_x = (double)dsize.width/ssize.width; + inv_scale_y = (double)dsize.height/ssize.height; } + + if( ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT && + ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation) ) + return; + + Mat src = _src.getMat(); _dst.create(dsize, src.type()); Mat dst = _dst.getMat(); - #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation)) return; diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl new file mode 100644 index 0000000000..9ca98b0b91 --- /dev/null +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -0,0 +1,306 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/**************************************PUBLICFUNC*************************************/ + +#if defined (DOUBLE_SUPPORT) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif + +#if depth == 0 + #define DATA_TYPE uchar + #define MAX_NUM 255 + #define HALF_MAX 128 + #define SAT_CAST(num) convert_uchar_sat(num) + #define DEPTH_0 +#elif depth == 2 + #define DATA_TYPE ushort + #define MAX_NUM 65535 + #define HALF_MAX 32768 + #define SAT_CAST(num) convert_ushort_sat(num) + #define DEPTH_2 +#elif depth == 5 + #define DATA_TYPE float + #define MAX_NUM 1.0f + #define HALF_MAX 0.5f + #define SAT_CAST(num) (num) + #define DEPTH_5 +#else + #error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)" +#endif + +#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) + +enum +{ + yuv_shift = 14, + xyz_shift = 12, + R2Y = 4899, + G2Y = 9617, + B2Y = 1868, + BLOCK_SIZE = 256 +}; + +#define scnbytes ((int)sizeof(DATA_TYPE)*scn) +#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) + +///////////////////////////////////// RGB <-> GRAY ////////////////////////////////////// + +__kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (y < rows && x < cols) + { + const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); + DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); +#if defined (DEPTH_5) + dst[0] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f; +#else + dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift); +#endif + } +} + +__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (y < rows && x < cols) + { + const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); + DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); + DATA_TYPE val = src[0]; + dst[0] = dst[1] = dst[2] = val; +#if dcn == 4 + dst[3] = MAX_NUM; +#endif + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + +__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; +__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; + +__kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y < rows && x < cols) + { + const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); + DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); + DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2]; + +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YUVCoeffs_f; + const DATA_TYPE Y = b * coeffs[0] + g * coeffs[1] + r * coeffs[2]; + const DATA_TYPE U = (b - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE V = (r - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YUVCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(b * coeffs[0] + g * coeffs[1] + r * coeffs[2], yuv_shift); + const int U = CV_DESCALE((b - Y) * coeffs[3] + delta, yuv_shift); + const int V = CV_DESCALE((r - Y) * coeffs[4] + delta, yuv_shift); +#endif + + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( U ); + dst[2] = SAT_CAST( V ); + } +} + +__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; +__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 }; + +__kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y < rows && x < cols) + { + const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); + DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); + DATA_TYPE Y = src[0], U = src[1], V = src[2]; + +#if defined (DEPTH_5) + __constant float * coeffs = c_YUV2RGBCoeffs_f; + const float r = Y + (V - HALF_MAX) * coeffs[3]; + const float g = Y + (V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1]; + const float b = Y + (U - HALF_MAX) * coeffs[0]; +#else + __constant int * coeffs = c_YUV2RGBCoeffs_i; + const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift); + const int g = Y + CV_DESCALE((V - HALF_MAX) * coeffs[2] + (U - HALF_MAX) * coeffs[1], yuv_shift); + const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift); +#endif + + dst[bidx] = SAT_CAST( b ); + dst[1] = SAT_CAST( g ); + dst[bidx^2] = SAT_CAST( r ); +#if dcn == 4 + dst[3] = MAX_NUM; +#endif + } +} + +__constant int ITUR_BT_601_CY = 1220542; +__constant int ITUR_BT_601_CUB = 2116026; +__constant int ITUR_BT_601_CUG = 409993; +__constant int ITUR_BT_601_CVG = 852492; +__constant int ITUR_BT_601_CVR = 1673527; +__constant int ITUR_BT_601_SHIFT = 20; + +__kernel void YUV2RGBA_NV12(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + const int x = get_global_id(0); // max_x = width / 2 + const int y = get_global_id(1); // max_y = height/ 2 + + if (y < rows / 2 && x < cols / 2 ) + { + __global const uchar* ysrc = srcptr + mad24(y << 1, srcstep, (x << 1) + srcoffset); + __global const uchar* usrc = srcptr + mad24(rows + y, srcstep, (x << 1) + srcoffset); + __global uchar* dst1 = dstptr + mad24(y << 1, dststep, x*(dcn*2) + dstoffset); + __global uchar* dst2 = dstptr + mad24((y << 1) + 1, dststep, x*(dcn*2) + dstoffset); + + int Y1 = ysrc[0]; + int Y2 = ysrc[1]; + int Y3 = ysrc[srcstep]; + int Y4 = ysrc[srcstep + 1]; + + int U = usrc[0] - 128; + int V = usrc[1] - 128; + + int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * V; + int guv = (1 << (ITUR_BT_601_SHIFT - 1)) - ITUR_BT_601_CVG * V - ITUR_BT_601_CUG * U; + int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * U; + + Y1 = max(0, Y1 - 16) * ITUR_BT_601_CY; + dst1[2 - bidx] = convert_uchar_sat((Y1 + ruv) >> ITUR_BT_601_SHIFT); + dst1[1] = convert_uchar_sat((Y1 + guv) >> ITUR_BT_601_SHIFT); + dst1[bidx] = convert_uchar_sat((Y1 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst1[3] = 255; +#endif + + Y2 = max(0, Y2 - 16) * ITUR_BT_601_CY; + dst1[(dcn + 2) - bidx] = convert_uchar_sat((Y2 + ruv) >> ITUR_BT_601_SHIFT); + dst1[dcn + 1] = convert_uchar_sat((Y2 + guv) >> ITUR_BT_601_SHIFT); + dst1[dcn + bidx] = convert_uchar_sat((Y2 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst1[7] = 255; +#endif + + Y3 = max(0, Y3 - 16) * ITUR_BT_601_CY; + dst2[2 - bidx] = convert_uchar_sat((Y3 + ruv) >> ITUR_BT_601_SHIFT); + dst2[1] = convert_uchar_sat((Y3 + guv) >> ITUR_BT_601_SHIFT); + dst2[bidx] = convert_uchar_sat((Y3 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst2[3] = 255; +#endif + + Y4 = max(0, Y4 - 16) * ITUR_BT_601_CY; + dst2[(dcn + 2) - bidx] = convert_uchar_sat((Y4 + ruv) >> ITUR_BT_601_SHIFT); + dst2[dcn + 1] = convert_uchar_sat((Y4 + guv) >> ITUR_BT_601_SHIFT); + dst2[dcn + bidx] = convert_uchar_sat((Y4 + buv) >> ITUR_BT_601_SHIFT); +#if dcn == 4 + dst2[7] = 255; +#endif + } +} + +///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// + +__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; +__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; + +__kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset, + __global uchar* dstptr, int dststep, int dstoffset, + int rows, int cols) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y < rows && x < cols) + { + const DATA_TYPE* src = (const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); + DATA_TYPE* dst = (DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); + DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2]; + +#if defined (DEPTH_5) + __constant float * coeffs = c_RGB2YCrCbCoeffs_f; + const DATA_TYPE Y = b * coeffs[0] + g * coeffs[1] + r * coeffs[2]; + const DATA_TYPE Cr = (r - Y) * coeffs[3] + HALF_MAX; + const DATA_TYPE Cb = (b - Y) * coeffs[4] + HALF_MAX; +#else + __constant int * coeffs = c_RGB2YCrCbCoeffs_i; + const int delta = HALF_MAX * (1 << yuv_shift); + const int Y = CV_DESCALE(b * coeffs[0] + g * coeffs[1] + r * coeffs[2], yuv_shift); + const int Cr = CV_DESCALE((r - Y) * coeffs[3] + delta, yuv_shift); + const int Cb = CV_DESCALE((b - Y) * coeffs[4] + delta, yuv_shift); +#endif + + dst[0] = SAT_CAST( Y ); + dst[1] = SAT_CAST( Cr ); + dst[2] = SAT_CAST( Cb ); + } +} diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl new file mode 100644 index 0000000000..0805246ca3 --- /dev/null +++ b/modules/imgproc/src/opencl/resize.cl @@ -0,0 +1,151 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Zhang Ying, zhangying913@gmail.com +// Niko Li, newlife20080214@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +// resize kernel +// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported. +// We shall support other types later if necessary. + +#if defined DOUBLE_SUPPORT +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#define F double +#else +#define F float +#endif + +#define INTER_RESIZE_COEF_BITS 11 +#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) +#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1) +#define CAST_SCALE (1.0f/(1<=srccols ) x=srccols-1,u=0; + if ( y<0 ) y=0,v=0; + if ( y>=srcrows ) y=srcrows-1,v=0; + + int y_ = INC(y,srcrows); + int x_ = INC(x,srccols); + const PIXTYPE* src = (const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE)); + +#if depth == 0 + u = u * INTER_RESIZE_COEF_SCALE; + v = v * INTER_RESIZE_COEF_SCALE; + + int U = rint(u); + int V = rint(v); + int U1 = rint(INTER_RESIZE_COEF_SCALE - u); + int V1 = rint(INTER_RESIZE_COEF_SCALE - v); + + WORKTYPE data0 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE))); + WORKTYPE data1 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE))); + WORKTYPE data2 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE))); + WORKTYPE data3 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE))); + WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) + + mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3); + + PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS); +#else + float u1 = 1.f-u; + float v1 = 1.f-v; + WORKTYPE data0 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE))); + WORKTYPE data1 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE))); + WORKTYPE data2 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE))); + WORKTYPE data3 = convertToWT(*(const PIXTYPE*)(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE))); + PIXTYPE uval = u1 * v1 * s_data1 + u * v1 * s_data2 + u1 * v *s_data3 + u * v *s_data4; +#endif + + if(dx < dstcols && dy < dstrows) + { + PIXTYPE* dst = (PIXTYPE*)(dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE)); + dst[0] = uval; + } +} + +#elif defined INTER_NEAREST + +__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset, + int srcrows, int srccols, + __global uchar* dstptr, int dststep, int dstoffset, + int dstrows, int dstcols, + float ifx, float ify) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + + if( dx < dstcols && dy < dstrows ) + { + F s1 = dx*ifx; + F s2 = dy*ify; + int sx = min(convert_int_rtz(s1), srccols-1); + int sy = min(convert_int_rtz(s2), srcrows-1); + PIXTYPE* dst = (PIXTYPE*)(dstptr + + mad24(dy, dststep, dstoffset + dx*PIXSIZE)); + const PIXTYPE* src = (const PIXTYPE*)(srcptr + + mad24(sy, srcstep, srcoffset + sx*PIXSIZE)); + dst[0] = src[0]; + } +} + +#endif diff --git a/modules/imgproc/src/precomp.hpp b/modules/imgproc/src/precomp.hpp index a3bbd65dbb..9fa244109b 100644 --- a/modules/imgproc/src/precomp.hpp +++ b/modules/imgproc/src/precomp.hpp @@ -48,6 +48,7 @@ #include "opencv2/imgproc/imgproc_c.h" #include "opencv2/core/private.hpp" +#include "opencv2/core/ocl.hpp" #include #include diff --git a/modules/imgproc/test/test_imgproc_umat.cpp b/modules/imgproc/test/test_imgproc_umat.cpp new file mode 100644 index 0000000000..523703846f --- /dev/null +++ b/modules/imgproc/test/test_imgproc_umat.cpp @@ -0,0 +1,82 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include + +using namespace cv; +using namespace std; + +class CV_ImgprocUMatTest : public cvtest::BaseTest +{ +public: + CV_ImgprocUMatTest() {} + ~CV_ImgprocUMatTest() {} +protected: + void run(int) + { + string imgpath = string(ts->get_data_path()) + "shared/lena.png"; + Mat img = imread(imgpath, 1), gray, smallimg, result; + UMat uimg = img.getUMat(ACCESS_READ), ugray, usmallimg, uresult; + + cvtColor(img, gray, COLOR_BGR2GRAY); + resize(gray, smallimg, Size(), 0.75, 0.75, INTER_LINEAR); + equalizeHist(smallimg, result); + + cvtColor(uimg, ugray, COLOR_BGR2GRAY); + resize(ugray, usmallimg, Size(), 0.75, 0.75, INTER_LINEAR); + equalizeHist(usmallimg, uresult); + +#if 0 + imshow("orig", uimg); + imshow("small", usmallimg); + imshow("equalized gray", uresult); + waitKey(); + destroyWindow("orig"); + destroyWindow("small"); + destroyWindow("equalized gray"); +#endif + ts->set_failed_test_info(cvtest::TS::OK); + } +}; + +TEST(Imgproc_UMat, regression) { CV_ImgprocUMatTest test; test.safe_run(); } diff --git a/modules/nonfree/src/precomp.hpp b/modules/nonfree/src/precomp.hpp index 1aeb1df434..204feaf717 100644 --- a/modules/nonfree/src/precomp.hpp +++ b/modules/nonfree/src/precomp.hpp @@ -52,6 +52,8 @@ #include "opencv2/nonfree/cuda.hpp" #include "opencv2/core/private.cuda.hpp" +#include "opencv2/core/ocl.hpp" + #include "opencv2/opencv_modules.hpp" #ifdef HAVE_OPENCV_CUDAARITHM diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 20367ab98f..3b30663d5f 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -51,6 +51,8 @@ using namespace cv; using namespace cv::ocl; +static ProgramEntry surfprog = cv::ocl::nonfree::surf; + namespace cv { namespace ocl @@ -499,7 +501,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2), 1 }; - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset, @@ -545,7 +547,7 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat 1 }; - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter, @@ -570,7 +572,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa size_t localThreads[3] = {3, 3, 3}; size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1}; - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures) @@ -597,7 +599,7 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat size_t localThreads[3] = {32, 4, 1}; size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1}; - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures) @@ -614,7 +616,7 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures) size_t localThreads[3] = {256, 1, 1}; size_t globalThreads[3] = {saturate_cast(nFeatures), 1, 1}; - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } @@ -654,7 +656,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); kernelName = "normalize_descriptors64"; @@ -668,7 +670,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } else { @@ -697,7 +699,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); kernelName = "normalize_descriptors128"; @@ -711,7 +713,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); - openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); } } diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst index 453f05285b..b7a986229e 100644 --- a/modules/objdetect/doc/cascade_classification.rst +++ b/modules/objdetect/doc/cascade_classification.rst @@ -188,8 +188,8 @@ CascadeClassifier::detectMultiScale --------------------------------------- Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles. -.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) -.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, vector& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) +.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) +.. ocv:function:: void CascadeClassifier::detectMultiScale( InputArray image, vector& objects, vector& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects, rejectLevels, levelWeights diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index f1b371610d..c3dee4a2bb 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -159,14 +159,14 @@ public: CV_WRAP virtual bool empty() const; CV_WRAP bool load( const String& filename ); virtual bool read( const FileNode& node ); - CV_WRAP virtual void detectMultiScale( const Mat& image, + CV_WRAP virtual void detectMultiScale( InputArray image, CV_OUT std::vector& objects, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, Size minSize = Size(), Size maxSize = Size() ); - CV_WRAP virtual void detectMultiScale( const Mat& image, + CV_WRAP virtual void detectMultiScale( InputArray image, CV_OUT std::vector& objects, CV_OUT std::vector& numDetections, double scaleFactor=1.1, @@ -174,7 +174,7 @@ public: Size minSize=Size(), Size maxSize=Size() ); - CV_WRAP virtual void detectMultiScale( const Mat& image, + CV_WRAP virtual void detectMultiScale( InputArray image, CV_OUT std::vector& objects, CV_OUT std::vector& rejectLevels, CV_OUT std::vector& levelWeights, diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 88f463faa0..92b685c5dd 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -1154,13 +1154,14 @@ void CascadeClassifier::detectMultiScaleNoGrouping( const Mat& image, std::vecto } } -void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, +void CascadeClassifier::detectMultiScale( InputArray _image, std::vector& objects, std::vector& rejectLevels, std::vector& levelWeights, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize, bool outputRejectLevels ) { + Mat image = _image.getMat(); CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); if( empty() ) @@ -1188,21 +1189,23 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& o } } -void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, +void CascadeClassifier::detectMultiScale( InputArray _image, std::vector& objects, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize) { + Mat image = _image.getMat(); std::vector fakeLevels; std::vector fakeWeights; detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor, minNeighbors, flags, minObjectSize, maxObjectSize ); } -void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, +void CascadeClassifier::detectMultiScale( InputArray _image, std::vector& objects, std::vector& numDetections, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize ) { + Mat image = _image.getMat(); CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); if( empty() ) diff --git a/modules/objdetect/src/precomp.hpp b/modules/objdetect/src/precomp.hpp index e5157d022f..97b976baf2 100644 --- a/modules/objdetect/src/precomp.hpp +++ b/modules/objdetect/src/precomp.hpp @@ -49,6 +49,7 @@ #include "opencv2/ml.hpp" #include "opencv2/core/utility.hpp" +#include "opencv2/core/ocl.hpp" #include "opencv2/opencv_modules.hpp" #ifdef HAVE_OPENCV_HIGHGUI diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index efb684cc2a..98b734a539 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -47,6 +47,7 @@ #define __OPENCV_OCL_PRIVATE_UTIL__ #include "opencv2/ocl/cl_runtime/cl_runtime.hpp" +#include "opencv2/core/ocl_genbase.hpp" #include "opencv2/ocl.hpp" @@ -55,13 +56,6 @@ namespace cv namespace ocl { -struct ProgramEntry -{ - const char* name; - const char* programStr; - const char* programHash; -}; - inline cl_device_id getClDeviceID(const Context *ctx) { return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); diff --git a/modules/photo/test/test_hdr.cpp b/modules/photo/test/test_hdr.cpp index 1d232b3ef7..82ae25f525 100644 --- a/modules/photo/test/test_hdr.cpp +++ b/modules/photo/test/test_hdr.cpp @@ -50,11 +50,11 @@ void loadImage(string path, Mat &img) ASSERT_FALSE(img.empty()) << "Could not load input image " << path; } -void checkEqual(Mat img0, Mat img1, double threshold) +void checkEqual(Mat img0, Mat img1, double threshold, const string& name) { double max = 1.0; minMaxLoc(abs(img0 - img1), NULL, &max); - ASSERT_FALSE(max > threshold) << max; + ASSERT_FALSE(max > threshold) << "max=" << max << " threshold=" << threshold << " method=" << name; } static vector DEFAULT_VECTOR; @@ -98,31 +98,31 @@ TEST(Photo_Tonemap, regression) linear->process(img, result); loadImage(test_path + "linear.png", expected); result.convertTo(result, CV_8UC3, 255); - checkEqual(result, expected, 3); + checkEqual(result, expected, 3, "Simple"); Ptr drago = createTonemapDrago(gamma); drago->process(img, result); loadImage(test_path + "drago.png", expected); result.convertTo(result, CV_8UC3, 255); - checkEqual(result, expected, 3); + checkEqual(result, expected, 3, "Drago"); Ptr durand = createTonemapDurand(gamma); durand->process(img, result); loadImage(test_path + "durand.png", expected); result.convertTo(result, CV_8UC3, 255); - checkEqual(result, expected, 3); + checkEqual(result, expected, 3, "Durand"); Ptr reinhard = createTonemapReinhard(gamma); reinhard->process(img, result); loadImage(test_path + "reinhard.png", expected); result.convertTo(result, CV_8UC3, 255); - checkEqual(result, expected, 3); + checkEqual(result, expected, 3, "Reinhard"); Ptr mantiuk = createTonemapMantiuk(gamma); mantiuk->process(img, result); loadImage(test_path + "mantiuk.png", expected); result.convertTo(result, CV_8UC3, 255); - checkEqual(result, expected, 3); + checkEqual(result, expected, 3, "Mantiuk"); } TEST(Photo_AlignMTB, regression) @@ -165,7 +165,7 @@ TEST(Photo_MergeMertens, regression) loadImage(test_path + "merge/mertens.png", expected); merge->process(images, result); result.convertTo(result, CV_8UC3, 255); - checkEqual(expected, result, 3); + checkEqual(expected, result, 3, "Mertens"); } TEST(Photo_MergeDebevec, regression) @@ -188,7 +188,7 @@ TEST(Photo_MergeDebevec, regression) map->process(result, result); map->process(expected, expected); - checkEqual(expected, result, 1e-2f); + checkEqual(expected, result, 1e-2f, "Debevec"); } TEST(Photo_MergeRobertson, regression) @@ -208,7 +208,7 @@ TEST(Photo_MergeRobertson, regression) map->process(result, result); map->process(expected, expected); - checkEqual(expected, result, 1e-2f); + checkEqual(expected, result, 1e-2f, "MergeRobertson"); } TEST(Photo_CalibrateDebevec, regression) @@ -242,5 +242,5 @@ TEST(Photo_CalibrateRobertson, regression) Ptr calibrate = createCalibrateRobertson(); calibrate->process(images, response, times); - checkEqual(expected, response, 1e-3f); + checkEqual(expected, response, 1e-3f, "CalibrateRobertson"); } diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp index 44edc815ec..7fd6741e8d 100644 --- a/modules/superres/src/btv_l1_ocl.cpp +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -64,6 +64,8 @@ using namespace cv::ocl; using namespace cv::superres; using namespace cv::superres::detail; +static ProgramEntry superres_btvl1 = cv::ocl::superres::superres_btvl1; + namespace cv { namespace ocl diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp index c5dbe2db29..0681bfa28c 100644 --- a/modules/superres/src/precomp.hpp +++ b/modules/superres/src/precomp.hpp @@ -56,6 +56,7 @@ #include "opencv2/core/private.hpp" #include "opencv2/core/private.cuda.hpp" +#include "opencv2/core/ocl.hpp" #ifdef HAVE_OPENCV_CUDAARITHM # include "opencv2/cudaarithm.hpp" diff --git a/samples/cpp/ufacedetect.cpp b/samples/cpp/ufacedetect.cpp new file mode 100644 index 0000000000..9cd6b3a860 --- /dev/null +++ b/samples/cpp/ufacedetect.cpp @@ -0,0 +1,276 @@ +#include "opencv2/objdetect.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/core/ocl.hpp" + +#include +#include +#include +#include + +using namespace std; +using namespace cv; + +static void help() +{ + cout << "\nThis program demonstrates the cascade recognizer. Now you can use Haar or LBP features.\n" + "This classifier can recognize many kinds of rigid objects, once the appropriate classifier is trained.\n" + "It's most known use is for faces.\n" + "Usage:\n" + "./facedetect [--cascade= this is the primary trained classifier such as frontal face]\n" + " [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n" + " [--scale=]\n" + " [--try-flip]\n" + " [filename|camera_index]\n\n" + "see facedetect.cmd for one call:\n" + "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3\n\n" + "During execution:\n\tHit any key to quit.\n" + "\tUsing OpenCV version " << CV_VERSION << "\n" << endl; +} + +void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade, + CascadeClassifier& nestedCascade, + double scale, bool tryflip ); + +string cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml"; +string nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; + +int main( int argc, const char** argv ) +{ + VideoCapture capture; + UMat frame, image; + Mat canvas; + const string scaleOpt = "--scale="; + size_t scaleOptLen = scaleOpt.length(); + const string cascadeOpt = "--cascade="; + size_t cascadeOptLen = cascadeOpt.length(); + const string nestedCascadeOpt = "--nested-cascade"; + size_t nestedCascadeOptLen = nestedCascadeOpt.length(); + const string tryFlipOpt = "--try-flip"; + size_t tryFlipOptLen = tryFlipOpt.length(); + String inputName; + bool tryflip = false; + + help(); + + CascadeClassifier cascade, nestedCascade; + double scale = 1; + + for( int i = 1; i < argc; i++ ) + { + cout << "Processing " << i << " " << argv[i] << endl; + if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 ) + { + cascadeName.assign( argv[i] + cascadeOptLen ); + cout << " from which we have cascadeName= " << cascadeName << endl; + } + else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 ) + { + if( argv[i][nestedCascadeOpt.length()] == '=' ) + nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 ); + if( !nestedCascade.load( nestedCascadeName ) ) + cerr << "WARNING: Could not load classifier cascade for nested objects" << endl; + } + else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 ) + { + if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale > 1 ) + scale = 1; + cout << " from which we read scale = " << scale << endl; + } + else if( tryFlipOpt.compare( 0, tryFlipOptLen, argv[i], tryFlipOptLen ) == 0 ) + { + tryflip = true; + cout << " will try to flip image horizontally to detect assymetric objects\n"; + } + else if( argv[i][0] == '-' ) + { + cerr << "WARNING: Unknown option %s" << argv[i] << endl; + } + else + inputName = argv[i]; + } + + if( !cascade.load( cascadeName ) ) + { + cerr << "ERROR: Could not load classifier cascade" << endl; + help(); + return -1; + } + + if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) + { + int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0'; + if(!capture.open(c)) + cout << "Capture from camera #" << c << " didn't work" << endl; + } + else + { + if( inputName.empty() ) + inputName = "lena.jpg"; + image = imread( inputName, 1 ).getUMat(ACCESS_READ); + if( image.empty() ) + { + if(!capture.open( inputName )) + cout << "Could not read " << inputName << endl; + } + } + + namedWindow( "result", 1 ); + + if( capture.isOpened() ) + { + cout << "Video capturing has been started ..." << endl; + for(;;) + { + capture >> frame; + if( frame.empty() ) + break; + + detectAndDraw( frame, canvas, cascade, nestedCascade, scale, tryflip ); + + if( waitKey( 10 ) >= 0 ) + break; + } + } + else + { + cout << "Detecting face(s) in " << inputName << endl; + if( !image.empty() ) + { + detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip ); + waitKey(0); + } + else if( !inputName.empty() ) + { + /* assume it is a text file containing the + list of the image filenames to be processed - one per line */ + FILE* f = fopen( inputName.c_str(), "rt" ); + if( f ) + { + char buf[1000+1]; + while( fgets( buf, 1000, f ) ) + { + int len = (int)strlen(buf), c; + while( len > 0 && isspace(buf[len-1]) ) + len--; + buf[len] = '\0'; + cout << "file " << buf << endl; + image = imread( buf, 1 ).getUMat(ACCESS_READ); + if( !image.empty() ) + { + detectAndDraw( image, canvas, cascade, nestedCascade, scale, tryflip ); + c = waitKey(0); + if( c == 27 || c == 'q' || c == 'Q' ) + break; + } + else + { + cerr << "Aw snap, couldn't read image " << buf << endl; + } + } + fclose(f); + } + } + } + + return 0; +} + +void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade, + CascadeClassifier& nestedCascade, + double scale0, bool tryflip ) +{ + int i = 0; + double t = 0, scale=1; + vector faces, faces2; + const static Scalar colors[] = + { + Scalar(0,0,255), + Scalar(0,128,255), + Scalar(0,255,255), + Scalar(0,255,0), + Scalar(255,128,0), + Scalar(255,255,0), + Scalar(255,0,0), + Scalar(255,0,255) + }; + static UMat gray, smallImg; + + t = (double)getTickCount(); + + cvtColor( img, gray, COLOR_BGR2GRAY ); + resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR ); + cvtColor(smallImg, canvas, COLOR_GRAY2BGR); + equalizeHist( smallImg, smallImg ); + + cascade.detectMultiScale( smallImg, faces, + 1.1, 2, 0 + //|CASCADE_FIND_BIGGEST_OBJECT + //|CASCADE_DO_ROUGH_SEARCH + |CASCADE_SCALE_IMAGE + , + Size(30, 30) ); + if( tryflip ) + { + flip(smallImg, smallImg, 1); + cascade.detectMultiScale( smallImg, faces2, + 1.1, 2, 0 + //|CASCADE_FIND_BIGGEST_OBJECT + //|CASCADE_DO_ROUGH_SEARCH + |CASCADE_SCALE_IMAGE + , + Size(30, 30) ); + for( vector::const_iterator r = faces2.begin(); r != faces2.end(); r++ ) + { + faces.push_back(Rect(smallImg.cols - r->x - r->width, r->y, r->width, r->height)); + } + } + t = (double)getTickCount() - t; + cvtColor(smallImg, canvas, COLOR_GRAY2BGR); + + double fps = getTickFrequency()/t; + + putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", fps), Point(250, 50), + FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3); + + for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) + { + vector nestedObjects; + Point center; + Scalar color = colors[i%8]; + int radius; + + double aspect_ratio = (double)r->width/r->height; + if( 0.75 < aspect_ratio && aspect_ratio < 1.3 ) + { + center.x = cvRound((r->x + r->width*0.5)*scale); + center.y = cvRound((r->y + r->height*0.5)*scale); + radius = cvRound((r->width + r->height)*0.25*scale); + circle( canvas, center, radius, color, 3, 8, 0 ); + } + else + rectangle( canvas, Point(cvRound(r->x*scale), cvRound(r->y*scale)), + Point(cvRound((r->x + r->width-1)*scale), cvRound((r->y + r->height-1)*scale)), + color, 3, 8, 0); + if( nestedCascade.empty() ) + continue; + UMat smallImgROI = smallImg(*r); + nestedCascade.detectMultiScale( smallImgROI, nestedObjects, + 1.1, 2, 0 + //|CASCADE_FIND_BIGGEST_OBJECT + //|CASCADE_DO_ROUGH_SEARCH + //|CASCADE_DO_CANNY_PRUNING + |CASCADE_SCALE_IMAGE + , + Size(30, 30) ); + for( vector::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ ) + { + center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale); + center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale); + radius = cvRound((nr->width + nr->height)*0.25*scale); + circle( canvas, center, radius, color, 3, 8, 0 ); + } + } + imshow( "result", canvas ); +} diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 8669719504..781efa066f 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -11,7 +11,7 @@ using namespace std; using namespace cv; -#define LOOP_NUM 10 +#define LOOP_NUM 1 const static Scalar colors[] = { CV_RGB(0,0,255), CV_RGB(0,128,255), @@ -46,12 +46,12 @@ static double getTime() static void detect( Mat& img, vector& faces, ocl::OclCascadeClassifier& cascade, - double scale, bool calTime); + double scale); static void detectCPU( Mat& img, vector& faces, CascadeClassifier& cascade, - double scale, bool calTime); + double scale); static void Draw(Mat& img, vector& faces, double scale); @@ -83,7 +83,7 @@ int main( int argc, const char** argv ) } CvCapture* capture = 0; - Mat frame, frameCopy, image; + Mat frame, frameCopy0, frameCopy, image; bool useCPU = cmd.get("s"); string inputName = cmd.get("i"); @@ -129,16 +129,21 @@ int main( int argc, const char** argv ) if( frame.empty() ) break; if( iplImg->origin == IPL_ORIGIN_TL ) - frame.copyTo( frameCopy ); + frame.copyTo( frameCopy0 ); else - flip( frame, frameCopy, 0 ); + flip( frame, frameCopy0, 0 ); + if( scale == 1) + frameCopy0.copyTo(frameCopy); + else + resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR); + work_end = 0; if(useCPU) - detectCPU(frameCopy, faces, cpu_cascade, scale, false); + detectCPU(frameCopy, faces, cpu_cascade, 1); else - detect(frameCopy, faces, cascade, scale, false); + detect(frameCopy, faces, cascade, 1); - Draw(frameCopy, faces, scale); + Draw(frameCopy, faces, 1); if( waitKey( 10 ) >= 0 ) break; } @@ -150,17 +155,19 @@ int main( int argc, const char** argv ) vector faces; vector ref_rst; double accuracy = 0.; + detectCPU(image, ref_rst, cpu_cascade, scale); + work_end = 0; + for(int i = 0; i <= LOOP_NUM; i ++) { cout << "loop" << i << endl; if(useCPU) - detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); + detectCPU(image, faces, cpu_cascade, scale); else { - detect(image, faces, cascade, scale, i==0?false:true); + detect(image, faces, cascade, scale); if(i == 0) { - detectCPU(image, ref_rst, cpu_cascade, scale, false); accuracy = checkRectSimilarity(image.size(), ref_rst, faces); } } @@ -184,11 +191,11 @@ int main( int argc, const char** argv ) void detect( Mat& img, vector& faces, ocl::OclCascadeClassifier& cascade, - double scale, bool calTime) + double scale) { ocl::oclMat image(img); ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); - if(calTime) workBegin(); + workBegin(); ocl::cvtColor( image, gray, COLOR_BGR2GRAY ); ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); ocl::equalizeHist( smallImg, smallImg ); @@ -197,14 +204,14 @@ void detect( Mat& img, vector& faces, 3, 0 |CASCADE_SCALE_IMAGE , Size(30,30), Size(0, 0) ); - if(calTime) workEnd(); + workEnd(); } void detectCPU( Mat& img, vector& faces, CascadeClassifier& cascade, - double scale, bool calTime) + double scale) { - if(calTime) workBegin(); + workBegin(); Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); cvtColor(img, cpu_gray, COLOR_BGR2GRAY); resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR); @@ -212,13 +219,15 @@ void detectCPU( Mat& img, vector& faces, cascade.detectMultiScale(cpu_smallImg, faces, 1.1, 3, 0 | CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0)); - if(calTime) workEnd(); + workEnd(); } void Draw(Mat& img, vector& faces, double scale) { int i = 0; + putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50), + FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3); for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) { Point center; @@ -229,7 +238,7 @@ void Draw(Mat& img, vector& faces, double scale) radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } - imwrite( outputName, img ); + //imwrite( outputName, img ); if(abs(scale-1.0)>.001) { resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));