diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index ab12dc2cbf..c296f57a3d 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -584,7 +584,8 @@ namespace cv CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); - CV_EXPORTS void setIdentity(oclMat& src, double val); + //! initializes a scaled identity matrix + CV_EXPORTS void setIdentity(oclMat& src, const Scalar & val = Scalar(1)); //////////////////////////////// Filter Engine //////////////////////////////// diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 0dd695bfa6..2a663b990a 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -64,11 +64,10 @@ namespace cv { //////////////////////////////// OpenCL kernel strings ///////////////////// + extern const char *arithm_absdiff_nonsaturate; extern const char *arithm_nonzero; extern const char *arithm_sum; - extern const char *arithm_sum_3; extern const char *arithm_minMax; - extern const char *arithm_minMax_mask; extern const char *arithm_minMaxLoc; extern const char *arithm_minMaxLoc_mask; extern const char *arithm_LUT; @@ -318,21 +317,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int ////////////////////////////////// sum ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -//type = 0 sum,type = 1 absSum,type = 2 sqrSum -static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, int type = 0) +enum { SUM = 0, ABS_SUM, SQR_SUM }; + +static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupnum, int type, int ddepth) { - vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int ochannels = src.oclchannels(); + int all_cols = src.step / src.elemSize(); + int pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); - char build_options[512]; - CV_Assert(type == 0 || type == 1 || type == 2); - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type); + int offset = src.offset / src.elemSize(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const funcMap[] = { "FUNC_SUM", "FUNC_ABS_SUM", "FUNC_SQR_SUM" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s", + typeMap[src.depth()], channelMap[ochannels], + typeMap[ddepth], channelMap[ochannels], + typeMap[ddepth], channelMap[ochannels], + funcMap[type]); + + vector > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); @@ -340,55 +346,63 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - if (src.oclchannels() != 3) - openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options); - else - openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options); + size_t globalThreads[3] = { groupnum * 256, 1, 1 }; + size_t localThreads[3] = { 256, 1, 1 }; + + openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } template -Scalar arithmetic_sum(const oclMat &src, int type = 0) +Scalar arithmetic_sum(const oclMat &src, int type, int ddepth) { + CV_Assert(src.step % src.elemSize() == 0); + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen; + + int dbsize = groupnum * src.oclchannels(); Context *clCxt = src.clCxt; AutoBuffer _buf(dbsize); T *p = (T*)_buf; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T)); - Scalar s = Scalar::all(0.0); - arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type); - memset(p, 0, dbsize * sizeof(T)); - openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); - for (int i = 0; i < dbsize;) - { - for (int j = 0; j < src.oclchannels(); j++, i++) - s.val[j] += p[i]; - } + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T)); + arithmetic_sum_buffer_run(src, dstBuffer, groupnum, type, ddepth); + openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); openCLFree(dstBuffer); + + Scalar s = Scalar::all(0.0); + for (int i = 0; i < dbsize;) + for (int j = 0; j < src.oclchannels(); j++, i++) + s.val[j] += p[i]; + return s; } -typedef Scalar (*sumFunc)(const oclMat &src, int type); +typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth); + Scalar cv::ocl::sum(const oclMat &src) { if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { + arithmetic_sum, arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 0); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = std::max(src.depth(), CV_32S); + if (!hasDouble && ddepth == CV_64F) + ddepth = CV_32F; + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, SUM, ddepth); } Scalar cv::ocl::absSum(const oclMat &src) @@ -397,15 +411,20 @@ Scalar cv::ocl::absSum(const oclMat &src) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { + arithmetic_sum, arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 1); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = std::max(src.depth(), CV_32S); + if (!hasDouble && ddepth == CV_64F) + ddepth = CV_32F; + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, ABS_SUM, ddepth); } Scalar cv::ocl::sqrSum(const oclMat &src) @@ -414,15 +433,18 @@ Scalar cv::ocl::sqrSum(const oclMat &src) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } - static sumFunc functab[2] = + static sumFunc functab[3] = { - arithmetic_sum, + arithmetic_sum, + arithmetic_sum, arithmetic_sum }; - sumFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; - return func(src, 2); + bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F); + + sumFunc func = functab[ddepth - CV_32S]; + return func(src, SQR_SUM, ddepth); } ////////////////////////////////////////////////////////////////////////////// @@ -431,23 +453,15 @@ Scalar cv::ocl::sqrSum(const oclMat &src) void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) { - CV_Assert(src.depth() <= CV_32S); - cv::Size sz(1, 1); - int channels = src.oclchannels(); - Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)), - m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); - oclMat dst1(m1), dst2(m2); + double total = 1.0 / src.size().area(); - //arithmetic_sum_run(src, dst1,"arithm_op_sum"); - //arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); + mean = sum(src); + stddev = sqrSum(src); - m1 = (Mat)dst1; - m2 = (Mat)dst2; - int i = 0, *p = (int *)m1.data, *q = (int *)m2.data; - for (; i < channels; i++) + for (int i = 0; i < 4; ++i) { - mean.val[i] = (double)p[i] / (src.cols * src.rows); - stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.)); + mean[i] *= total; + stddev[i] = std::sqrt(std::max(stddev[i] * total - mean.val[i] * mean.val[i] , 0.)); } } @@ -455,139 +469,120 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) //////////////////////////////////// minMax ///////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen , int groupnum, string kernelName) +template +static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem &dst, int groupnum, string kernelName) { - vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int all_cols = src.step / src.elemSize(); + int pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; - int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); + int cols = all_cols - invalid_cols , elemnum = cols * src.rows; + int offset = src.offset / src.elemSize(); + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + + ostringstream stream; + stream << "-D T=" << typeMap[src.depth()] << channelMap[src.channels()]; + stream << " -D MAX_VAL=" << (WT)numeric_limits::max(); + stream << " -D MIN_VAL=" << (WT)numeric_limits::min(); + string buildOptions = stream.str(); + + vector > args; + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + + int minvalid_cols = 0, moffset = 0; if (!mask.empty()) { - int mall_cols = mask.step / (vlen * mask.elemSize1()); - int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1()); - int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / (vlen * mask.elemSize1()) - 1; - int minvalid_cols = mpre_cols + msec_cols; - int moffset = mask.offset / (vlen * mask.elemSize1()); + int mall_cols = mask.step / mask.elemSize(); + int mpre_cols = (mask.offset % mask.step) / mask.elemSize(); + int msec_cols = mall_cols - (mask.offset % mask.step + mask.cols * mask.elemSize() - 1) / mask.elemSize() - 1; + minvalid_cols = mpre_cols + msec_cols; + moffset = mask.offset / mask.elemSize(); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); + + kernelName += "_mask"; } - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, gt, lt, args, -1, -1, build_options); + + size_t globalThreads[3] = {groupnum * 256, 1, 1}; + size_t localThreads[3] = {256, 1, 1}; + + openCLExecuteKernel(src.clCxt, &arithm_minMax, kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } - -static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &dst, int vlen, int groupnum, string kernelName) -{ - vector > args; - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - char build_options[50]; - if (src.oclchannels() == 1) - { - int cols = (src.cols - 1) / vlen + 1; - int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; - int offset = src.offset / src.elemSize1(); - int repeat_me = vlen - (mask.cols % vlen == 0 ? vlen : mask.cols % vlen); - int minvalid_cols = mask.step / (vlen * mask.elemSize1()) - cols; - int moffset = mask.offset / mask.elemSize1(); - int elemnum = cols * src.rows; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_E%d", src.depth(), repeat_me); - args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_int) , (void *)&minvalid_cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options); - } -} - -template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, +template +void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - groupnum = groupnum * 2; - int vlen = 8; - int dbsize = groupnum * 2 * vlen * sizeof(T) ; + int dbsize = groupnum * 2 * src.elemSize(); ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); cl_mem buf_data = reinterpret_cast(buf.data); - - if (mask.empty()) - { - arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax"); - } - else - { - arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask"); - } + arithmetic_minMax_run(src, mask, buf_data, groupnum, "arithm_op_minMax"); Mat matbuf = Mat(buf); T *p = matbuf.ptr(); if (minVal != NULL) { *minVal = std::numeric_limits::max(); - for (int i = 0; i < vlen * (int)groupnum; i++) - { + for (int i = 0, end = src.oclchannels() * (int)groupnum; i < end; i++) *minVal = *minVal < p[i] ? *minVal : p[i]; - } } if (maxVal != NULL) { *maxVal = -std::numeric_limits::max(); - for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) - { + for (int i = src.oclchannels() * (int)groupnum, end = i << 1; i < end; i++) *maxVal = *maxVal > p[i] ? *maxVal : p[i]; - } } } -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { oclMat buf; minMax_buf(src, minVal, maxVal, mask, buf); } +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); + void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { - CV_Assert(src.oclchannels() == 1); + CV_Assert(src.channels() == 1); + CV_Assert(src.size() == mask.size() || mask.empty()); + CV_Assert(src.step % src.elemSize() == 0); + + if (minVal == NULL && maxVal == NULL) + return; + if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } + static minMaxFunc functab[8] = { - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, - arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, + arithmetic_minMax, 0 }; + minMaxFunc func; func = functab[src.depth()]; func(src, minVal, maxVal, mask, buf); @@ -599,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons double cv::ocl::norm(const oclMat &src1, int normType) { - return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType); + CV_Assert((normType & NORM_RELATIVE) == 0); + return norm(src1, oclMat(), normType); +} + +static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff) +{ + CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0)); + Context *clCxt = src1.clCxt; + + int ddepth = CV_64F; + diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels())); + + int oclChannels = src1.oclchannels(), sdepth = src1.depth(); + int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize(); + int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize(); + int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize(); + + string kernelName = "arithm_absdiff_nonsaturate"; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { diff.cols, diff.rows, 1 }; + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + + std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s", + typeMap[sdepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels], + typeMap[ddepth], channelMap[oclChannels]); + + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); + + if (!src2.empty()) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); + + kernelName += "_binary"; + } + + args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 )); + + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); + + openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate, + kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) { + CV_Assert(!src1.empty()); + CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); + + if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); + } + bool isRelative = (normType & NORM_RELATIVE) != 0; - normType &= 7; - CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2)); - int channels = src1.oclchannels(), i = 0, *p; + normType &= NORM_TYPE_MASK; + CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); + + Scalar s; + int cn = src1.channels(); double r = 0; - oclMat gm1(src1.size(), src1.type()); - int min_int = (normType == NORM_INF ? CL_INT_MIN : 0); - Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int)); - oclMat gm2(m), emptyMat; - switch(normType) + oclMat diff; + arithm_absdiff_nonsaturate_run(src1, src2, diff); + + switch (normType) { case NORM_INF: - // arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max"); - m = (gm2); - p = (int *)m.data; - r = -std::numeric_limits::max(); - for (i = 0; i < channels; i++) - { - r = std::max(r, (double)p[i]); - } + diff = diff.reshape(1); + minMax(diff, NULL, &r); break; case NORM_L1: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; break; case NORM_L2: - //arithmetic_run(src1, src2, gm1, "arithm_op_absdiff"); - //arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum"); - m = (gm2); - p = (int *)m.data; - for (i = 0; i < channels; i++) - { - r = r + (double)p[i]; - } + s = sqrSum(diff); + for (int i = 0; i < cn; ++i) + r += s[i]; r = std::sqrt(r); break; } if (isRelative) r = r / norm(src2, normType); + return r; } @@ -923,47 +963,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat return; } - CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows); - CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); - Context *clCxt = src1.clCxt; - int channels = dst.oclchannels(); - int depth = dst.depth(); - - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); + int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels(); + int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1(); + int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1(); + int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1(); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { cols1, dst.rows, 1 }; - int dst_step1 = dst.cols * dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) +void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) { CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); + CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0); + Angle.create(x.size(), x.type()); - string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; - if (angleInDegrees) - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); - else - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); + arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase); } ////////////////////////////////////////////////////////////////////////////// @@ -1228,21 +1259,22 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ///////////////////////////// countNonZero /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int groupnum, string kernelName) +static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int groupnum, string kernelName) { - vector > args; - int all_cols = src.step / (vlen * src.elemSize1()); - int pre_cols = (src.offset % src.step) / (vlen * src.elemSize1()); - int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / (vlen * src.elemSize1()) - 1; + int ochannels = src.oclchannels(); + int all_cols = src.step / src.elemSize(); + int pre_cols = (src.offset % src.step) / src.elemSize(); + int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1; int invalid_cols = pre_cols + sec_cols; int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; - int offset = src.offset / (vlen * src.elemSize1()); - int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); + int offset = src.offset / src.elemSize(); - char build_options[50]; - sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { " ", " ", "2", "4", "4" }; + string buildOptions = format("-D srcT=%s%s -D dstT=int%s", typeMap[src.depth()], channelMap[ochannels], + channelMap[ochannels]); + vector > args; args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&invalid_cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset)); @@ -1250,33 +1282,44 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); - size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, gt, lt, args, -1, -1, build_options); + + size_t globalThreads[3] = { groupnum * 256, 1, 1 }; + size_t localThreads[3] = { 256, 1, 1 }; + + openCLExecuteKernel(src.clCxt, &arithm_nonzero, kernelName, globalThreads, localThreads, + args, -1, -1, buildOptions.c_str()); } int cv::ocl::countNonZero(const oclMat &src) { - size_t groupnum = src.clCxt->computeUnits(); + CV_Assert(src.step % src.elemSize() == 0); + CV_Assert(src.channels() == 1); + + Context *clCxt = src.clCxt; if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } + + size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); - int vlen = 8 , dbsize = groupnum * vlen; - Context *clCxt = src.clCxt; + int dbsize = groupnum; + string kernelName = "arithm_op_nonzero"; AutoBuffer _buf(dbsize); int *p = (int*)_buf, nonzero = 0; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int)); - arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName); - memset(p, 0, dbsize * sizeof(int)); + + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int)); + arithmetic_countNonZero_run(src, dstBuffer, groupnum, kernelName); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int)); + for (int i = 0; i < dbsize; i++) nonzero += p[i]; openCLSafeCall(clReleaseMemObject(dstBuffer)); + return nonzero; } @@ -1522,8 +1565,8 @@ oclMatExpr::operator oclMat() const /////////////////////////////// transpose //////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -#define TILE_DIM (32) -#define BLOCK_ROWS (256/TILE_DIM) +#define TILE_DIM (32) +#define BLOCK_ROWS (256 / TILE_DIM) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) { @@ -1702,63 +1745,35 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) /////////////////////////////// setIdentity ////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void cv::ocl::setIdentity(oclMat& src, double scalar) +void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) { - CV_Assert(src.empty() == false && src.rows == src.cols); - CV_Assert(src.type() == CV_32SC1 || src.type() == CV_32FC1); - int src_step = src.step/src.elemSize(); Context *clCxt = Context::getContext(); - size_t local_threads[] = {16, 16, 1}; - size_t global_threads[] = {src.cols, src.rows, 1}; - - string kernelName = "setIdentityKernel"; - if (src.type() == CV_32FC1) - kernelName += "_F1"; - else if (src.type() == CV_32SC1) - kernelName += "_I1"; - else + if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { - kernelName += "_D1"; - if (!(clCxt->supportsFeature(Context::CL_DOUBLE))) - { - oclMat temp; - src.convertTo(temp, CV_32FC1); - temp.copyTo(src); - } - + CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); + return; } + CV_Assert(src.step % src.elemSize() == 0); + + int src_step1 = src.step / src.elemSize(), src_offset1 = src.offset / src.elemSize(); + size_t local_threads[] = { 16, 16, 1 }; + size_t global_threads[] = { src.cols, src.rows, 1 }; + + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; + const char * const channelMap[] = { "", "", "2", "4", "4" }; + string buildOptions = format("-D T=%s%s", typeMap[src.depth()], channelMap[src.oclchannels()]); + vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - int scalar_i = 0; - float scalar_f = 0.0f; - if (clCxt->supportsFeature(Context::CL_DOUBLE)) - { - if (src.type() == CV_32SC1) - { - scalar_i = (int)scalar; - args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); - } - else - args.push_back(make_pair(sizeof(cl_double), (void*)&scalar)); - } - else - { - if (src.type() == CV_32SC1) - { - scalar_i = (int)scalar; - args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); - } - else - { - scalar_f = (float)scalar; - args.push_back(make_pair(sizeof(cl_float), (void*)&scalar_f)); - } - } + oclMat sc(1, 1, src.type(), scalar); + args.push_back( make_pair( sizeof(cl_mem), (void *)&sc.data )); - openCLExecuteKernel(clCxt, &arithm_setidentity, kernelName, global_threads, local_threads, args, -1, -1); + openCLExecuteKernel(clCxt, &arithm_setidentity, "setIdentity", global_threads, local_threads, + args, -1, -1, buildOptions.c_str()); } diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl new file mode 100644 index 0000000000..e5d8271394 --- /dev/null +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -0,0 +1,93 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif + +__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset, + __global srcT *src2, int src2_step, int src2_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int src2_index = mad24(y, src2_step, x + src2_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + dstT t1 = convertToDstT(src2[src2_index]); + dstT t2 = t0 - t1; + + dst[dst_index] = t2 >= 0 ? t2 : -t2; + } +} + +__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset, + __global dstT *dst, int dst_step, int dst_offset, + int cols, int rows) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < cols && y < rows) + { + int src1_index = mad24(y, src1_step, x + src1_offset); + int dst_index = mad24(y, dst_step, x + dst_offset); + + dstT t0 = convertToDstT(src1[src1_index]); + + dst[dst_index] = t0 >= 0 ? t0 : -t0; + } +} diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl index 23b2933066..c5d3ec2abd 100644 --- a/modules/ocl/src/opencl/arithm_minMax.cl +++ b/modules/ocl/src/opencl/arithm_minMax.cl @@ -53,169 +53,117 @@ #endif #endif -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 -#define CONVERT_TYPE convert_uchar8 -#define MIN_VAL 0 -#define MAX_VAL 255 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#define CONVERT_TYPE convert_char8 -#define MIN_VAL -128 -#define MAX_VAL 127 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#define CONVERT_TYPE convert_ushort8 -#define MIN_VAL 0 -#define MAX_VAL 65535 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#define CONVERT_TYPE convert_short8 -#define MIN_VAL -32768 -#define MAX_VAL 32767 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#define CONVERT_TYPE convert_int8 -#define MIN_VAL INT_MIN -#define MAX_VAL INT_MAX -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#define CONVERT_TYPE convert_float8 -#define MIN_VAL (-FLT_MAX) -#define MAX_VAL FLT_MAX -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#define CONVERT_TYPE convert_double8 -#define MIN_VAL (-DBL_MAX) -#define MAX_VAL DBL_MAX -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = a.s1; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = a.s2;a.s1 = a.s2; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = a.s3;a.s1 = a.s3;a.s2 = a.s3; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = a.s4;a.s1 = a.s4;a.s2 = a.s4;a.s3 = a.s4; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = a.s5;a.s1 = a.s5;a.s2 = a.s5;a.s3 = a.s5;a.s4 = a.s5; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = a.s6;a.s1 = a.s6;a.s2 = a.s6;a.s3 = a.s6;a.s4 = a.s6;a.s5 = a.s6; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = a.s7;a.s1 = a.s7;a.s2 = a.s7;a.s3 = a.s7;a.s4 = a.s7;a.s5 = a.s7;a.s6 = a.s7; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = a.s6; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a) a.s7 = a.s5;a.s6 = a.s5; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = a.s4;a.s6 = a.s4;a.s5 = a.s4; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = a.s3;a.s6 = a.s3;a.s5 = a.s3;a.s4 = a.s3; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = a.s2;a.s6 = a.s2;a.s5 = a.s2;a.s4 = a.s2;a.s3 = a.s2; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = a.s1;a.s6 = a.s1;a.s5 = a.s1;a.s4 = a.s1;a.s3 = a.s1;a.s2 = a.s1; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = a.s0;a.s6 = a.s0;a.s5 = a.s0;a.s4 = a.s0;a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0; -#endif - #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable /**************************************Array minMax**************************************/ -__kernel void arithm_op_minMax (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global VEC_TYPE *dst) + +__kernel void arithm_op_minMax(__global const T * src, __global T * dst, + int cols, int invalid_cols, int offset, int elemnum, int groupnum) { unsigned int lid = get_local_id(0); unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); + unsigned int id = get_global_id(0); + unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local VEC_TYPE localmem_max[128],localmem_min[128]; - VEC_TYPE minval,maxval,temp; - if(id < elemnum) - { - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - minval = temp; - maxval = temp; - } - else - { - minval = MAX_VAL; - maxval = MIN_VAL; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) + + __local T localmem_max[128], localmem_min[128]; + T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) { idx = offset + id + (id / cols) * invalid_cols; temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - minval = min(minval,temp); - maxval = max(maxval,temp); + minval = min(minval, temp); + maxval = max(maxval, temp); } + if(lid > 127) { localmem_min[lid - 128] = minval; localmem_max[lid - 128] = maxval; } barrier(CLK_LOCAL_MEM_FENCE); + if(lid < 128) { - localmem_min[lid] = min(minval,localmem_min[lid]); - localmem_max[lid] = max(maxval,localmem_max[lid]); + localmem_min[lid] = min(minval, localmem_min[lid]); + localmem_max[lid] = max(maxval, localmem_max[lid]); } barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) + + for (int lsize = 64; lsize > 0; lsize >>= 1) { - if(lid < lsize) + if (lid < lsize) { int lid2 = lsize + lid; - localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); - localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); + localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); } barrier(CLK_LOCAL_MEM_FENCE); } - if( lid == 0) + + if (lid == 0) + { + dst[gid] = localmem_min[0]; + dst[gid + groupnum] = localmem_max[0]; + } +} + +__kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst, + int cols, int invalid_cols, int offset, + int elemnum, int groupnum, + const __global uchar * mask, int minvalid_cols, int moffset) +{ + unsigned int lid = get_local_id(0); + unsigned int gid = get_group_id(0); + unsigned int id = get_global_id(0); + + unsigned int idx = offset + id + (id / cols) * invalid_cols; + unsigned int midx = moffset + id + (id / cols) * minvalid_cols; + + __local T localmem_max[128], localmem_min[128]; + T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) + { + idx = offset + id + (id / cols) * invalid_cols; + midx = moffset + id + (id / cols) * minvalid_cols; + + if (mask[midx]) + { + temp = src[idx]; + minval = min(minval, temp); + maxval = max(maxval, temp); + } + } + + if(lid > 127) + { + localmem_min[lid - 128] = minval; + localmem_max[lid - 128] = maxval; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if(lid < 128) + { + localmem_min[lid] = min(minval, localmem_min[lid]); + localmem_max[lid] = max(maxval, localmem_max[lid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = 64; lsize > 0; lsize >>= 1) + { + if (lid < lsize) + { + int lid2 = lsize + lid; + localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (lid == 0) { dst[gid] = localmem_min[0]; dst[gid + groupnum] = localmem_max[0]; diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl index e34207de36..921367b3df 100644 --- a/modules/ocl/src/opencl/arithm_nonzero.cl +++ b/modules/ocl/src/opencl/arithm_nonzero.cl @@ -41,151 +41,53 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // -/// -/**************************************PUBLICFUNC*************************************/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 #endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = 0; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = 0;a.s1 = 0; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = 0; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a) a.s7 = 0;a.s6 = 0; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0; -#endif - -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable /**************************************Count NonZero**************************************/ -__kernel void arithm_op_nonzero (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global int8 *dst) + +__kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum, + __global srcT *src, __global dstT *dst) { - unsigned int lid = get_local_id(0); - unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); - unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local int8 localmem_nonzero[128]; - int8 nonzero; - VEC_TYPE zero=0,one=1,temp; - if(id < elemnum) - { - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - nonzero = convert_int8(temp == zero ? zero:one); - } - else - { - nonzero = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) - { - idx = offset + id + (id / cols) * invalid_cols; - temp = src[idx]; - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - nonzero = nonzero + convert_int8(temp == zero ? zero:one); - } - if(lid > 127) - { - localmem_nonzero[lid - 128] = nonzero; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { - localmem_nonzero[lid] = nonzero + localmem_nonzero[lid]; - } - barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) - { - if(lid < lsize) - { + unsigned int lid = get_local_id(0); + unsigned int gid = get_group_id(0); + unsigned int id = get_global_id(0); + + unsigned int idx = offset + id + (id / cols) * invalid_cols; + __local dstT localmem_nonzero[128]; + dstT nonzero = (dstT)(0); + srcT zero = (srcT)(0), one = (srcT)(1); + + for (int grain = groupnum << 8; id < elemnum; id += grain) + { + idx = offset + id + (id / cols) * invalid_cols; + nonzero += src[idx] == zero ? zero : one; + } + + if (lid > 127) + localmem_nonzero[lid - 128] = nonzero; + barrier(CLK_LOCAL_MEM_FENCE); + + if (lid < 128) + localmem_nonzero[lid] = nonzero + localmem_nonzero[lid]; + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = 64; lsize > 0; lsize >>= 1) + { + if (lid < lsize) + { int lid2 = lsize + lid; localmem_nonzero[lid] = localmem_nonzero[lid] + localmem_nonzero[lid2]; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if( lid == 0) - { - dst[gid] = localmem_nonzero[0]; - } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (lid == 0) + dst[gid] = localmem_nonzero[0]; } diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl index 9dda5e957a..a30eba4310 100644 --- a/modules/ocl/src/opencl/arithm_phase.cl +++ b/modules/ocl/src/opencl/arithm_phase.cl @@ -45,110 +45,125 @@ // #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif -#define CV_PI 3.1415926535898 -/**************************************phase inradians**************************************/ -__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +#define CV_PI 3.1415926535898 +#define CV_2PI 2*3.1415926535898 + +/**************************************phase inradians**************************************/ + +__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); - *((__global float *)((__global char *)dst + dst_index)) = tmp; + if (tmp < 0) + tmp += CV_2PI; + + dst[dst_index] = tmp; } - } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); + double data1 = src1[src1_index]; + double data2 = src2[src2_index]; + double tmp = atan2(data2, data1); - *((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); + if (tmp < 0) + tmp += CV_2PI; + + dst[dst_index] = tmp; } - } + #endif /**************************************phase indegrees**************************************/ -__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); - float tmp_data = 180*tmp/CV_PI; + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); + tmp = 180 * tmp / CV_PI; - *((__global float *)((__global char *)dst + dst_index)) = tmp_data; + if (tmp < 0) + tmp += 360; + + dst[dst_index] = tmp; } - } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); - double tmp = atan2(data2,data1); - double tmp_data = 180*tmp/CV_PI; + double data1 = src1[src1_index]; + double data2 = src2[src2_index]; + double tmp = atan2(src2[src2_index], src1[src1_index]); - *((__global double *)((__global char *)dst + dst_index)) = tmp_data; + tmp = 180 * tmp / CV_PI; + if (tmp < 0) + tmp += 360; + + dst[dst_index] = tmp; } - } #endif diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl index 0604ae81dd..921026b40d 100644 --- a/modules/ocl/src/opencl/arithm_setidentity.cl +++ b/modules/ocl/src/opencl/arithm_setidentity.cl @@ -42,6 +42,7 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #if defined (DOUBLE_SUPPORT) #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable @@ -50,51 +51,19 @@ #endif #endif - -#if defined (DOUBLE_SUPPORT) -#define DATA_TYPE double -#else -#define DATA_TYPE float -#endif - -__kernel void setIdentityKernel_F1(__global float* src, int src_row, int src_col, int src_step, DATA_TYPE scalar) +__kernel void setIdentity(__global T * src, int src_step, int src_offset, + int cols, int rows, __global const T * scalar) { int x = get_global_id(0); int y = get_global_id(1); - if(x < src_col && y < src_row) + if (x < cols && y < rows) { - if(x == y) - src[y * src_step + x] = scalar; + int src_index = mad24(y, src_step, src_offset + x); + + if (x == y) + src[src_index] = *scalar; else - src[y * src_step + x] = 0 * scalar; - } -} - -__kernel void setIdentityKernel_D1(__global DATA_TYPE* src, int src_row, int src_col, int src_step, DATA_TYPE scalar) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < src_col && y < src_row) - { - if(x == y) - src[y * src_step + x] = scalar; - else - src[y * src_step + x] = 0 * scalar; - } -} - -__kernel void setIdentityKernel_I1(__global int* src, int src_row, int src_col, int src_step, int scalar) -{ - int x = get_global_id(0); - int y = get_global_id(1); - - if(x < src_col && y < src_row) - { - if(x == y) - src[y * src_step + x] = scalar; - else - src[y * src_step + x] = 0 * scalar; + src[src_index] = 0; } } diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl index 280b0a5111..4011f03bea 100644 --- a/modules/ocl/src/opencl/arithm_sum.cl +++ b/modules/ocl/src/opencl/arithm_sum.cl @@ -43,163 +43,62 @@ // //M*/ -/**************************************PUBLICFUNC*************************************/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable -#define RES_TYPE double8 -#define CONVERT_RES_TYPE convert_double8 -#else -#define RES_TYPE float8 -#define CONVERT_RES_TYPE convert_float8 +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif -#if defined (DEPTH_0) -#define VEC_TYPE uchar8 +#if defined (FUNC_SUM) +#define FUNC(a, b) b += a; #endif -#if defined (DEPTH_1) -#define VEC_TYPE char8 +#if defined (FUNC_ABS_SUM) +#define FUNC(a, b) b += a >= 0 ? a : -a; #endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort8 +#if defined (FUNC_SQR_SUM) +#define FUNC(a, b) b += a * a; #endif -#if defined (DEPTH_3) -#define VEC_TYPE short8 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int8 -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float8 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double8 -#endif - -#if defined (FUNC_TYPE_0) -#define FUNC(a,b) b += a; -#endif -#if defined (FUNC_TYPE_1) -#define FUNC(a,b) b = b + (a >= 0 ? a : -a); -#endif -#if defined (FUNC_TYPE_2) -#define FUNC(a,b) b = b + a * a; -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a) a = a; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a) a.s0 = 0; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a) a.s0 = 0;a.s1 = 0; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a) a = a; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a) a.s7 = 0; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a) a.s7 = 0;a.s6 = 0; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a) a.s7 = 0;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0; -#endif - -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable /**************************************Array buffer SUM**************************************/ -__kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global RES_TYPE *dst) + +__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum, + __global srcT *src, __global dstT *dst) { unsigned int lid = get_local_id(0); unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); + unsigned int id = get_global_id(0); unsigned int idx = offset + id + (id / cols) * invalid_cols; - __local RES_TYPE localmem_sum[128]; - RES_TYPE sum = 0,temp; - if(id < elemnum) - { - temp = CONVERT_RES_TYPE(src[idx]); - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - FUNC(temp,sum); - } - else - { - sum = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) + + __local dstT localmem_sum[128]; + dstT sum = (dstT)(0), temp; + + for (int grainSize = groupnum << 8; id < elemnum; id += grainSize) { idx = offset + id + (id / cols) * invalid_cols; - temp = CONVERT_RES_TYPE(src[idx]); - if(id % cols == 0 ) - { - repeat_s(temp); - } - if(id % cols == cols - 1) - { - repeat_e(temp); - } - FUNC(temp,sum); + temp = convertToDstT(src[idx]); + FUNC(temp, sum); } - if(lid > 127) - { + + if (lid > 127) localmem_sum[lid - 128] = sum; - } barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { + + if (lid < 128) localmem_sum[lid] = sum + localmem_sum[lid]; - } barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) + + for (int lsize = 64; lsize > 0; lsize >>= 1) { - if(lid < lsize) + if (lid < lsize) { int lid2 = lsize + lid; localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2]; } barrier(CLK_LOCAL_MEM_FENCE); } - if( lid == 0) - { + + if (lid == 0) dst[gid] = localmem_sum[0]; - } } diff --git a/modules/ocl/src/opencl/arithm_sum_3.cl b/modules/ocl/src/opencl/arithm_sum_3.cl deleted file mode 100644 index 3f6ed08803..0000000000 --- a/modules/ocl/src/opencl/arithm_sum_3.cl +++ /dev/null @@ -1,247 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Shengen Yan,yanshengen@gmail.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -/**************************************PUBLICFUNC*************************************/ -#if defined (DOUBLE_SUPPORT) -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#define RES_TYPE double4 -#define CONVERT_RES_TYPE convert_double4 -#else -#define RES_TYPE float4 -#define CONVERT_RES_TYPE convert_float4 -#endif - -#if defined (DEPTH_0) -#define VEC_TYPE uchar4 -#endif -#if defined (DEPTH_1) -#define VEC_TYPE char4 -#endif -#if defined (DEPTH_2) -#define VEC_TYPE ushort4 -#endif -#if defined (DEPTH_3) -#define VEC_TYPE short4 -#endif -#if defined (DEPTH_4) -#define VEC_TYPE int4 -#endif -#if defined (DEPTH_5) -#define VEC_TYPE float4 -#endif -#if defined (DEPTH_6) -#define VEC_TYPE double4 -#endif - -#if defined (FUNC_TYPE_0) -#define FUNC(a,b) b += a; -#endif -#if defined (FUNC_TYPE_1) -#define FUNC(a,b) b = b + (a >= 0 ? a : -a); -#endif -#if defined (FUNC_TYPE_2) -#define FUNC(a,b) b = b + a * a; -#endif - -#if defined (REPEAT_S0) -#define repeat_s(a,b,c) a=a; b =b; c=c; -#endif -#if defined (REPEAT_S1) -#define repeat_s(a,b,c) a.s0=0; b=b; c=c; -#endif -#if defined (REPEAT_S2) -#define repeat_s(a,b,c) a.s0=0; a.s1=0; b=b; c=c; -#endif -#if defined (REPEAT_S3) -#define repeat_s(a,b,c) a.s0=0; a.s1=0; a.s2=0; b=b; c=c; -#endif -#if defined (REPEAT_S4) -#define repeat_s(a,b,c) a=0;b=b; c=c; -#endif -#if defined (REPEAT_S5) -#define repeat_s(a,b,c) a=0; b.s0=0;c=c; -#endif -#if defined (REPEAT_S6) -#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; c=c; -#endif -#if defined (REPEAT_S7) -#define repeat_s(a,b,c) a=0; b.s0=0; b.s1=0; b.s2=0; c=c; -#endif -#if defined (REPEAT_S8) -#define repeat_s(a,b,c) a=0; b=0; c=c; -#endif -#if defined (REPEAT_S9) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; -#endif -#if defined (REPEAT_S10) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; -#endif -#if defined (REPEAT_S11) -#define repeat_s(a,b,c) a=0; b=0; c.s0=0; c.s1=0; c.s2=0; -#endif - -#if defined (REPEAT_E0) -#define repeat_e(a,b,c) a=a; b =b; c=c; -#endif -#if defined (REPEAT_E1) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; -#endif -#if defined (REPEAT_E2) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; -#endif -#if defined (REPEAT_E3) -#define repeat_e(a,b,c) a=a; b=b; c.s3=0; c.s2=0; c.s1=0; -#endif -#if defined (REPEAT_E4) -#define repeat_e(a,b,c) a=a; b=b; c=0; -#endif -#if defined (REPEAT_E5) -#define repeat_e(a,b,c) a=a; b.s3=0; c=0; -#endif -#if defined (REPEAT_E6) -#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; c=0; -#endif -#if defined (REPEAT_E7) -#define repeat_e(a,b,c) a=a; b.s3=0; b.s2=0; b.s1=0; c=0; -#endif -#if defined (REPEAT_E8) -#define repeat_e(a,b,c) a=a; b=0; c=0; -#endif -#if defined (REPEAT_E9) -#define repeat_e(a,b,c) a.s3=0; b=0; c=0; -#endif -#if defined (REPEAT_E10) -#define repeat_e(a,b,c) a.s3=0; a.s2=0; b=0; c=0; -#endif -#if defined (REPEAT_E11) -#define repeat_e(a,b,c) a.s3=0; a.s2=0; a.s1=0; b=0; c=0; -#endif - -__kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,int groupnum, - __global VEC_TYPE *src, __global RES_TYPE *dst) -{ - unsigned int lid = get_local_id(0); - unsigned int gid = get_group_id(0); - unsigned int id = get_global_id(0); - unsigned int idx = offset + id + (id / cols) * invalid_cols; - idx = idx * 3; - __local RES_TYPE localmem_sum1[128]; - __local RES_TYPE localmem_sum2[128]; - __local RES_TYPE localmem_sum3[128]; - RES_TYPE sum1 = 0,sum2 = 0,sum3 = 0,temp1,temp2,temp3; - if(id < elemnum) - { - temp1 = CONVERT_RES_TYPE(src[idx]); - temp2 = CONVERT_RES_TYPE(src[idx+1]); - temp3 = CONVERT_RES_TYPE(src[idx+2]); - if(id % cols == 0 ) - { - repeat_s(temp1,temp2,temp3); - } - if(id % cols == cols - 1) - { - repeat_e(temp1,temp2,temp3); - } - FUNC(temp1,sum1); - FUNC(temp2,sum2); - FUNC(temp3,sum3); - } - else - { - sum1 = 0; - sum2 = 0; - sum3 = 0; - } - for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) - { - idx = offset + id + (id / cols) * invalid_cols; - idx = idx * 3; - temp1 = CONVERT_RES_TYPE(src[idx]); - temp2 = CONVERT_RES_TYPE(src[idx+1]); - temp3 = CONVERT_RES_TYPE(src[idx+2]); - if(id % cols == 0 ) - { - repeat_s(temp1,temp2,temp3); - } - if(id % cols == cols - 1) - { - repeat_e(temp1,temp2,temp3); - } - FUNC(temp1,sum1); - FUNC(temp2,sum2); - FUNC(temp3,sum3); - } - if(lid > 127) - { - localmem_sum1[lid - 128] = sum1; - localmem_sum2[lid - 128] = sum2; - localmem_sum3[lid - 128] = sum3; - } - barrier(CLK_LOCAL_MEM_FENCE); - if(lid < 128) - { - localmem_sum1[lid] = sum1 + localmem_sum1[lid]; - localmem_sum2[lid] = sum2 + localmem_sum2[lid]; - localmem_sum3[lid] = sum3 + localmem_sum3[lid]; - } - barrier(CLK_LOCAL_MEM_FENCE); - for(int lsize = 64; lsize > 0; lsize >>= 1) - { - if(lid < lsize) - { - int lid2 = lsize + lid; - localmem_sum1[lid] = localmem_sum1[lid] + localmem_sum1[lid2]; - localmem_sum2[lid] = localmem_sum2[lid] + localmem_sum2[lid2]; - localmem_sum3[lid] = localmem_sum3[lid] + localmem_sum3[lid2]; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - if( lid == 0) - { - dst[gid*3] = localmem_sum1[0]; - dst[gid*3+1] = localmem_sum2[0]; - dst[gid*3+2] = localmem_sum3[0]; - } -} diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 9b20dbf89c..db01d95036 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool) cv::RNG &rng = TS::ptr()->get_rng(); - src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); - src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false); + src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false); + src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false); dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false); mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false); @@ -464,7 +464,6 @@ TEST_P(Mul, Scalar) } } - TEST_P(Mul, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -507,7 +506,6 @@ TEST_P(Div, Scalar) } } - TEST_P(Div, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -753,7 +751,7 @@ TEST_P(MinMax, MAT) } } -TEST_P(MinMax, DISABLED_MASK) +TEST_P(MinMax, MASK) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1022,7 +1020,7 @@ TEST_P(MinMaxLoc, MASK) typedef ArithmTestBase Sum; -TEST_P(Sum, DISABLED_MAT) +TEST_P(Sum, MAT) { for (int j = 0; j < LOOP_TIMES; j++) { @@ -1031,7 +1029,121 @@ TEST_P(Sum, DISABLED_MAT) Scalar cpures = cv::sum(src1_roi); Scalar gpures = cv::ocl::sum(gsrc1); - //check results + // check results + EXPECT_NEAR(cpures[0], gpures[0], 0.1); + EXPECT_NEAR(cpures[1], gpures[1], 0.1); + EXPECT_NEAR(cpures[2], gpures[2], 0.1); + EXPECT_NEAR(cpures[3], gpures[3], 0.1); + } +} + +typedef ArithmTestBase SqrSum; + +template +static Scalar sqrSum(const Mat & src) +{ + Scalar sum = Scalar::all(0); + int cn = src.channels(); + WT data[4] = { 0, 0, 0, 0 }; + + int cols = src.cols * cn; + for (int y = 0; y < src.rows; ++y) + { + const T * const sdata = src.ptr(y); + for (int x = 0; x < cols; ) + for (int i = 0; i < cn; ++i, ++x) + { + WT t = static_cast(sdata[x]); + data[i] += t * t; + } + } + + for (int i = 0; i < cn; ++i) + sum[i] = static_cast(data[i]); + + return sum; +} + +typedef Scalar (*sumFunc)(const Mat &); + +TEST_P(SqrSum, MAT) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + static sumFunc funcs[] = { sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + sqrSum, + 0 }; + + sumFunc func = funcs[src1_roi.depth()]; + CV_Assert(func != 0); + + Scalar cpures = func(src1_roi); + Scalar gpures = cv::ocl::sqrSum(gsrc1); + + // check results + EXPECT_NEAR(cpures[0], gpures[0], 1.0); + EXPECT_NEAR(cpures[1], gpures[1], 1.0); + EXPECT_NEAR(cpures[2], gpures[2], 1.0); + EXPECT_NEAR(cpures[3], gpures[3], 1.0); + } +} + +typedef ArithmTestBase AbsSum; + +template +static Scalar absSum(const Mat & src) +{ + Scalar sum = Scalar::all(0); + int cn = src.channels(); + WT data[4] = { 0, 0, 0, 0 }; + + int cols = src.cols * cn; + for (int y = 0; y < src.rows; ++y) + { + const T * const sdata = src.ptr(y); + for (int x = 0; x < cols; ) + for (int i = 0; i < cn; ++i, ++x) + { + WT t = static_cast(sdata[x]); + data[i] += t >= 0 ? t : -t; + } + } + + for (int i = 0; i < cn; ++i) + sum[i] = static_cast(data[i]); + + return sum; +} + +TEST_P(AbsSum, MAT) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + static sumFunc funcs[] = { absSum, + absSum, + absSum, + absSum, + absSum, + absSum, + absSum, + 0 }; + + sumFunc func = funcs[src1_roi.depth()]; + CV_Assert(func != 0); + + Scalar cpures = func(src1_roi); + Scalar gpures = cv::ocl::absSum(gsrc1); + + // check results EXPECT_NEAR(cpures[0], gpures[0], 0.1); EXPECT_NEAR(cpures[1], gpures[1], 0.1); EXPECT_NEAR(cpures[2], gpures[2], 0.1); @@ -1059,17 +1171,27 @@ TEST_P(CountNonZero, MAT) typedef ArithmTestBase Phase; -TEST_P(Phase, DISABLED_Mat) +TEST_P(Phase, angleInDegrees) { - for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++) + for (int j = 0; j < LOOP_TIMES; j++) { - for (int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false); - cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); - Near(1e-2); - } + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi, true); + cv::ocl::phase(gsrc1, gsrc2, gdst1, true); + + Near(1e-2); + } +} + +TEST_P(Phase, angleInRadians) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi); + cv::ocl::phase(gsrc1, gsrc2, gdst1); + + Near(1e-2); } } @@ -1301,32 +1423,136 @@ TEST_P(AddWeighted, Mat) } } +//////////////////////////////// setIdentity ///////////////////////////////////////////////// + +typedef ArithmTestBase SetIdentity; + +TEST_P(SetIdentity, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + cv::setIdentity(dst1_roi, val); + cv::ocl::setIdentity(gdst1, val); + + Near(0); + } +} + +//////////////////////////////// meanStdDev ///////////////////////////////////////////////// + +typedef ArithmTestBase MeanStdDev; + +TEST_P(MeanStdDev, Mat) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + Scalar cpu_mean, cpu_stddev; + Scalar gpu_mean, gpu_stddev; + + cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev); + cv::ocl::meanStdDev(gsrc1, gpu_mean, gpu_stddev); + + for (int i = 0; i < 4; ++i) + { + EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1); + EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1); + } + } +} + +//////////////////////////////// Norm ///////////////////////////////////////////////// + +typedef ArithmTestBase Norm; + +TEST_P(Norm, NORM_INF) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_INF; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L1) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L1; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + +TEST_P(Norm, NORM_L2) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + + int type = NORM_L2; + if (relative == 1) + type |= NORM_RELATIVE; + + const double cpuRes = cv::norm(src1_roi, src2_roi, type); + const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// -INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); -INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); // + -INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); // + +INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(testing::Range(CV_8U, CV_USRTYPE1), Values(1), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); +INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool())); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_norm.cpp b/modules/ocl/test/test_norm.cpp deleted file mode 100644 index 2bd847068e..0000000000 --- a/modules/ocl/test/test_norm.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "test_precomp.hpp" - -typedef ::testing::TestWithParam normFixture; - -TEST_P(normFixture, DISABLED_accuracy) -{ - const cv::Size srcSize = GetParam(); - - cv::Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1); - cv::randu(src1, 0, 2); - cv::randu(src2, 0, 2); - - cv::ocl::oclMat oclSrc1(src1), oclSrc2(src2); - - double value = cv::norm(src1, src2, cv::NORM_INF); - double oclValue = cv::ocl::norm(oclSrc1, oclSrc2, cv::NORM_INF); - - ASSERT_EQ(value, oclValue); -} - -INSTANTIATE_TEST_CASE_P(oclNormTest, normFixture, - ::testing::Values(cv::Size(500, 500), cv::Size(1000, 1000)));