Merge remote-tracking branch 'origin/2.4' into merge-2.4

Merged pull requests:
	#890 from caorong:patch-1
	#893 from jet47:gpu-arm-fixes
	#933 from pengx17:2.4_macfix_cont
	#935 from pengx17:2.4_filter2d_fix
	#936 from bitwangyaoyao:2.4_perf
	#937 from bitwangyaoyao:2.4_fixPyrLK
	#938 from pengx17:2.4_surf_sample
	#939 from pengx17:2.4_getDevice
	#940 from SpecLad:autolock
	#941 from apavlenko:signed_char
	#946 from bitwangyaoyao:2.4_samples2
	#947 from jet47:fix-gpu-arm-build
	#948 from jet47:cuda-5.5-support
	#952 from SpecLad:jepg
	#953 from jet47:fix-bug-3069
	#955 from SpecLad:symlink
	#957 from pengx17:2.4_fix_corner_detector
	#959 from SpecLad:qt4-build
	#960 from SpecLad:extra-modules

Conflicts:
	modules/core/include/opencv2/core/core.hpp
	modules/gpu/CMakeLists.txt
	modules/gpu/include/opencv2/gpu/device/vec_math.hpp
	modules/gpu/perf/perf_video.cpp
	modules/gpuimgproc/src/cuda/hough.cu
	modules/ocl/include/opencv2/ocl/ocl.hpp
	modules/ocl/src/pyrlk.cpp
	samples/gpu/driver_api_multi.cpp
	samples/gpu/driver_api_stereo_multi.cpp
	samples/ocl/surf_matcher.cpp
This commit is contained in:
Roman Donchenko
2013-06-10 18:18:01 +04:00
31 changed files with 435 additions and 444 deletions
+1 -1
View File
@@ -4,4 +4,4 @@ if(NOT OPENCV_MODULES_PATH)
set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
ocv_glob_modules(${OPENCV_MODULES_PATH})
ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH})
+1 -1
View File
@@ -2136,7 +2136,7 @@ template<typename _Tp> inline void Seq<_Tp>::remove(int idx)
{ seqRemove(seq, idx); }
template<typename _Tp> inline void Seq<_Tp>::remove(const Range& r)
{ seqRemoveSlice(seq, r); }
{ seqRemoveSlice(seq, cvSlice(r.start, r.end)); }
template<typename _Tp> inline void Seq<_Tp>::copyTo(std::vector<_Tp>& vec, const Range& range) const
{
+1 -1
View File
@@ -1041,7 +1041,7 @@ typedef struct CvSlice
{
int start_index, end_index;
#ifdef __cplusplus
#if defined(__cplusplus) && !defined(__CUDACC__)
CvSlice(int start = 0, int end = 0) : start_index(start), end_index(end) {}
CvSlice(const cv::Range& r) { *this = (r.start != INT_MIN && r.end != INT_MAX) ? CvSlice(r.start, r.end) : CvSlice(0, CV_WHOLE_SEQ_END_INDEX); }
operator cv::Range() const { return (start_index == 0 && end_index == CV_WHOLE_SEQ_END_INDEX ) ? cv::Range::all() : cv::Range(start_index, end_index); }
@@ -294,6 +294,9 @@ public:
~AutoLock() { mutex->unlock(); }
protected:
Mutex* mutex;
private:
AutoLock(const AutoLock&);
AutoLock& operator = (const AutoLock&);
};
// The CommandLineParser class is designed for command line arguments parsing
+1 -1
View File
@@ -437,7 +437,7 @@ public:
GpuMat dclassified(1, 1, CV_32S);
cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);
PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize);
while (level.isFeasible(maxObjectSize))
{
+1 -5
View File
@@ -101,14 +101,10 @@ elseif(HAVE_QT)
endif()
include(${QT_USE_FILE})
if(QT_INCLUDE_DIR)
ocv_include_directories(${QT_INCLUDE_DIR})
endif()
QT4_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
QT4_WRAP_CPP(_MOC_OUTFILES src/window_QT.h)
list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES} ${QT_QTTEST_LIBRARY})
list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
list(APPEND highgui_srcs src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag)
if(${_have_flag})
+2
View File
@@ -708,6 +708,8 @@ namespace cv
}
//! applies non-separable 2D linear filter to the image
// Note: at the moment this function only works when the anchor point is at the kernel center
// and the kernel size is either 3x3 or 5x5; otherwise the function will not produce a valid result.
CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-5
View File
@@ -62,7 +62,6 @@ PERFTEST(lut)
gen(src, size, size, all_type[j], 0, 256);
gen(lut, 1, 256, CV_8UC1, 0, 1);
dst = src;
LUT(src, lut, dst);
@@ -233,8 +232,6 @@ PERFTEST(Mul)
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
dst = src1;
dst.setTo(0);
multiply(src1, src2, dst);
@@ -281,8 +278,6 @@ PERFTEST(Div)
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
dst = src1;
dst.setTo(0);
divide(src1, src2, dst);
+25 -28
View File
@@ -291,9 +291,7 @@ PERFTEST(GaussianBlur)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
gen(src, size, size, all_type[j], 0, 256);
dst = src;
dst.setTo(0);
gen(src, size, size, all_type[j], 5, 16);
GaussianBlur(src, dst, Size(9, 9), 0);
@@ -339,39 +337,38 @@ PERFTEST(filter2D)
{
gen(src, size, size, all_type[j], 0, 256);
for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1)
{
SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
const int ksize = 3;
Mat kernel;
gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ;
Mat dst, ocl_dst;
dst.setTo(0);
cv::filter2D(src, dst, -1, kernel);
Mat kernel;
gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0);
CPU_ON;
cv::filter2D(src, dst, -1, kernel);
CPU_OFF;
Mat dst, ocl_dst;
ocl::oclMat d_src(src), d_dst;
cv::filter2D(src, dst, -1, kernel);
WARMUP_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
WARMUP_OFF;
CPU_ON;
cv::filter2D(src, dst, -1, kernel);
CPU_OFF;
GPU_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
GPU_OFF;
ocl::oclMat d_src(src), d_dst;
GPU_FULL_ON;
d_src.upload(src);
ocl::filter2D(d_src, d_dst, -1, kernel);
d_dst.download(ocl_dst);
GPU_FULL_OFF;
WARMUP_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
WARMUP_OFF;
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
GPU_ON;
ocl::filter2D(d_src, d_dst, -1, kernel);
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::filter2D(d_src, d_dst, -1, kernel);
d_dst.download(ocl_dst);
GPU_FULL_OFF;
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
}
+7 -190
View File
@@ -674,8 +674,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
coor.y = static_cast<short>(y0);
return coor;
}
void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit);
void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
{
if( src_roi.empty() )
CV_Error( Error::StsBadArg, "The input image is empty" );
@@ -683,6 +683,8 @@ void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::T
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
dst_roi.create(src_roi.size(), src_roi.type());
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
CV_Assert( !(dst_roi.step & 0x3) );
@@ -725,9 +727,6 @@ PERFTEST(meanShiftFiltering)
SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
//gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
dst = src;
dst.setTo(0);
cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
@@ -756,201 +755,21 @@ PERFTEST(meanShiftFiltering)
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
}
}
///////////// meanShiftProc////////////////////////
#if 0
// Runs the mean-shift iteration for the single pixel at (x0, y0) and writes the
// resulting color to dptr.
//
// Parameters:
//   x0, y0  - starting pixel coordinates; replaced by the shifted center on every iteration.
//   sptr    - pointer to the source pixel at (x0, y0); four bytes per pixel are addressed.
//   dptr    - pointer to the destination pixel; four bytes are written.
//   sstep   - distance in bytes between consecutive source rows.
//   size    - image size, used to clamp the search window.
//   sp      - window half-size in pixels (window spans x0-sp..x0+sp, y0-sp..y0+sp).
//   sr      - color radius; its square (isr2) is the threshold for the summed tab[] values.
//   maxIter - maximum number of iterations.
//   eps     - stop threshold applied to the position shift plus the color change.
//   tab     - lookup table indexed by (channel difference + 255).
//             NOTE(review): presumably holds squared differences so that the sum is
//             comparable with sr*sr - confirm against the caller that fills it.
//
// Returns a COOR holding the final center coordinates converted to short.
COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
{
int isr2 = sr * sr;
int c0, c1, c2, c3;
int iter;
uchar *ptr = NULL;
uchar *pstart = NULL;
// Offset (in pixels) of the new center relative to the previous one; used to move pstart.
int revx = 0, revy = 0;
// Current center color; c3 is only read here and written back unchanged at the end.
c0 = sptr[0];
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
// iterate meanshift procedure
for (iter = 0; iter < maxIter; iter++)
{
int count = 0;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0 - sp;
int miny = y0 - sp;
int maxx = x0 + sp;
int maxy = y0 + sp;
//deal with the image boundary
if (minx < 0)
{
minx = 0;
}
if (miny < 0)
{
miny = 0;
}
if (maxx >= size.width)
{
maxx = size.width - 1;
}
if (maxy >= size.height)
{
maxy = size.height - 1;
}
// pstart always addresses the pixel at the current center (x0, y0).
if (iter == 0)
{
pstart = sptr;
}
else
{
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
}
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
// After each row ptr has advanced by the row width in bytes, so adding
// sstep minus that width moves it to the first window pixel of the next row.
for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
{
int rowCount = 0;
int x = minx;
#if CV_ENABLE_UNROLLED
// Unrolled variant: tests four consecutive pixels (16 bytes) per pass;
// the remaining pixels of the row are handled by the scalar loop below.
for (; x + 4 <= maxx; x += 4, ptr += 16)
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 1;
rowCount++;
}
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 2;
rowCount++;
}
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 3;
rowCount++;
}
}
#endif
// Scalar loop: one pixel (4 bytes) per pass. A pixel contributes to the sums
// only when its summed tab[] distance to the center color is within isr2.
for (; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
rowCount++;
}
}
// Rows without any accepted pixel add nothing to count or sy.
if (rowCount == 0)
{
continue;
}
count += rowCount;
sy += y * rowCount;
}
// No pixel in the window was accepted: keep the current center and color.
if (count == 0)
{
break;
}
// New center and color are the integer means over the accepted pixels.
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
// Stop when the center did not move, or when the position shift plus the
// tab[] distance between the old and new color does not exceed eps.
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
revy = y1 - y0;
x0 = x1;
y0 = y1;
c0 = s0;
c1 = s1;
c2 = s2;
if (stopFlag)
{
break;
}
} //for iter
// Store the converged color; the fourth channel is the value read from the source pixel.
dptr[0] = (uchar)c0;
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
dptr[3] = (uchar)c3;
COOR coor;
coor.x = static_cast<short>(x0);
coor.y = static_cast<short>(y0);
return coor;
}
#endif
void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
{
if (src_roi.empty())
{
CV_Error(Error::StsBadArg, "The input image is empty");
}
if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
{
CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
}
dst_roi.create(src_roi.size(), src_roi.type());
dstCoor_roi.create(src_roi.size(), CV_16SC2);
CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
CV_Assert(!(dstCoor_roi.step & 0x3));
@@ -1008,8 +827,6 @@ PERFTEST(meanShiftProc)
SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
gen(dst[0], size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
gen(dst[1], size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
+2 -2
View File
@@ -48,8 +48,8 @@
///////////// PyrLKOpticalFlow ////////////////////////
PERFTEST(PyrLKOpticalFlow)
{
std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
std::string images1[] = {"rubberwhale1.png", "basketball1.png"};
std::string images2[] = {"rubberwhale2.png", "basketball2.png"};
for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
{
+7 -3
View File
@@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth);
const int buffer_size = 100;
char opt_buffer [buffer_size] = "";
sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y);
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer);
}
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType)
@@ -656,7 +660,7 @@ Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const
oclMat gpu_krnl;
int nDivisor;
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false);
normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
@@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
args.push_back(std::make_pair(sizeof(cl_int), (void *)&ridusy));
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
}
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
+2 -1
View File
@@ -257,7 +257,8 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
if (minDistance < 1)
{
corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1));
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
tmpCorners_(roi_range).copyTo(corners);
}
else
{
+4
View File
@@ -337,6 +337,10 @@ namespace cv
oclinfo.push_back(ocltmpinfo);
}
}
if(devcienums > 0)
{
setDevice(oclinfo[0]);
}
return devcienums;
}
@@ -82,9 +82,9 @@
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////Macro for define elements number per thread/////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
#define ANCHOR 3
#define ANX 1
#define ANY 1
//#define ANCHOR 3
//#define ANX 1
//#define ANY 1
#define ROWS_PER_GROUP 4
#define ROWS_PER_GROUP_BITS 2
@@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x
for(int i = 0; i < ANCHOR; i++)
{
#pragma unroll 3
#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
@@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x
for(int i = 0; i < ANCHOR; i++)
{
#pragma unroll 3
#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
@@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_
for(int i = 0; i < ANCHOR; i++)
{
#pragma unroll 3
#pragma unroll
for(int j = 0; j < ANCHOR; j++)
{
if(dst_rows_index < dst_rows_end)
+8 -7
View File
@@ -130,28 +130,29 @@ __kernel void calcHarris(__global const float *Dx,__global const float *Dy, __gl
data[2][i] = dy_data[i] * dy_data[i];
}
#else
for(int i=0; i < ksY+1; i++)
{
int clamped_col = min(dst_cols, col);
for(int i=0; i < ksY+1; i++)
{
int dx_selected_row;
int dx_selected_col;
dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
int dy_selected_row;
int dy_selected_col;
dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];
}
}
#endif
float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
for(int i=1; i < ksY; i++)
@@ -130,28 +130,30 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
data[2][i] = dy_data[i] * dy_data[i];
}
#else
for(int i=0; i < ksY+1; i++)
{
int clamped_col = min(dst_cols, col);
for(int i=0; i < ksY+1; i++)
{
int dx_selected_row;
int dx_selected_col;
dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols);
dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col);
dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
int dy_selected_row;
int dy_selected_col;
dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols);
dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col);
dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
data[0][i] = dx_data[i] * dx_data[i];
data[1][i] = dx_data[i] * dy_data[i];
data[2][i] = dy_data[i] * dy_data[i];
}
}
#endif
float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
for(int i=1; i < ksY; i++)
+8 -7
View File
@@ -389,8 +389,8 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((tidx & 1) == 0);
const int oddFlag = (int)((tidx & 1) != 0);
const float4 evenFlag = (float4)((tidx & 1) == 0);
const float4 oddFlag = (float4)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
@@ -455,6 +455,7 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum);
}
}
///////////////////////////////////////////////////////////////////////
////////////////////////// CV_16UC4 //////////////////////////////////
///////////////////////////////////////////////////////////////////////
@@ -492,8 +493,8 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((get_local_id(0) & 1) == 0);
const int oddFlag = (int)((get_local_id(0) & 1) != 0);
const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0);
const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0);
const bool eveny = ((get_local_id(1) & 1) == 0);
const int tidx = get_local_id(0);
@@ -604,8 +605,8 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
float4 sum = (float4)(0,0,0,0);
const int evenFlag = (int)((tidx & 1) == 0);
const int oddFlag = (int)((tidx & 1) != 0);
const float4 evenFlag = (float4)((tidx & 1) == 0);
const float4 oddFlag = (float4)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f);
@@ -669,4 +670,4 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
{
dst[x + y * dstStep] = 4.0f * sum;
}
}
}
+1 -1
View File
@@ -508,7 +508,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
openCLSafeCall(clReleaseKernel(kernel));
static char opt[16] = {0};
static char opt[32] = {0};
sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH);