Merged the trunk r8575:8583 (INTER_AREA interpolation for GPU resize)

This commit is contained in:
Andrey Kamaev
2012-06-15 08:52:08 +00:00
parent ab20da0f53
commit 73c152abc4
10 changed files with 1319 additions and 781 deletions
File diff suppressed because it is too large Load Diff
+21 -6
View File
@@ -3,15 +3,16 @@
void fill(cv::Mat& m, double a, double b);
using perf::MatType;
using perf::MatDepth;
enum {HORIZONTAL_AXIS = 0, VERTICAL_AXIS = 1, BOTH_AXIS = -1};
CV_ENUM(MorphOp, cv::MORPH_ERODE, cv::MORPH_DILATE)
CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
struct CvtColorInfo
{
@@ -24,6 +25,22 @@ struct CvtColorInfo
void PrintTo(const CvtColorInfo& info, std::ostream* os);
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << " = " << testing::PrintToString(static_cast< type >(param)); \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
namespace cv { namespace gpu
{
void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os);
@@ -55,8 +72,6 @@ namespace cv { namespace gpu
cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
const std::vector<cv::gpu::DeviceInfo>& devices();
std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
+69 -3
View File
@@ -46,6 +46,7 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/filters.hpp"
# include <cfloat>
namespace cv { namespace gpu { namespace device
{
@@ -65,6 +66,17 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
dst(y, x) = saturate_cast<T>(src(y, x));
}
}
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
{
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
@@ -74,13 +86,47 @@ namespace cv { namespace gpu { namespace device
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
{
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
{
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
@@ -169,15 +215,35 @@ namespace cv { namespace gpu { namespace device
}
};
template <typename T> struct ResizeDispatcher<AreaFilter, T>
{
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
{
int iscale_x = round(fx);
int iscale_y = round(fy);
if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
else
ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
}
};
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
DevMem2Db dst, int interpolation, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
static const caller_t callers[3] =
static const caller_t callers[4] =
{
ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
ResizeDispatcher<PointFilter, T>::call,
ResizeDispatcher<LinearFilter, T>::call,
ResizeDispatcher<CubicFilter, T>::call,
ResizeDispatcher<AreaFilter, T>::call
};
// chenge to linear if area interpolation upscaling
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< DevMem2D_<T> >(dst), stream);
+108 -3
View File
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace device
typedef float index_type;
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_) : src(src_) {}
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
@@ -154,6 +154,111 @@ namespace cv { namespace gpu { namespace device
const Ptr2D src;
};
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for(int dy = sy1; dy < sy2; ++dy)
for(int dx = sx1; dx < sx2; ++dx)
{
out = out + src(dy, dx) * scale;
}
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
float scale_x, scale_y ,scale;
};
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
if (sx1 > fsx1)
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
}
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
float scale_x, scale_y;
int width, haight;
};
}}} // namespace cv { namespace gpu { namespace device
#endif // __OPENCV_GPU_FILTERS_HPP__
@@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
template<> struct VecTraits<char>
{
typedef char elem_type;
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
+3 -2
View File
@@ -61,7 +61,8 @@ namespace cv { namespace gpu { namespace device
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
|| interpolation == INTER_CUBIC || interpolation == INTER_AREA);
CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
if (dsize == Size())
@@ -90,7 +91,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
src.locateROI(wholeSize, ofs);
bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || src.type() == CV_8UC4);
useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || (src.type() == CV_8UC4 && interpolation != INTER_AREA));
if (useNpp)
{
+47 -1
View File
@@ -48,7 +48,8 @@
namespace
{
template <typename T, template <typename> class Interpolator> void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
template <typename T, template <typename> class Interpolator>
void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
{
const int cn = src.channels();
@@ -156,6 +157,51 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
WHOLE_SUBMAT));
/////////////////
PARAM_TEST_CASE(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
double coeff;
int interpolation;
int type;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
coeff = GET_PARAM(3);
interpolation = GET_PARAM(4);
useRoi = GET_PARAM(5);
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(ResizeArea, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::gpu::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
cv::gpu::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
cv::Mat dst_cpu;
cv::resize(src, dst_cpu, cv::Size(), coeff, coeff, interpolation);
EXPECT_MAT_NEAR(dst_cpu, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeArea, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(0.3, 0.5),
testing::Values(Interpolation(cv::INTER_AREA)),
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////
// Test NPP
+1 -1
View File
@@ -277,7 +277,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
#define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))