Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
51e543050c
@ -69,7 +69,7 @@ CvMat* mI = &I.operator CvMat();
|
||||
@endcode
|
||||
One of the biggest complaints of the C interface is that it leaves all the memory management to you.
|
||||
You need to figure out when it is safe to release your unused objects and make sure you do so before
|
||||
the program finishes or you could have troublesome memory leeks. To work around this issue in OpenCV
|
||||
the program finishes or you could have troublesome memory leaks. To work around this issue in OpenCV
|
||||
there is introduced a sort of smart pointer. This will automatically release the object when it's no
|
||||
longer in use. To use this declare the pointers as a specialization of the *Ptr* :
|
||||
@code{.cpp}
|
||||
|
||||
@ -29,7 +29,7 @@ Execute in webcam:
|
||||
|
||||
@code{.bash}
|
||||
|
||||
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392
|
||||
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --rgb
|
||||
|
||||
@endcode
|
||||
|
||||
@ -37,7 +37,7 @@ Execute with image or video file:
|
||||
|
||||
@code{.bash}
|
||||
|
||||
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE]
|
||||
$ example_dnn_object_detection --config=[PATH-TO-DARKNET]/cfg/yolo.cfg --model=[PATH-TO-DARKNET]/yolo.weights --classes=object_detection_classes_pascal_voc.txt --width=416 --height=416 --scale=0.00392 --input=[PATH-TO-IMAGE-OR-VIDEO-FILE] --rgb
|
||||
|
||||
@endcode
|
||||
|
||||
|
||||
@ -786,10 +786,14 @@ template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, vdupq_n_##suffix(0), n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a) \
|
||||
{ return _Tpvec(vextq_##suffix(vdupq_n_##suffix(0), a.val, _Tpvec::nlanes - n)); } \
|
||||
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \
|
||||
{ return a; } \
|
||||
template<int n> inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(a.val, b.val, n)); } \
|
||||
template<int n> inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); }
|
||||
{ return _Tpvec(vextq_##suffix(b.val, a.val, _Tpvec::nlanes - n)); } \
|
||||
template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ CV_UNUSED(b); return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_uint8x16, u8)
|
||||
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int8x16, s8)
|
||||
|
||||
@ -589,7 +589,7 @@ inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
|
||||
return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
|
||||
}
|
||||
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64(_Tpvec, suffix, rg1, rg2) \
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, suffix, rg1, rg2) \
|
||||
template<int imm> \
|
||||
inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
@ -598,11 +598,13 @@ inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
|
||||
return imm ? b : a; \
|
||||
}
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, right, a, b)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(_Tpvec) \
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, left, b, a) \
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG(_Tpvec, right, a, b)
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
|
||||
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
|
||||
|
||||
/* Extract */
|
||||
template<int s, typename _Tpvec>
|
||||
@ -716,26 +718,33 @@ inline int v_signmask(const v_uint64x2& a)
|
||||
inline int v_signmask(const v_float64x2& a)
|
||||
{ return v_signmask(v_reinterpret_as_s64(a)); }
|
||||
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_all(const _Tpvec& a)
|
||||
{ return vec_all_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_all(const v_uint8x16 &a)
|
||||
{ return vec_all_lt(a.val, _Tpvec().val); }
|
||||
inline bool v_check_all(const v_uint8x16& a)
|
||||
{ return v_check_all(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_all(const v_uint16x8 &a)
|
||||
inline bool v_check_all(const v_uint16x8& a)
|
||||
{ return v_check_all(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_all(const v_uint32x4 &a)
|
||||
inline bool v_check_all(const v_uint32x4& a)
|
||||
{ return v_check_all(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_all(const v_float32x4& a)
|
||||
{ return v_check_all(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_all(const v_float64x2& a)
|
||||
{ return v_check_all(v_reinterpret_as_s64(a)); }
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_any(const _Tpvec& a)
|
||||
{ return vec_any_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_any(const v_uint8x16 &a)
|
||||
{ return vec_any_lt(a.val, _Tpvec().val); }
|
||||
inline bool v_check_any(const v_uint8x16& a)
|
||||
{ return v_check_any(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_any(const v_uint16x8 &a)
|
||||
inline bool v_check_any(const v_uint16x8& a)
|
||||
{ return v_check_any(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_any(const v_uint32x4 &a)
|
||||
inline bool v_check_any(const v_uint32x4& a)
|
||||
{ return v_check_any(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_any(const v_float32x4& a)
|
||||
{ return v_check_any(v_reinterpret_as_s32(a)); }
|
||||
inline bool v_check_any(const v_float64x2& a)
|
||||
{ return v_check_any(v_reinterpret_as_s64(a)); }
|
||||
|
||||
////////// Other math /////////
|
||||
|
||||
|
||||
@ -194,8 +194,8 @@ Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
|
||||
{
|
||||
Matx<_Tp, n, m> b;
|
||||
bool ok;
|
||||
if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
|
||||
ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*this, b, method);
|
||||
if( m == n && (method == DECOMP_LU || method == DECOMP_CHOLESKY) )
|
||||
ok = cv::internal::Matx_FastInvOp<_Tp, m>()(*reinterpret_cast<const Matx<_Tp, m, m>*>(this), reinterpret_cast<Matx<_Tp, m, m>&>(b), method);
|
||||
else
|
||||
{
|
||||
Mat A(*this, false), B(b, false);
|
||||
|
||||
@ -837,17 +837,28 @@ template<typename R> struct TheTest
|
||||
Data<R> resC = v_rotate_right<s>(a);
|
||||
Data<R> resD = v_rotate_right<s>(a, b);
|
||||
|
||||
Data<R> resE = v_rotate_left<s>(a);
|
||||
Data<R> resF = v_rotate_left<s>(a, b);
|
||||
|
||||
for (int i = 0; i < R::nlanes; ++i)
|
||||
{
|
||||
if (i + s >= R::nlanes)
|
||||
{
|
||||
EXPECT_EQ((LaneType)0, resC[i]);
|
||||
EXPECT_EQ(dataB[i - R::nlanes + s], resD[i]);
|
||||
|
||||
EXPECT_EQ((LaneType)0, resE[i - R::nlanes + s]);
|
||||
EXPECT_EQ(dataB[i], resF[i - R::nlanes + s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_EQ(dataA[i + s], resC[i]);
|
||||
}
|
||||
EXPECT_EQ(dataA[i + s], resD[i]);
|
||||
|
||||
EXPECT_EQ(dataA[i], resE[i + s]);
|
||||
EXPECT_EQ(dataA[i], resF[i + s]);
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
@ -52,10 +52,8 @@ namespace column_filter
|
||||
{
|
||||
#define MAX_KERNEL_SIZE 32
|
||||
|
||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||
|
||||
template <int KSIZE, typename T, typename D, typename B>
|
||||
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const float* kernel, const int anchor, const B brd)
|
||||
{
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||
const int BLOCK_DIM_X = 16;
|
||||
@ -135,7 +133,7 @@ namespace column_filter
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < KSIZE; ++k)
|
||||
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
|
||||
sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * kernel[k];
|
||||
|
||||
dst(y, x) = saturate_cast<D>(sum);
|
||||
}
|
||||
@ -143,7 +141,7 @@ namespace column_filter
|
||||
}
|
||||
|
||||
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
|
||||
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream)
|
||||
{
|
||||
int BLOCK_DIM_X;
|
||||
int BLOCK_DIM_Y;
|
||||
@ -167,7 +165,7 @@ namespace column_filter
|
||||
|
||||
B<T> brd(src.rows);
|
||||
|
||||
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
|
||||
linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, kernel, anchor, brd);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
@ -181,7 +179,7 @@ namespace filter
|
||||
template <typename T, typename D>
|
||||
void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[5][33] =
|
||||
{
|
||||
@ -362,11 +360,6 @@ namespace filter
|
||||
}
|
||||
};
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, kernel, anchor, cc, stream);
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,10 +52,8 @@ namespace row_filter
|
||||
{
|
||||
#define MAX_KERNEL_SIZE 32
|
||||
|
||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||
|
||||
template <int KSIZE, typename T, typename D, typename B>
|
||||
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const float* kernel, const int anchor, const B brd)
|
||||
{
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||
const int BLOCK_DIM_X = 32;
|
||||
@ -135,7 +133,7 @@ namespace row_filter
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < KSIZE; ++k)
|
||||
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
|
||||
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * kernel[k];
|
||||
|
||||
dst(y, x) = saturate_cast<D>(sum);
|
||||
}
|
||||
@ -143,7 +141,7 @@ namespace row_filter
|
||||
}
|
||||
|
||||
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
|
||||
void caller(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream)
|
||||
{
|
||||
int BLOCK_DIM_X;
|
||||
int BLOCK_DIM_Y;
|
||||
@ -167,7 +165,7 @@ namespace row_filter
|
||||
|
||||
B<T> brd(src.cols);
|
||||
|
||||
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
|
||||
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, kernel, anchor, brd);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
@ -180,7 +178,7 @@ namespace filter
|
||||
template <typename T, typename D>
|
||||
void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, const float* kernel, int anchor, int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[5][33] =
|
||||
{
|
||||
@ -361,11 +359,6 @@ namespace filter
|
||||
}
|
||||
};
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
||||
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, kernel, anchor, cc, stream);
|
||||
}
|
||||
}
|
||||
|
||||
@ -58,9 +58,9 @@ namespace canny
|
||||
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream);
|
||||
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream);
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, int* d_counter, cudaStream_t stream);
|
||||
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream);
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, int* d_counter, cudaStream_t stream);
|
||||
|
||||
void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
@ -127,6 +127,8 @@ namespace
|
||||
Ptr<Filter> filterDX_, filterDY_;
|
||||
#endif
|
||||
int old_apperture_size_;
|
||||
|
||||
int *d_counter;
|
||||
};
|
||||
|
||||
void CannyImpl::detect(InputArray _image, OutputArray _edges, Stream& stream)
|
||||
@ -218,12 +220,17 @@ namespace
|
||||
|
||||
void CannyImpl::CannyCaller(GpuMat& edges, Stream& stream)
|
||||
{
|
||||
map_.setTo(Scalar::all(0));
|
||||
map_.setTo(Scalar::all(0), stream);
|
||||
|
||||
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_), StreamAccessor::getStream(stream));
|
||||
|
||||
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||
cudaSafeCall( cudaMalloc(&d_counter, sizeof(int)) );
|
||||
|
||||
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), d_counter, StreamAccessor::getStream(stream));
|
||||
|
||||
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), d_counter, StreamAccessor::getStream(stream));
|
||||
|
||||
cudaSafeCall( cudaFree(d_counter) );
|
||||
|
||||
canny::getEdges(map_, edges, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
@ -102,6 +103,20 @@ namespace canny
|
||||
}
|
||||
};
|
||||
|
||||
struct SrcTexObject
|
||||
{
|
||||
int xoff;
|
||||
int yoff;
|
||||
cudaTextureObject_t tex_src_object;
|
||||
__host__ SrcTexObject(int _xoff, int _yoff, cudaTextureObject_t _tex_src_object) : xoff(_xoff), yoff(_yoff), tex_src_object(_tex_src_object) { }
|
||||
|
||||
__device__ __forceinline__ int operator ()(int y, int x) const
|
||||
{
|
||||
return tex2D<uchar>(tex_src_object, x + xoff, y + yoff);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Norm> __global__
|
||||
void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
|
||||
{
|
||||
@ -120,29 +135,94 @@ namespace canny
|
||||
mag(y, x) = norm(dxVal, dyVal);
|
||||
}
|
||||
|
||||
template <class Norm> __global__
|
||||
void calcMagnitudeKernel(const SrcTexObject src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y >= mag.rows || x >= mag.cols)
|
||||
return;
|
||||
|
||||
int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
|
||||
int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));
|
||||
|
||||
dx(y, x) = dxVal;
|
||||
dy(y, x) = dyVal;
|
||||
|
||||
mag(y, x) = norm(dxVal, dyVal);
|
||||
}
|
||||
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
|
||||
|
||||
bindTexture(&tex_src, srcWhole);
|
||||
SrcTex src(xoff, yoff);
|
||||
bool cc30 = deviceSupports(FEATURE_SET_COMPUTE_30);
|
||||
|
||||
if (L2Grad)
|
||||
if (cc30)
|
||||
{
|
||||
L2 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
cudaResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
resDesc.resType = cudaResourceTypePitch2D;
|
||||
resDesc.res.pitch2D.devPtr = srcWhole.ptr();
|
||||
resDesc.res.pitch2D.height = srcWhole.rows;
|
||||
resDesc.res.pitch2D.width = srcWhole.cols;
|
||||
resDesc.res.pitch2D.pitchInBytes = srcWhole.step;
|
||||
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<uchar>();
|
||||
|
||||
cudaTextureDesc texDesc;
|
||||
memset(&texDesc, 0, sizeof(texDesc));
|
||||
texDesc.addressMode[0] = cudaAddressModeClamp;
|
||||
texDesc.addressMode[1] = cudaAddressModeClamp;
|
||||
texDesc.addressMode[2] = cudaAddressModeClamp;
|
||||
|
||||
cudaTextureObject_t tex = 0;
|
||||
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
|
||||
|
||||
SrcTexObject src(xoff, yoff, tex);
|
||||
|
||||
if (L2Grad)
|
||||
{
|
||||
L2 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
else
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
cudaSafeCall( cudaDestroyTextureObject(tex) );
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
bindTexture(&tex_src, srcWhole);
|
||||
SrcTex src(xoff, yoff);
|
||||
|
||||
if (L2Grad)
|
||||
{
|
||||
L2 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||
@ -165,7 +245,6 @@ namespace canny
|
||||
namespace canny
|
||||
{
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
|
||||
{
|
||||
const int CANNY_SHIFT = 15;
|
||||
@ -218,18 +297,103 @@ namespace canny
|
||||
map(y, x) = edge_type;
|
||||
}
|
||||
|
||||
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh, cudaTextureObject_t tex_mag)
|
||||
{
|
||||
const int CANNY_SHIFT = 15;
|
||||
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1)
|
||||
return;
|
||||
|
||||
int dxVal = dx(y, x);
|
||||
int dyVal = dy(y, x);
|
||||
|
||||
const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
|
||||
const float m = tex2D<float>(tex_mag, x, y);
|
||||
|
||||
dxVal = ::abs(dxVal);
|
||||
dyVal = ::abs(dyVal);
|
||||
|
||||
// 0 - the pixel can not belong to an edge
|
||||
// 1 - the pixel might belong to an edge
|
||||
// 2 - the pixel does belong to an edge
|
||||
int edge_type = 0;
|
||||
|
||||
if (m > low_thresh)
|
||||
{
|
||||
const int tg22x = dxVal * TG22;
|
||||
const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
|
||||
|
||||
dyVal <<= CANNY_SHIFT;
|
||||
|
||||
if (dyVal < tg22x)
|
||||
{
|
||||
if (m > tex2D<float>(tex_mag, x - 1, y) && m >= tex2D<float>(tex_mag, x + 1, y))
|
||||
edge_type = 1 + (int)(m > high_thresh);
|
||||
}
|
||||
else if(dyVal > tg67x)
|
||||
{
|
||||
if (m > tex2D<float>(tex_mag, x, y - 1) && m >= tex2D<float>(tex_mag, x, y + 1))
|
||||
edge_type = 1 + (int)(m > high_thresh);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m > tex2D<float>(tex_mag, x - s, y - 1) && m >= tex2D<float>(tex_mag, x + s, y + 1))
|
||||
edge_type = 1 + (int)(m > high_thresh);
|
||||
}
|
||||
}
|
||||
|
||||
map(y, x) = edge_type;
|
||||
}
|
||||
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
|
||||
|
||||
bindTexture(&tex_mag, mag);
|
||||
if (deviceSupports(FEATURE_SET_COMPUTE_30))
|
||||
{
|
||||
// Use the texture object
|
||||
cudaResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
resDesc.resType = cudaResourceTypePitch2D;
|
||||
resDesc.res.pitch2D.devPtr = mag.ptr();
|
||||
resDesc.res.pitch2D.height = mag.rows;
|
||||
resDesc.res.pitch2D.width = mag.cols;
|
||||
resDesc.res.pitch2D.pitchInBytes = mag.step;
|
||||
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<float>();
|
||||
|
||||
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
cudaTextureDesc texDesc;
|
||||
memset(&texDesc, 0, sizeof(texDesc));
|
||||
texDesc.addressMode[0] = cudaAddressModeClamp;
|
||||
texDesc.addressMode[1] = cudaAddressModeClamp;
|
||||
texDesc.addressMode[2] = cudaAddressModeClamp;
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaTextureObject_t tex=0;
|
||||
cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
|
||||
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh, tex);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
else
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
cudaSafeCall( cudaDestroyTextureObject(tex) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use the texture reference
|
||||
bindTexture(&tex_mag, mag);
|
||||
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -237,14 +401,12 @@ namespace canny
|
||||
|
||||
namespace canny
|
||||
{
|
||||
__device__ int counter = 0;
|
||||
|
||||
__device__ __forceinline__ bool checkIdx(int y, int x, int rows, int cols)
|
||||
{
|
||||
return (y >= 0) && (y < rows) && (x >= 0) && (x < cols);
|
||||
}
|
||||
|
||||
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, short2* st)
|
||||
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, short2* st, int* d_counter)
|
||||
{
|
||||
__shared__ volatile int smem[18][18];
|
||||
|
||||
@ -325,22 +487,19 @@ namespace canny
|
||||
|
||||
if (n > 0)
|
||||
{
|
||||
const int ind = ::atomicAdd(&counter, 1);
|
||||
const int ind = ::atomicAdd(d_counter, 1);
|
||||
st[ind] = make_short2(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream)
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, int* d_counter, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
|
||||
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||
cudaSafeCall( cudaMemsetAsync(d_counter, 0, sizeof(int), stream) );
|
||||
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
|
||||
|
||||
edgesHysteresisLocalKernel<<<grid, block, 0, stream>>>(map, st1);
|
||||
edgesHysteresisLocalKernel<<<grid, block, 0, stream>>>(map, st1, d_counter);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
@ -355,7 +514,7 @@ namespace canny
|
||||
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
||||
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
|
||||
|
||||
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, short2* st1, short2* st2, const int count)
|
||||
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, short2* st1, short2* st2, int* d_counter, const int count)
|
||||
{
|
||||
const int stack_size = 512;
|
||||
|
||||
@ -429,7 +588,7 @@ namespace canny
|
||||
{
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
s_ind = ::atomicAdd(&counter, s_counter);
|
||||
s_ind = ::atomicAdd(d_counter, s_counter);
|
||||
|
||||
if (s_ind + s_counter > map.cols * map.rows)
|
||||
s_counter = 0;
|
||||
@ -444,29 +603,26 @@ namespace canny
|
||||
}
|
||||
}
|
||||
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream)
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, int* d_counter, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
|
||||
|
||||
int count;
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, d_counter, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
while (count > 0)
|
||||
{
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||
cudaSafeCall( cudaMemsetAsync(d_counter, 0, sizeof(int), stream) );
|
||||
|
||||
const dim3 block(128);
|
||||
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
|
||||
|
||||
edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, count);
|
||||
edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, d_counter, count);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, d_counter, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
count = min(count, map.cols * map.rows);
|
||||
|
||||
@ -92,9 +92,66 @@ CUDA_TEST_P(Canny, Accuracy)
|
||||
EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
|
||||
}
|
||||
|
||||
class CannyAsyncParallelLoopBody : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
CannyAsyncParallelLoopBody(const cv::cuda::GpuMat& d_img_, cv::cuda::GpuMat* edges_, double low_thresh_, double high_thresh_, int apperture_size_, bool useL2gradient_)
|
||||
: d_img(d_img_), edges(edges_), low_thresh(low_thresh_), high_thresh(high_thresh_), apperture_size(apperture_size_), useL2gradient(useL2gradient_) {}
|
||||
~CannyAsyncParallelLoopBody() {};
|
||||
void operator()(const cv::Range& r) const
|
||||
{
|
||||
for (int i = r.start; i < r.end; i++) {
|
||||
cv::cuda::Stream stream;
|
||||
cv::Ptr<cv::cuda::CannyEdgeDetector> canny = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||
canny->detect(d_img, edges[i], stream);
|
||||
stream.waitForCompletion();
|
||||
}
|
||||
}
|
||||
protected:
|
||||
const cv::cuda::GpuMat& d_img;
|
||||
cv::cuda::GpuMat* edges;
|
||||
double low_thresh;
|
||||
double high_thresh;
|
||||
int apperture_size;
|
||||
bool useL2gradient;
|
||||
};
|
||||
|
||||
#define NUM_STREAMS 64
|
||||
|
||||
CUDA_TEST_P(Canny, Async)
|
||||
{
|
||||
if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_30))
|
||||
{
|
||||
throw SkipTestException("CUDA device doesn't support texture objects");
|
||||
}
|
||||
else
|
||||
{
|
||||
const cv::Mat img = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const cv::cuda::GpuMat d_img_roi = loadMat(img, useRoi);
|
||||
|
||||
double low_thresh = 50.0;
|
||||
double high_thresh = 100.0;
|
||||
|
||||
// Synchronous call
|
||||
cv::Ptr<cv::cuda::CannyEdgeDetector> canny = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
|
||||
cv::cuda::GpuMat edges_gold;
|
||||
canny->detect(d_img_roi, edges_gold);
|
||||
|
||||
// Asynchronous call
|
||||
cv::cuda::GpuMat edges[NUM_STREAMS];
|
||||
cv::parallel_for_(cv::Range(0, NUM_STREAMS), CannyAsyncParallelLoopBody(d_img_roi, edges, low_thresh, high_thresh, apperture_size, useL2gradient));
|
||||
|
||||
// Compare the results of synchronous call and asynchronous call
|
||||
for (int i = 0; i < NUM_STREAMS; i++)
|
||||
EXPECT_MAT_NEAR(edges_gold, edges[i], 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, Canny, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||
testing::Values(AppertureSize(3), AppertureSize(5), AppertureSize(7)),
|
||||
testing::Values(L2gradient(false), L2gradient(true)),
|
||||
WHOLE_SUBMAT));
|
||||
|
||||
|
||||
@ -84,15 +84,97 @@ struct RGB2HSV_f
|
||||
typedef float channel_type;
|
||||
|
||||
RGB2HSV_f(int _srccn, int _blueIdx, float _hrange)
|
||||
: srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}
|
||||
: srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {
|
||||
#if CV_SIMD128
|
||||
hasSIMD = hasSIMD128();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV_SIMD128
|
||||
inline void process(v_float32x4& v_r, v_float32x4& v_g,
|
||||
v_float32x4& v_b, float hscale) const
|
||||
{
|
||||
v_float32x4 v_min_rgb = v_min(v_min(v_r, v_g), v_b);
|
||||
v_float32x4 v_max_rgb = v_max(v_max(v_r, v_g), v_b);
|
||||
|
||||
v_float32x4 v_eps = v_setall_f32(FLT_EPSILON);
|
||||
v_float32x4 v_diff = v_max_rgb - v_min_rgb;
|
||||
v_float32x4 v_s = v_diff / (v_abs(v_max_rgb) + v_eps);
|
||||
|
||||
v_float32x4 v_r_eq_max = v_r == v_max_rgb;
|
||||
v_float32x4 v_g_eq_max = v_g == v_max_rgb;
|
||||
v_float32x4 v_h = v_select(v_r_eq_max, v_g - v_b,
|
||||
v_select(v_g_eq_max, v_b - v_r, v_r - v_g));
|
||||
v_float32x4 v_res = v_select(v_r_eq_max, (v_g < v_b) & v_setall_f32(360.0f),
|
||||
v_select(v_g_eq_max, v_setall_f32(120.0f), v_setall_f32(240.0f)));
|
||||
v_float32x4 v_rev_diff = v_setall_f32(60.0f) / (v_diff + v_eps);
|
||||
v_r = v_muladd(v_h, v_rev_diff, v_res) * v_setall_f32(hscale);
|
||||
|
||||
v_g = v_s;
|
||||
v_b = v_max_rgb;
|
||||
}
|
||||
#endif
|
||||
|
||||
void operator()(const float* src, float* dst, int n) const
|
||||
{
|
||||
int i, bidx = blueIdx, scn = srccn;
|
||||
int i = 0, bidx = blueIdx, scn = srccn;
|
||||
float hscale = hrange*(1.f/360.f);
|
||||
n *= 3;
|
||||
|
||||
for( i = 0; i < n; i += 3, src += scn )
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD)
|
||||
{
|
||||
if (scn == 3) {
|
||||
if (bidx) {
|
||||
for ( ; i <= n - 12; i += 12, src += scn * 4)
|
||||
{
|
||||
v_float32x4 v_r;
|
||||
v_float32x4 v_g;
|
||||
v_float32x4 v_b;
|
||||
v_load_deinterleave(src, v_r, v_g, v_b);
|
||||
process(v_r, v_g, v_b, hscale);
|
||||
v_store_interleave(dst + i, v_r, v_g, v_b);
|
||||
}
|
||||
} else {
|
||||
for ( ; i <= n - 12; i += 12, src += scn * 4)
|
||||
{
|
||||
v_float32x4 v_r;
|
||||
v_float32x4 v_g;
|
||||
v_float32x4 v_b;
|
||||
v_load_deinterleave(src, v_r, v_g, v_b);
|
||||
process(v_b, v_g, v_r, hscale);
|
||||
v_store_interleave(dst + i, v_b, v_g, v_r);
|
||||
}
|
||||
}
|
||||
} else { // scn == 4
|
||||
if (bidx) {
|
||||
for ( ; i <= n - 12; i += 12, src += scn * 4)
|
||||
{
|
||||
v_float32x4 v_r;
|
||||
v_float32x4 v_g;
|
||||
v_float32x4 v_b;
|
||||
v_float32x4 v_a;
|
||||
v_load_deinterleave(src, v_r, v_g, v_b, v_a);
|
||||
process(v_r, v_g, v_b, hscale);
|
||||
v_store_interleave(dst + i, v_r, v_g, v_b);
|
||||
}
|
||||
} else {
|
||||
for ( ; i <= n - 12; i += 12, src += scn * 4)
|
||||
{
|
||||
v_float32x4 v_r;
|
||||
v_float32x4 v_g;
|
||||
v_float32x4 v_b;
|
||||
v_float32x4 v_a;
|
||||
v_load_deinterleave(src, v_r, v_g, v_b, v_a);
|
||||
process(v_b, v_g, v_r, hscale);
|
||||
v_store_interleave(dst + i, v_b, v_g, v_r);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for( ; i < n; i += 3, src += scn )
|
||||
{
|
||||
float b = src[bidx], g = src[1], r = src[bidx^2];
|
||||
float h, s, v;
|
||||
@ -125,6 +207,9 @@ struct RGB2HSV_f
|
||||
|
||||
int srccn, blueIdx;
|
||||
float hrange;
|
||||
#if CV_SIMD128
|
||||
bool hasSIMD;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@ -855,167 +940,111 @@ struct HLS2RGB_f
|
||||
|
||||
HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
|
||||
: dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {
|
||||
#if CV_SSE2
|
||||
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
|
||||
#if CV_SIMD128
|
||||
hasSIMD = hasSIMD128();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CV_SSE2
|
||||
void process(__m128& v_h0, __m128& v_h1, __m128& v_l0,
|
||||
__m128& v_l1, __m128& v_s0, __m128& v_s1) const
|
||||
#if CV_SIMD128
|
||||
inline void process(v_float32x4& v_h, v_float32x4& v_l, v_float32x4& v_s) const
|
||||
{
|
||||
__m128 v_lel0 = _mm_cmple_ps(v_l0, _mm_set1_ps(0.5f));
|
||||
__m128 v_lel1 = _mm_cmple_ps(v_l1, _mm_set1_ps(0.5f));
|
||||
__m128 v_p20 = _mm_andnot_ps(v_lel0, _mm_sub_ps(_mm_add_ps(v_l0, v_s0), _mm_mul_ps(v_l0, v_s0)));
|
||||
__m128 v_p21 = _mm_andnot_ps(v_lel1, _mm_sub_ps(_mm_add_ps(v_l1, v_s1), _mm_mul_ps(v_l1, v_s1)));
|
||||
v_p20 = _mm_or_ps(v_p20, _mm_and_ps(v_lel0, _mm_mul_ps(v_l0, _mm_add_ps(_mm_set1_ps(1.0f), v_s0))));
|
||||
v_p21 = _mm_or_ps(v_p21, _mm_and_ps(v_lel1, _mm_mul_ps(v_l1, _mm_add_ps(_mm_set1_ps(1.0f), v_s1))));
|
||||
v_float32x4 v_one = v_setall_f32(1.0f);
|
||||
|
||||
__m128 v_p10 = _mm_sub_ps(_mm_mul_ps(_mm_set1_ps(2.0f), v_l0), v_p20);
|
||||
__m128 v_p11 = _mm_sub_ps(_mm_mul_ps(_mm_set1_ps(2.0f), v_l1), v_p21);
|
||||
v_float32x4 v_l_le_half = v_l <= v_setall_f32(0.5f);
|
||||
v_float32x4 v_ls = v_l * v_s;
|
||||
v_float32x4 v_elem0 = v_select(v_l_le_half, v_ls, v_s - v_ls);
|
||||
|
||||
v_h0 = _mm_mul_ps(v_h0, _mm_set1_ps(hscale));
|
||||
v_h1 = _mm_mul_ps(v_h1, _mm_set1_ps(hscale));
|
||||
v_float32x4 v_hs_raw = v_h * v_setall_f32(hscale);
|
||||
v_float32x4 v_pre_hs = v_cvt_f32(v_trunc(v_hs_raw));
|
||||
v_float32x4 v_hs = v_hs_raw - v_pre_hs;
|
||||
v_float32x4 v_sector = v_pre_hs - v_setall_f32(6.0f) * v_cvt_f32(v_trunc(v_hs_raw * v_setall_f32(1.0f / 6.0f)));
|
||||
v_float32x4 v_elem1 = v_hs + v_hs;
|
||||
|
||||
__m128 v_pre_sector0 = _mm_cvtepi32_ps(_mm_cvttps_epi32(v_h0));
|
||||
__m128 v_pre_sector1 = _mm_cvtepi32_ps(_mm_cvttps_epi32(v_h1));
|
||||
v_float32x4 v_tab0 = v_l + v_elem0;
|
||||
v_float32x4 v_tab1 = v_l - v_elem0;
|
||||
v_float32x4 v_tab2 = v_l + v_elem0 - v_elem0 * v_elem1;
|
||||
v_float32x4 v_tab3 = v_l - v_elem0 + v_elem0 * v_elem1;
|
||||
|
||||
v_h0 = _mm_sub_ps(v_h0, v_pre_sector0);
|
||||
v_h1 = _mm_sub_ps(v_h1, v_pre_sector1);
|
||||
v_float32x4 v_two = v_setall_f32(2.0f);
|
||||
v_float32x4 v_four = v_setall_f32(4.0f);
|
||||
|
||||
__m128 v_p2_p10 = _mm_sub_ps(v_p20, v_p10);
|
||||
__m128 v_p2_p11 = _mm_sub_ps(v_p21, v_p11);
|
||||
__m128 v_tab20 = _mm_add_ps(v_p10, _mm_mul_ps(v_p2_p10, _mm_sub_ps(_mm_set1_ps(1.0f), v_h0)));
|
||||
__m128 v_tab21 = _mm_add_ps(v_p11, _mm_mul_ps(v_p2_p11, _mm_sub_ps(_mm_set1_ps(1.0f), v_h1)));
|
||||
__m128 v_tab30 = _mm_add_ps(v_p10, _mm_mul_ps(v_p2_p10, v_h0));
|
||||
__m128 v_tab31 = _mm_add_ps(v_p11, _mm_mul_ps(v_p2_p11, v_h1));
|
||||
v_h = v_select(v_sector < v_two , v_tab1,
|
||||
v_select(v_sector <= v_two , v_tab3,
|
||||
v_select(v_sector <= v_four, v_tab0, v_tab2)));
|
||||
|
||||
__m128 v_sector0 = _mm_div_ps(v_pre_sector0, _mm_set1_ps(6.0f));
|
||||
__m128 v_sector1 = _mm_div_ps(v_pre_sector1, _mm_set1_ps(6.0f));
|
||||
v_sector0 = _mm_cvtepi32_ps(_mm_cvttps_epi32(v_sector0));
|
||||
v_sector1 = _mm_cvtepi32_ps(_mm_cvttps_epi32(v_sector1));
|
||||
v_sector0 = _mm_mul_ps(v_sector0, _mm_set1_ps(6.0f));
|
||||
v_sector1 = _mm_mul_ps(v_sector1, _mm_set1_ps(6.0f));
|
||||
v_sector0 = _mm_sub_ps(v_pre_sector0, v_sector0);
|
||||
v_sector1 = _mm_sub_ps(v_pre_sector1, v_sector1);
|
||||
v_l = v_select(v_sector < v_one , v_tab3,
|
||||
v_select(v_sector <= v_two , v_tab0,
|
||||
v_select(v_sector < v_four, v_tab2, v_tab1)));
|
||||
|
||||
v_h0 = _mm_and_ps(v_p10, _mm_cmplt_ps(v_sector0, _mm_set1_ps(2.0f)));
|
||||
v_h1 = _mm_and_ps(v_p11, _mm_cmplt_ps(v_sector1, _mm_set1_ps(2.0f)));
|
||||
v_h0 = _mm_or_ps(v_h0, _mm_and_ps(v_tab30, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(2.0f))));
|
||||
v_h1 = _mm_or_ps(v_h1, _mm_and_ps(v_tab31, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(2.0f))));
|
||||
v_h0 = _mm_or_ps(v_h0, _mm_and_ps(v_p20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(3.0f))));
|
||||
v_h1 = _mm_or_ps(v_h1, _mm_and_ps(v_p21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(3.0f))));
|
||||
v_h0 = _mm_or_ps(v_h0, _mm_and_ps(v_p20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(4.0f))));
|
||||
v_h1 = _mm_or_ps(v_h1, _mm_and_ps(v_p21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(4.0f))));
|
||||
v_h0 = _mm_or_ps(v_h0, _mm_and_ps(v_tab20, _mm_cmpgt_ps(v_sector0, _mm_set1_ps(4.0f))));
|
||||
v_h1 = _mm_or_ps(v_h1, _mm_and_ps(v_tab21, _mm_cmpgt_ps(v_sector1, _mm_set1_ps(4.0f))));
|
||||
v_l0 = _mm_and_ps(v_tab30, _mm_cmplt_ps(v_sector0, _mm_set1_ps(1.0f)));
|
||||
v_l1 = _mm_and_ps(v_tab31, _mm_cmplt_ps(v_sector1, _mm_set1_ps(1.0f)));
|
||||
v_l0 = _mm_or_ps(v_l0, _mm_and_ps(v_p20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(1.0f))));
|
||||
v_l1 = _mm_or_ps(v_l1, _mm_and_ps(v_p21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(1.0f))));
|
||||
v_l0 = _mm_or_ps(v_l0, _mm_and_ps(v_p20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(2.0f))));
|
||||
v_l1 = _mm_or_ps(v_l1, _mm_and_ps(v_p21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(2.0f))));
|
||||
v_l0 = _mm_or_ps(v_l0, _mm_and_ps(v_tab20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(3.0f))));
|
||||
v_l1 = _mm_or_ps(v_l1, _mm_and_ps(v_tab21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(3.0f))));
|
||||
v_l0 = _mm_or_ps(v_l0, _mm_and_ps(v_p10, _mm_cmpgt_ps(v_sector0, _mm_set1_ps(3.0f))));
|
||||
v_l1 = _mm_or_ps(v_l1, _mm_and_ps(v_p11, _mm_cmpgt_ps(v_sector1, _mm_set1_ps(3.0f))));
|
||||
v_s0 = _mm_and_ps(v_p20, _mm_cmplt_ps(v_sector0, _mm_set1_ps(1.0f)));
|
||||
v_s1 = _mm_and_ps(v_p21, _mm_cmplt_ps(v_sector1, _mm_set1_ps(1.0f)));
|
||||
v_s0 = _mm_or_ps(v_s0, _mm_and_ps(v_tab20, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(1.0f))));
|
||||
v_s1 = _mm_or_ps(v_s1, _mm_and_ps(v_tab21, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(1.0f))));
|
||||
v_s0 = _mm_or_ps(v_s0, _mm_and_ps(v_p10, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(2.0f))));
|
||||
v_s1 = _mm_or_ps(v_s1, _mm_and_ps(v_p11, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(2.0f))));
|
||||
v_s0 = _mm_or_ps(v_s0, _mm_and_ps(v_p10, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(3.0f))));
|
||||
v_s1 = _mm_or_ps(v_s1, _mm_and_ps(v_p11, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(3.0f))));
|
||||
v_s0 = _mm_or_ps(v_s0, _mm_and_ps(v_tab30, _mm_cmpeq_ps(v_sector0, _mm_set1_ps(4.0f))));
|
||||
v_s1 = _mm_or_ps(v_s1, _mm_and_ps(v_tab31, _mm_cmpeq_ps(v_sector1, _mm_set1_ps(4.0f))));
|
||||
v_s0 = _mm_or_ps(v_s0, _mm_and_ps(v_p20, _mm_cmpgt_ps(v_sector0, _mm_set1_ps(4.0f))));
|
||||
v_s1 = _mm_or_ps(v_s1, _mm_and_ps(v_p21, _mm_cmpgt_ps(v_sector1, _mm_set1_ps(4.0f))));
|
||||
v_s = v_select(v_sector < v_one , v_tab0,
|
||||
v_select(v_sector < v_two , v_tab2,
|
||||
v_select(v_sector < v_four, v_tab1,
|
||||
v_select(v_sector <= v_four, v_tab3, v_tab0))));
|
||||
}
|
||||
#endif
|
||||
|
||||
void operator()(const float* src, float* dst, int n) const
|
||||
{
|
||||
int i = 0, bidx = blueIdx, dcn = dstcn;
|
||||
float _hscale = hscale;
|
||||
float alpha = ColorChannel<float>::max();
|
||||
n *= 3;
|
||||
|
||||
#if CV_SSE2
|
||||
if (haveSIMD)
|
||||
#if CV_SIMD128
|
||||
if (hasSIMD)
|
||||
{
|
||||
for( ; i <= n - 24; i += 24, dst += dcn * 8 )
|
||||
if (dcn == 3)
|
||||
{
|
||||
__m128 v_h0 = _mm_loadu_ps(src + i + 0);
|
||||
__m128 v_h1 = _mm_loadu_ps(src + i + 4);
|
||||
__m128 v_l0 = _mm_loadu_ps(src + i + 8);
|
||||
__m128 v_l1 = _mm_loadu_ps(src + i + 12);
|
||||
__m128 v_s0 = _mm_loadu_ps(src + i + 16);
|
||||
__m128 v_s1 = _mm_loadu_ps(src + i + 20);
|
||||
|
||||
_mm_deinterleave_ps(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);
|
||||
|
||||
process(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);
|
||||
|
||||
if (dcn == 3)
|
||||
if (bidx)
|
||||
{
|
||||
if (bidx)
|
||||
for (; i <= n - 12; i += 12, dst += dcn * 4)
|
||||
{
|
||||
_mm_interleave_ps(v_s0, v_s1, v_l0, v_l1, v_h0, v_h1);
|
||||
|
||||
_mm_storeu_ps(dst + 0, v_s0);
|
||||
_mm_storeu_ps(dst + 4, v_s1);
|
||||
_mm_storeu_ps(dst + 8, v_l0);
|
||||
_mm_storeu_ps(dst + 12, v_l1);
|
||||
_mm_storeu_ps(dst + 16, v_h0);
|
||||
_mm_storeu_ps(dst + 20, v_h1);
|
||||
v_float32x4 v_h;
|
||||
v_float32x4 v_l;
|
||||
v_float32x4 v_s;
|
||||
v_load_deinterleave(src + i, v_h, v_l, v_s);
|
||||
process(v_h, v_l, v_s);
|
||||
v_store_interleave(dst, v_s, v_l, v_h);
|
||||
}
|
||||
else
|
||||
} else {
|
||||
for (; i <= n - 12; i += 12, dst += dcn * 4)
|
||||
{
|
||||
_mm_interleave_ps(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);
|
||||
|
||||
_mm_storeu_ps(dst + 0, v_h0);
|
||||
_mm_storeu_ps(dst + 4, v_h1);
|
||||
_mm_storeu_ps(dst + 8, v_l0);
|
||||
_mm_storeu_ps(dst + 12, v_l1);
|
||||
_mm_storeu_ps(dst + 16, v_s0);
|
||||
_mm_storeu_ps(dst + 20, v_s1);
|
||||
v_float32x4 v_h;
|
||||
v_float32x4 v_l;
|
||||
v_float32x4 v_s;
|
||||
v_load_deinterleave(src + i, v_h, v_l, v_s);
|
||||
process(v_h, v_l, v_s);
|
||||
v_store_interleave(dst, v_h, v_l, v_s);
|
||||
}
|
||||
}
|
||||
else
|
||||
} else { // dcn == 4
|
||||
if (bidx)
|
||||
{
|
||||
__m128 v_a0 = _mm_set1_ps(alpha);
|
||||
__m128 v_a1 = _mm_set1_ps(alpha);
|
||||
if (bidx)
|
||||
for (; i <= n - 12; i += 12, dst += dcn * 4)
|
||||
{
|
||||
_mm_interleave_ps(v_s0, v_s1, v_l0, v_l1, v_h0, v_h1, v_a0, v_a1);
|
||||
|
||||
_mm_storeu_ps(dst + 0, v_s0);
|
||||
_mm_storeu_ps(dst + 4, v_s1);
|
||||
_mm_storeu_ps(dst + 8, v_l0);
|
||||
_mm_storeu_ps(dst + 12, v_l1);
|
||||
_mm_storeu_ps(dst + 16, v_h0);
|
||||
_mm_storeu_ps(dst + 20, v_h1);
|
||||
_mm_storeu_ps(dst + 24, v_a0);
|
||||
_mm_storeu_ps(dst + 28, v_a1);
|
||||
v_float32x4 v_h;
|
||||
v_float32x4 v_l;
|
||||
v_float32x4 v_s;
|
||||
v_load_deinterleave(src + i, v_h, v_l, v_s);
|
||||
process(v_h, v_l, v_s);
|
||||
v_float32x4 v_a = v_setall_f32(alpha);
|
||||
v_store_interleave(dst, v_s, v_l, v_h, v_a);
|
||||
}
|
||||
else
|
||||
} else {
|
||||
for (; i <= n - 12; i += 12, dst += dcn * 4)
|
||||
{
|
||||
_mm_interleave_ps(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1, v_a0, v_a1);
|
||||
|
||||
_mm_storeu_ps(dst + 0, v_h0);
|
||||
_mm_storeu_ps(dst + 4, v_h1);
|
||||
_mm_storeu_ps(dst + 8, v_l0);
|
||||
_mm_storeu_ps(dst + 12, v_l1);
|
||||
_mm_storeu_ps(dst + 16, v_s0);
|
||||
_mm_storeu_ps(dst + 20, v_s1);
|
||||
_mm_storeu_ps(dst + 24, v_a0);
|
||||
_mm_storeu_ps(dst + 28, v_a1);
|
||||
v_float32x4 v_h;
|
||||
v_float32x4 v_l;
|
||||
v_float32x4 v_s;
|
||||
v_load_deinterleave(src + i, v_h, v_l, v_s);
|
||||
process(v_h, v_l, v_s);
|
||||
v_float32x4 v_a = v_setall_f32(alpha);
|
||||
v_store_interleave(dst, v_h, v_l, v_s, v_a);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for( ; i < n; i += 3, dst += dcn )
|
||||
{
|
||||
float h = src[i], l = src[i+1], s = src[i+2];
|
||||
@ -1033,7 +1062,7 @@ struct HLS2RGB_f
|
||||
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
|
||||
float p1 = 2*l - p2;
|
||||
|
||||
h *= _hscale;
|
||||
h *= hscale;
|
||||
if( h < 0 )
|
||||
do h += 6; while( h < 0 );
|
||||
else if( h >= 6 )
|
||||
@ -1063,8 +1092,8 @@ struct HLS2RGB_f
|
||||
|
||||
int dstcn, blueIdx;
|
||||
float hscale;
|
||||
#if CV_SSE2
|
||||
bool haveSIMD;
|
||||
#if CV_SIMD128
|
||||
bool hasSIMD;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@ -213,7 +213,7 @@ TEST(Photo_MergeRobertson, regression)
|
||||
loadImage(test_path + "merge/robertson.hdr", expected);
|
||||
merge->process(images, result, times);
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__PPC64__)
|
||||
const float eps = 6.f;
|
||||
#else
|
||||
const float eps = 5.f;
|
||||
|
||||
@ -48,6 +48,7 @@
|
||||
#endif
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/core/cuda.hpp"
|
||||
|
||||
namespace cv {
|
||||
namespace detail {
|
||||
|
||||
@ -870,7 +870,12 @@ bool CvCapture_FFMPEG::open( const char* _filename )
|
||||
int enc_width = enc->width;
|
||||
int enc_height = enc->height;
|
||||
|
||||
AVCodec *codec = avcodec_find_decoder(enc->codec_id);
|
||||
AVCodec *codec;
|
||||
if(av_dict_get(dict, "video_codec", NULL, 0) == NULL) {
|
||||
codec = avcodec_find_decoder(enc->codec_id);
|
||||
} else {
|
||||
codec = avcodec_find_decoder_by_name(av_dict_get(dict, "video_codec", NULL, 0)->value);
|
||||
}
|
||||
if (!codec ||
|
||||
#if LIBAVCODEC_VERSION_INT >= ((53<<16)+(8<<8)+0)
|
||||
avcodec_open2(enc, codec, NULL)
|
||||
|
||||
@ -634,11 +634,11 @@ static int icvOpenCamera_QT (CvCapture_QT_Cam * capture, const int index)
|
||||
OPENCV_ASSERT (capture, "icvOpenCamera_QT", "'capture' is a NULL-pointer");
|
||||
OPENCV_ASSERT (index >=0, "icvOpenCamera_QT", "camera index is negative");
|
||||
|
||||
ComponentDescription component_description;
|
||||
Component component = 0;
|
||||
ComponentDescription component_description;
|
||||
Component component = 0;
|
||||
int number_of_inputs = 0;
|
||||
Rect myRect;
|
||||
ComponentResult result = noErr;
|
||||
ComponentResult result = noErr;
|
||||
|
||||
|
||||
// travers all components and count video digitizer channels
|
||||
|
||||
@ -16,18 +16,18 @@ public:
|
||||
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<std::vector<int> > &outputs,
|
||||
std::vector<std::vector<int> > &internals) const;
|
||||
std::vector<std::vector<int> > &internals) const CV_OVERRIDE;
|
||||
//! [MyLayer::getMemoryShapes]
|
||||
|
||||
//! [MyLayer::forward]
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals);
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE;
|
||||
//! [MyLayer::forward]
|
||||
|
||||
//! [MyLayer::finalize]
|
||||
virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs);
|
||||
virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs) CV_OVERRIDE;
|
||||
//! [MyLayer::finalize]
|
||||
|
||||
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals);
|
||||
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE;
|
||||
};
|
||||
//! [A custom layer interface]
|
||||
|
||||
@ -49,7 +49,7 @@ public:
|
||||
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<std::vector<int> > &outputs,
|
||||
std::vector<std::vector<int> > &internals) const
|
||||
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
|
||||
std::vector<int> outShape(4);
|
||||
@ -62,7 +62,7 @@ public:
|
||||
}
|
||||
|
||||
// Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(internals);
|
||||
cv::Mat& inp = *inputs[0];
|
||||
@ -105,7 +105,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
|
||||
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) CV_OVERRIDE {}
|
||||
|
||||
private:
|
||||
int outWidth, outHeight;
|
||||
@ -132,7 +132,7 @@ public:
|
||||
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
|
||||
const int requiredOutputs,
|
||||
std::vector<std::vector<int> > &outputs,
|
||||
std::vector<std::vector<int> > &internals) const
|
||||
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
|
||||
std::vector<int> outShape(4);
|
||||
@ -146,7 +146,7 @@ public:
|
||||
|
||||
// This implementation is based on a reference implementation from
|
||||
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
|
||||
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals) CV_OVERRIDE
|
||||
{
|
||||
CV_UNUSED(internals);
|
||||
cv::Mat& inp = *inputs[0];
|
||||
@ -187,7 +187,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
|
||||
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) CV_OVERRIDE {}
|
||||
|
||||
private:
|
||||
static inline int offset(const cv::MatSize& size, int c, int x, int y, int b)
|
||||
|
||||
@ -7,12 +7,13 @@
|
||||
|
||||
const char* keys =
|
||||
"{ help h | | Print help message. }"
|
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
|
||||
"{ device | 0 | camera device number. }"
|
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
|
||||
"{ model m | | Path to a binary file of model contains trained weights. "
|
||||
"It could be a file with extensions .caffemodel (Caffe), "
|
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
|
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet).}"
|
||||
"{ config c | | Path to a text file of model contains network configuration. "
|
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
|
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet).}"
|
||||
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
|
||||
"{ classes | | Optional path to a text file with names of classes to label detected objects. }"
|
||||
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
|
||||
@ -91,7 +92,7 @@ int main(int argc, char** argv)
|
||||
if (parser.has("input"))
|
||||
cap.open(parser.get<String>("input"));
|
||||
else
|
||||
cap.open(0);
|
||||
cap.open(parser.get<int>("device"));
|
||||
|
||||
// Process frames.
|
||||
Mat frame, blob;
|
||||
|
||||
@ -61,12 +61,16 @@ int main(int argc, char **argv)
|
||||
"{ p proto | | (required) model configuration, e.g. hand/pose.prototxt }"
|
||||
"{ m model | | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
|
||||
"{ i image | | (required) path to image file (containing a single person, or hand) }"
|
||||
"{ width | 368 | Preprocess input image by resizing to a specific width. }"
|
||||
"{ height | 368 | Preprocess input image by resizing to a specific height. }"
|
||||
"{ t threshold | 0.1 | threshold or confidence value for the heatmap }"
|
||||
);
|
||||
|
||||
String modelTxt = parser.get<string>("proto");
|
||||
String modelBin = parser.get<string>("model");
|
||||
String imageFile = parser.get<String>("image");
|
||||
int W_in = parser.get<int>("width");
|
||||
int H_in = parser.get<int>("height");
|
||||
float thresh = parser.get<float>("threshold");
|
||||
if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
|
||||
{
|
||||
@ -75,10 +79,6 @@ int main(int argc, char **argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// fixed input size for the pretrained network
|
||||
int W_in = 368;
|
||||
int H_in = 368;
|
||||
|
||||
// read the network model
|
||||
Net net = readNetFromCaffe(modelTxt, modelBin);
|
||||
|
||||
|
||||
@ -7,12 +7,13 @@
|
||||
|
||||
const char* keys =
|
||||
"{ help h | | Print help message. }"
|
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
|
||||
"{ device | 0 | camera device number. }"
|
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
|
||||
"{ model m | | Path to a binary file of model contains trained weights. "
|
||||
"It could be a file with extensions .caffemodel (Caffe), "
|
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
|
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet). }"
|
||||
"{ config c | | Path to a text file of model contains network configuration. "
|
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
|
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet). }"
|
||||
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
|
||||
"{ classes | | Optional path to a text file with names of classes. }"
|
||||
"{ colors | | Optional path to a text file with colors for an every class. "
|
||||
@ -111,7 +112,7 @@ int main(int argc, char** argv)
|
||||
if (parser.has("input"))
|
||||
cap.open(parser.get<String>("input"));
|
||||
else
|
||||
cap.open(0);
|
||||
cap.open(parser.get<int>("device"));
|
||||
//! [Open a video file or an image file or a camera stream]
|
||||
|
||||
// Process frames.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user