Merge pull request #18001 from Yosshi999:sift-8bit-descr
* 8-bit SIFT descriptors
* use clearer parameter
* update docs
* propagate type info
* overload function for avoiding ABI-break
* bugfix: some values are undefined when CV_SIMD is absent
This commit is contained in:
@@ -88,7 +88,7 @@ class SIFT_Impl : public SIFT
|
||||
public:
|
||||
explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
|
||||
double contrastThreshold = 0.04, double edgeThreshold = 10,
|
||||
double sigma = 1.6);
|
||||
double sigma = 1.6, int descriptorType = CV_32F );
|
||||
|
||||
//! returns the descriptor size in elements (128); the element type is given by descriptorType()
|
||||
int descriptorSize() const CV_OVERRIDE;
|
||||
@@ -117,13 +117,25 @@ protected:
|
||||
CV_PROP_RW double contrastThreshold;
|
||||
CV_PROP_RW double edgeThreshold;
|
||||
CV_PROP_RW double sigma;
|
||||
CV_PROP_RW int descriptor_type;
|
||||
};
|
||||
|
||||
// Factory overload that takes no descriptor type argument.
// Returns a SIFT_Impl built from the given parameters.
// NOTE(review): this text is a rendered diff, so the two `return` statements below appear to be
// the removed and the added version of the same line; the later one passes CV_32F explicitly
// as the descriptor type.
Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
|
||||
double _contrastThreshold, double _edgeThreshold, double _sigma )
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
// Earlier form: five-argument construction.
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma);
|
||||
|
||||
// Later form: same arguments plus an explicit CV_32F descriptor type.
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F);
|
||||
}
|
||||
|
||||
// Factory overload that additionally takes the descriptor element type.
// _descriptorType must be CV_32F or CV_8U; any other value fails the CV_Assert below.
// All arguments are forwarded unchanged to the SIFT_Impl constructor.
Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
|
||||
double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
// SIFT descriptor supports 32bit floating point and 8bit unsigned int.
|
||||
CV_Assert(_descriptorType == CV_32F || _descriptorType == CV_8U);
|
||||
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType);
|
||||
}
|
||||
|
||||
String SIFT::getDefaultName() const
|
||||
@@ -362,12 +374,12 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const
|
||||
// File-local wrapper that forwards its arguments to the CPU-dispatched calcSIFTDescriptor
// implementation through CV_CPU_DISPATCH.
// NOTE(review): this text is a rendered diff. The output parameter and the dispatch call are each
// shown twice: first with a raw `float* dst`, then with `Mat& dst, int row` (destination matrix
// plus a row index).
static
|
||||
void calcSIFTDescriptor(
|
||||
const Mat& img, Point2f ptf, float ori, float scl,
|
||||
// Earlier form of the output parameter.
int d, int n, float* dst
|
||||
// Later form of the output parameter.
int d, int n, Mat& dst, int row
|
||||
)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
// Earlier form of the dispatch call (argument list ends with dst).
CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst),
|
||||
// Later form of the dispatch call (argument list ends with dst, row).
CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst, row),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
@@ -408,7 +420,7 @@ public:
|
||||
float angle = 360.f - kpt.angle;
|
||||
if(std::abs(angle - 360.f) < FLT_EPSILON)
|
||||
angle = 0.f;
|
||||
calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
|
||||
calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors, i);
|
||||
}
|
||||
}
|
||||
private:
|
||||
@@ -429,9 +441,9 @@ static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyP
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructor: copies every argument into the corresponding data member; the body is empty.
// NOTE(review): this text is a rendered diff. The second parameter line and the last initializer
// line are each shown twice; the later versions add _descriptorType and store it in
// descriptor_type.
SIFT_Impl::SIFT_Impl( int _nfeatures, int _nOctaveLayers,
|
||||
// Earlier form of the trailing parameters.
double _contrastThreshold, double _edgeThreshold, double _sigma )
|
||||
// Later form of the trailing parameters, with the descriptor type appended.
double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
|
||||
: nfeatures(_nfeatures), nOctaveLayers(_nOctaveLayers),
|
||||
// Earlier form of the remaining initializers.
contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma)
|
||||
// Later form of the remaining initializers, also setting descriptor_type.
contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma), descriptor_type(_descriptorType)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -442,7 +454,7 @@ int SIFT_Impl::descriptorSize() const
|
||||
|
||||
// Reports the element type of the descriptors.
// NOTE(review): this text is a rendered diff, so both versions of the `return` are shown:
// the earlier one always returns CV_32F, the later one returns the descriptor_type member.
int SIFT_Impl::descriptorType() const
|
||||
{
|
||||
// Earlier form: fixed type.
return CV_32F;
|
||||
// Later form: type stored in the descriptor_type member.
return descriptor_type;
|
||||
}
|
||||
|
||||
int SIFT_Impl::defaultNorm() const
|
||||
@@ -533,9 +545,9 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
|
||||
{
|
||||
//t = (double)getTickCount();
|
||||
int dsize = descriptorSize();
|
||||
_descriptors.create((int)keypoints.size(), dsize, CV_32F);
|
||||
Mat descriptors = _descriptors.getMat();
|
||||
_descriptors.create((int)keypoints.size(), dsize, descriptor_type);
|
||||
|
||||
Mat descriptors = _descriptors.getMat();
|
||||
calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
|
||||
//t = (double)getTickCount() - t;
|
||||
//printf("descriptor extraction time: %g\n", t*1000./tf);
|
||||
|
||||
@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(
|
||||
|
||||
// Forward declaration of calcSIFTDescriptor.
// NOTE(review): this text is a rendered diff, so the last parameter line is shown twice:
// first with a raw `float* dst`, then with `Mat& dst, int row`.
void calcSIFTDescriptor(
|
||||
const Mat& img, Point2f ptf, float ori, float scl,
|
||||
// Earlier form of the output parameter.
int d, int n, float* dst
|
||||
// Later form of the output parameter.
int d, int n, Mat& dst, int row
|
||||
);
|
||||
|
||||
|
||||
@@ -555,7 +555,7 @@ void findScaleSpaceExtrema(
|
||||
|
||||
void calcSIFTDescriptor(
|
||||
const Mat& img, Point2f ptf, float ori, float scl,
|
||||
int d, int n, float* dst
|
||||
int d, int n, Mat& dstMat, int row
|
||||
)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
@@ -575,9 +575,18 @@ void calcSIFTDescriptor(
|
||||
int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
|
||||
int rows = img.rows, cols = img.cols;
|
||||
|
||||
AutoBuffer<float> buf(len*6 + histlen);
|
||||
float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
|
||||
float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
|
||||
cv::utils::BufferArea area;
|
||||
float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0;
|
||||
area.allocate(X, len, CV_SIMD_WIDTH);
|
||||
area.allocate(Y, len, CV_SIMD_WIDTH);
|
||||
area.allocate(Ori, len, CV_SIMD_WIDTH);
|
||||
area.allocate(W, len, CV_SIMD_WIDTH);
|
||||
area.allocate(RBin, len, CV_SIMD_WIDTH);
|
||||
area.allocate(CBin, len, CV_SIMD_WIDTH);
|
||||
area.allocate(hist, histlen, CV_SIMD_WIDTH);
|
||||
area.allocate(rawDst, len, CV_SIMD_WIDTH);
|
||||
area.commit();
|
||||
Mag = Y;
|
||||
|
||||
for( i = 0; i < d+2; i++ )
|
||||
{
|
||||
@@ -628,10 +637,10 @@ void calcSIFTDescriptor(
|
||||
const v_int32 __n_plus_2 = vx_setall_s32(n+2);
|
||||
for( ; k <= len - vecsize; k += vecsize )
|
||||
{
|
||||
v_float32 rbin = vx_load(RBin + k);
|
||||
v_float32 cbin = vx_load(CBin + k);
|
||||
v_float32 obin = (vx_load(Ori + k) - __ori) * __bins_per_rad;
|
||||
v_float32 mag = vx_load(Mag + k) * vx_load(W + k);
|
||||
v_float32 rbin = vx_load_aligned(RBin + k);
|
||||
v_float32 cbin = vx_load_aligned(CBin + k);
|
||||
v_float32 obin = (vx_load_aligned(Ori + k) - __ori) * __bins_per_rad;
|
||||
v_float32 mag = vx_load_aligned(Mag + k) * vx_load_aligned(W + k);
|
||||
|
||||
v_int32 r0 = v_floor(rbin);
|
||||
v_int32 c0 = v_floor(cbin);
|
||||
@@ -723,7 +732,7 @@ void calcSIFTDescriptor(
|
||||
hist[idx] += hist[idx+n];
|
||||
hist[idx+1] += hist[idx+n+1];
|
||||
for( k = 0; k < n; k++ )
|
||||
dst[(i*d + j)*n + k] = hist[idx+k];
|
||||
rawDst[(i*d + j)*n + k] = hist[idx+k];
|
||||
}
|
||||
// copy histogram to the descriptor,
|
||||
// apply hysteresis thresholding
|
||||
@@ -735,17 +744,17 @@ void calcSIFTDescriptor(
|
||||
#if CV_SIMD
|
||||
{
|
||||
v_float32 __nrm2 = vx_setzero_f32();
|
||||
v_float32 __dst;
|
||||
v_float32 __rawDst;
|
||||
for( ; k <= len - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
__dst = vx_load(dst + k);
|
||||
__nrm2 = v_fma(__dst, __dst, __nrm2);
|
||||
__rawDst = vx_load_aligned(rawDst + k);
|
||||
__nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
|
||||
}
|
||||
nrm2 = (float)v_reduce_sum(__nrm2);
|
||||
}
|
||||
#endif
|
||||
for( ; k < len; k++ )
|
||||
nrm2 += dst[k]*dst[k];
|
||||
nrm2 += rawDst[k]*rawDst[k];
|
||||
|
||||
float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
|
||||
|
||||
@@ -760,9 +769,9 @@ void calcSIFTDescriptor(
|
||||
__m256 __thr = _mm256_set1_ps(thr);
|
||||
for( ; i <= len - 8; i += 8 )
|
||||
{
|
||||
__dst = _mm256_loadu_ps(&dst[i]);
|
||||
__dst = _mm256_loadu_ps(&rawDst[i]);
|
||||
__dst = _mm256_min_ps(__dst, __thr);
|
||||
_mm256_storeu_ps(&dst[i], __dst);
|
||||
_mm256_storeu_ps(&rawDst[i], __dst);
|
||||
#if CV_FMA3
|
||||
__nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
|
||||
#else
|
||||
@@ -776,44 +785,78 @@ void calcSIFTDescriptor(
|
||||
#endif
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
float val = std::min(dst[i], thr);
|
||||
dst[i] = val;
|
||||
float val = std::min(rawDst[i], thr);
|
||||
rawDst[i] = val;
|
||||
nrm2 += val*val;
|
||||
}
|
||||
nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
|
||||
|
||||
#if 1
|
||||
k = 0;
|
||||
if( dstMat.type() == CV_32F )
|
||||
{
|
||||
float* dst = dstMat.ptr<float>(row);
|
||||
#if CV_SIMD
|
||||
v_float32 __dst;
|
||||
v_float32 __min = vx_setzero_f32();
|
||||
v_float32 __max = vx_setall_f32(255.0f); // max of uchar
|
||||
v_float32 __nrm2 = vx_setall_f32(nrm2);
|
||||
for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
v_float32 __dst;
|
||||
v_float32 __min = vx_setzero_f32();
|
||||
v_float32 __max = vx_setall_f32(255.0f); // max of uchar
|
||||
v_float32 __nrm2 = vx_setall_f32(nrm2);
|
||||
for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
|
||||
{
|
||||
__dst = vx_load(dst + k);
|
||||
__dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
|
||||
v_store(dst + k, __dst);
|
||||
}
|
||||
__dst = vx_load_aligned(rawDst + k);
|
||||
__dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
|
||||
v_store(dst + k, __dst);
|
||||
}
|
||||
#endif
|
||||
for( ; k < len; k++ )
|
||||
{
|
||||
dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
|
||||
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
|
||||
}
|
||||
}
|
||||
else // CV_8U
|
||||
{
|
||||
uint8_t* dst = dstMat.ptr<uint8_t>(row);
|
||||
#if CV_SIMD
|
||||
v_float32 __dst0, __dst1;
|
||||
v_uint16 __pack01;
|
||||
v_float32 __nrm2 = vx_setall_f32(nrm2);
|
||||
for( k = 0; k <= len - v_float32::nlanes * 2; k += v_float32::nlanes * 2 )
|
||||
{
|
||||
__dst0 = vx_load_aligned(rawDst + k);
|
||||
__dst1 = vx_load_aligned(rawDst + k + v_float32::nlanes);
|
||||
|
||||
__pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
|
||||
v_pack_store(dst + k, __pack01);
|
||||
}
|
||||
#endif
|
||||
for( ; k < len; k++ )
|
||||
{
|
||||
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
|
||||
}
|
||||
}
|
||||
#else
|
||||
float* dst = dstMat.ptr<float>(row);
|
||||
float nrm1 = 0;
|
||||
for( k = 0; k < len; k++ )
|
||||
{
|
||||
dst[k] *= nrm2;
|
||||
nrm1 += dst[k];
|
||||
rawDst[k] *= nrm2;
|
||||
nrm1 += rawDst[k];
|
||||
}
|
||||
nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
|
||||
if( dstMat.type() == CV_32F )
|
||||
{
|
||||
for( k = 0; k < len; k++ )
|
||||
{
|
||||
dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
|
||||
dst[k] = std::sqrt(rawDst[k] * nrm1);
|
||||
}
|
||||
}
|
||||
else // CV_8U
|
||||
{
|
||||
for( k = 0; k < len; k++ )
|
||||
{
|
||||
dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user