Merge pull request #18001 from Yosshi999:sift-8bit-descr

* 8-bit SIFT descriptors
* use clearer parameter
* update docs
* propagate type info
* overload function for avoiding ABI-break
* bugfix: some values are undefined when CV_SIMD is absent
2020-08-17 19:28:44 +09:00
parent b34234ac14
commit 1834eed809
4 changed files with 158 additions and 42 deletions
@@ -88,7 +88,7 @@ class SIFT_Impl : public SIFT
 public:
    explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
                          double contrastThreshold = 0.04, double edgeThreshold = 10,
-                          double sigma = 1.6);
+                          double sigma = 1.6, int descriptorType = CV_32F );

    //! returns the descriptor size in floats (128)
    int descriptorSize() const CV_OVERRIDE;
@@ -117,13 +117,25 @@ protected:
    CV_PROP_RW double contrastThreshold;
    CV_PROP_RW double edgeThreshold;
    CV_PROP_RW double sigma;
+    CV_PROP_RW int descriptor_type;
 };

 Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
                     double _contrastThreshold, double _edgeThreshold, double _sigma )
 {
    CV_TRACE_FUNCTION();
-    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma);
+
+    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F);
+}
+
+Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
+                     double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
+{
+    CV_TRACE_FUNCTION();
+
+    // SIFT descriptor supports 32bit floating point and 8bit unsigned int.
+    CV_Assert(_descriptorType == CV_32F || _descriptorType == CV_8U);
+    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType);
 }

 String SIFT::getDefaultName() const
@@ -362,12 +374,12 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const
 static
 void calcSIFTDescriptor(
        const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dst, int row
 )
 {
    CV_TRACE_FUNCTION();

-    CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst),
+    CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst, row),
        CV_CPU_DISPATCH_MODES_ALL);
 }

@@ -408,7 +420,7 @@ public:
            float angle = 360.f - kpt.angle;
            if(std::abs(angle - 360.f) < FLT_EPSILON)
                angle = 0.f;
-            calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
+            calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors, i);
        }
    }
 private:
@@ -429,9 +441,9 @@ static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyP
 //////////////////////////////////////////////////////////////////////////////////////////

 SIFT_Impl::SIFT_Impl( int _nfeatures, int _nOctaveLayers,
-           double _contrastThreshold, double _edgeThreshold, double _sigma )
+           double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
    : nfeatures(_nfeatures), nOctaveLayers(_nOctaveLayers),
-    contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma)
+    contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma), descriptor_type(_descriptorType)
 {
 }

@@ -442,7 +454,7 @@ int SIFT_Impl::descriptorSize() const

 int SIFT_Impl::descriptorType() const
 {
-    return CV_32F;
+    return descriptor_type;
 }

 int SIFT_Impl::defaultNorm() const
@@ -533,9 +545,9 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
    {
        //t = (double)getTickCount();
        int dsize = descriptorSize();
-        _descriptors.create((int)keypoints.size(), dsize, CV_32F);
-        Mat descriptors = _descriptors.getMat();
+        _descriptors.create((int)keypoints.size(), dsize, descriptor_type);

+        Mat descriptors = _descriptors.getMat();
        calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
        //t = (double)getTickCount() - t;
        //printf("descriptor extraction time: %g\n", t*1000./tf);
@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(

 void calcSIFTDescriptor(
        const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dst, int row
 );


@@ -555,7 +555,7 @@ void findScaleSpaceExtrema(

 void calcSIFTDescriptor(
        const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dstMat, int row
 )
 {
    CV_TRACE_FUNCTION();
@@ -575,9 +575,18 @@ void calcSIFTDescriptor(
    int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
    int rows = img.rows, cols = img.cols;

-    AutoBuffer<float> buf(len*6 + histlen);
-    float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
-    float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
+    cv::utils::BufferArea area;
+    float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0;
+    area.allocate(X, len, CV_SIMD_WIDTH);
+    area.allocate(Y, len, CV_SIMD_WIDTH);
+    area.allocate(Ori, len, CV_SIMD_WIDTH);
+    area.allocate(W, len, CV_SIMD_WIDTH);
+    area.allocate(RBin, len, CV_SIMD_WIDTH);
+    area.allocate(CBin, len, CV_SIMD_WIDTH);
+    area.allocate(hist, histlen, CV_SIMD_WIDTH);
+    area.allocate(rawDst, d*d*n, CV_SIMD_WIDTH); // indexed as (i*d + j)*n + k: needs d*d*n floats, not the patch pixel count in len
+    area.commit();
+    Mag = Y;

    for( i = 0; i < d+2; i++ )
    {
@@ -628,10 +637,10 @@ void calcSIFTDescriptor(
        const v_int32 __n_plus_2 = vx_setall_s32(n+2);
        for( ; k <= len - vecsize; k += vecsize )
        {
-            v_float32 rbin = vx_load(RBin + k);
-            v_float32 cbin = vx_load(CBin + k);
-            v_float32 obin = (vx_load(Ori + k) - __ori) * __bins_per_rad;
-            v_float32 mag = vx_load(Mag + k) * vx_load(W + k);
+            v_float32 rbin = vx_load_aligned(RBin + k);
+            v_float32 cbin = vx_load_aligned(CBin + k);
+            v_float32 obin = (vx_load_aligned(Ori + k) - __ori) * __bins_per_rad;
+            v_float32 mag = vx_load_aligned(Mag + k) * vx_load_aligned(W + k);

            v_int32 r0 = v_floor(rbin);
            v_int32 c0 = v_floor(cbin);
@@ -723,7 +732,7 @@ void calcSIFTDescriptor(
            hist[idx] += hist[idx+n];
            hist[idx+1] += hist[idx+n+1];
            for( k = 0; k < n; k++ )
-                dst[(i*d + j)*n + k] = hist[idx+k];
+                rawDst[(i*d + j)*n + k] = hist[idx+k];
        }
    // copy histogram to the descriptor,
    // apply hysteresis thresholding
@@ -735,17 +744,17 @@ void calcSIFTDescriptor(
 #if CV_SIMD
    {
        v_float32 __nrm2 = vx_setzero_f32();
-        v_float32 __dst;
+        v_float32 __rawDst;
        for( ; k <= len - v_float32::nlanes; k += v_float32::nlanes )
        {
-            __dst = vx_load(dst + k);
-            __nrm2 = v_fma(__dst, __dst, __nrm2);
+            __rawDst = vx_load_aligned(rawDst + k);
+            __nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
        }
        nrm2 = (float)v_reduce_sum(__nrm2);
    }
 #endif
    for( ; k < len; k++ )
-        nrm2 += dst[k]*dst[k];
+        nrm2 += rawDst[k]*rawDst[k];

    float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;

@@ -760,9 +769,9 @@ void calcSIFTDescriptor(
        __m256 __thr = _mm256_set1_ps(thr);
        for( ; i <= len - 8; i += 8 )
        {
-            __dst = _mm256_loadu_ps(&dst[i]);
+            __dst = _mm256_loadu_ps(&rawDst[i]);
            __dst = _mm256_min_ps(__dst, __thr);
-            _mm256_storeu_ps(&dst[i], __dst);
+            _mm256_storeu_ps(&rawDst[i], __dst);
 #if CV_FMA3
            __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
 #else
@@ -776,44 +785,78 @@ void calcSIFTDescriptor(
 #endif
    for( ; i < len; i++ )
    {
-        float val = std::min(dst[i], thr);
-        dst[i] = val;
+        float val = std::min(rawDst[i], thr);
+        rawDst[i] = val;
        nrm2 += val*val;
    }
    nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);

 #if 1
    k = 0;
+if( dstMat.type() == CV_32F )
+{
+    float* dst = dstMat.ptr<float>(row);
 #if CV_SIMD
+    v_float32 __dst;
+    v_float32 __min = vx_setzero_f32();
+    v_float32 __max = vx_setall_f32(255.0f); // max of uchar
+    v_float32 __nrm2 = vx_setall_f32(nrm2);
+    for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
    {
-        v_float32 __dst;
-        v_float32 __min = vx_setzero_f32();
-        v_float32 __max = vx_setall_f32(255.0f); // max of uchar
-        v_float32 __nrm2 = vx_setall_f32(nrm2);
-        for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
-        {
-            __dst = vx_load(dst + k);
-            __dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
-            v_store(dst + k, __dst);
-        }
+        __dst = vx_load_aligned(rawDst + k);
+        __dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
+        v_store(dst + k, __dst);
    }
 #endif
    for( ; k < len; k++ )
    {
-        dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
+        dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
    }
+}
+else // CV_8U
+{
+    uint8_t* dst = dstMat.ptr<uint8_t>(row);
+#if CV_SIMD
+    v_float32 __dst0, __dst1;
+    v_uint16 __pack01;
+    v_float32 __nrm2 = vx_setall_f32(nrm2);
+    for( k = 0; k <= len - v_float32::nlanes * 2; k += v_float32::nlanes * 2 )
+    {
+        __dst0 = vx_load_aligned(rawDst + k);
+        __dst1 = vx_load_aligned(rawDst + k + v_float32::nlanes);
+
+        __pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
+        v_pack_store(dst + k, __pack01);
+    }
+#endif
+    for( ; k < len; k++ )
+    {
+        dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
+    }
+}
 #else
+    // The output row is addressed per element type below (float for CV_32F, uint8_t for CV_8U).
    float nrm1 = 0;
    for( k = 0; k < len; k++ )
    {
-        dst[k] *= nrm2;
-        nrm1 += dst[k];
+        rawDst[k] *= nrm2;
+        nrm1 += rawDst[k];
    }
    nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
+if( dstMat.type() == CV_32F )
+{
    for( k = 0; k < len; k++ )
    {
-        dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
+        dstMat.ptr<float>(row)[k] = std::sqrt(rawDst[k] * nrm1);
    }
+}
+else // CV_8U
+{
+    for( k = 0; k < len; k++ )
+    {
+        dstMat.ptr<uint8_t>(row)[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
+    }
+}
 #endif
 }