DNA mode: add the distance computations
This commit is contained in:
parent
f3cebb3e1b
commit
05fbd1e5bc
@ -95,6 +95,8 @@ using ::cvflann::MaxDistance;
|
|||||||
using ::cvflann::HammingLUT;
|
using ::cvflann::HammingLUT;
|
||||||
using ::cvflann::Hamming;
|
using ::cvflann::Hamming;
|
||||||
using ::cvflann::Hamming2;
|
using ::cvflann::Hamming2;
|
||||||
|
using ::cvflann::DNAmmingLUT;
|
||||||
|
using ::cvflann::DNAmming2;
|
||||||
using ::cvflann::HistIntersectionDistance;
|
using ::cvflann::HistIntersectionDistance;
|
||||||
using ::cvflann::HellingerDistance;
|
using ::cvflann::HellingerDistance;
|
||||||
using ::cvflann::ChiSquareDistance;
|
using ::cvflann::ChiSquareDistance;
|
||||||
@ -131,6 +133,14 @@ performed using library calls, if available. Lookup table implementation is used
|
|||||||
cv::flann::Hamming2 - %Hamming distance functor. Population count is
|
cv::flann::Hamming2 - %Hamming distance functor. Population count is
|
||||||
implemented in 12 arithmetic operations (one of which is multiplication).
|
implemented in 12 arithmetic operations (one of which is multiplication).
|
||||||
|
|
||||||
|
cv::flann::DNAmmingLUT - %Adaptation of the Hamming distance functor to DNA comparison.
|
||||||
|
As the four bases A, C, G, T of the DNA (or A, G, C, U for RNA) can be coded on 2 bits,
|
||||||
|
it counts the bits pairs differences between two sequences using a lookup table implementation.
|
||||||
|
|
||||||
|
cv::flann::DNAmming2 - %Adaptation of the Hamming distance functor to DNA comparison.
|
||||||
|
Bases differences count are vectorised thanks to arithmetic operations using standard
|
||||||
|
registers (AVX2 and AVX-512 should come in a near future).
|
||||||
|
|
||||||
cv::flann::HistIntersectionDistance - The histogram
|
cv::flann::HistIntersectionDistance - The histogram
|
||||||
intersection distance functor.
|
intersection distance functor.
|
||||||
|
|
||||||
|
|||||||
@ -128,6 +128,7 @@ enum flann_distance_t
|
|||||||
FLANN_DIST_KULLBACK_LEIBLER = 8,
|
FLANN_DIST_KULLBACK_LEIBLER = 8,
|
||||||
FLANN_DIST_KL = 8,
|
FLANN_DIST_KL = 8,
|
||||||
FLANN_DIST_HAMMING = 9,
|
FLANN_DIST_HAMMING = 9,
|
||||||
|
FLANN_DIST_DNAMMING = 10,
|
||||||
|
|
||||||
// deprecated constants, should use the FLANN_DIST_* ones instead
|
// deprecated constants, should use the FLANN_DIST_* ones instead
|
||||||
EUCLIDEAN = 1,
|
EUCLIDEAN = 1,
|
||||||
|
|||||||
@ -744,6 +744,157 @@ private:
|
|||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
struct DNAmmingLUT
|
||||||
|
{
|
||||||
|
typedef False is_kdtree_distance;
|
||||||
|
typedef False is_vector_space_distance;
|
||||||
|
|
||||||
|
typedef unsigned char ElementType;
|
||||||
|
typedef int ResultType;
|
||||||
|
typedef ElementType CentersType;
|
||||||
|
|
||||||
|
/** this will count the bits in a ^ b
|
||||||
|
*/
|
||||||
|
template<typename Iterator2>
|
||||||
|
ResultType operator()(const unsigned char* a, const Iterator2 b, size_t size) const
|
||||||
|
{
|
||||||
|
static const uchar popCountTable[] =
|
||||||
|
{
|
||||||
|
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
|
||||||
|
};
|
||||||
|
ResultType result = 0;
|
||||||
|
const unsigned char* b2 = reinterpret_cast<const unsigned char*> (b);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
result += popCountTable[a[i] ^ b2[i]];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ResultType operator()(const unsigned char* a, const ZeroIterator<unsigned char> b, size_t size) const
|
||||||
|
{
|
||||||
|
(void)b;
|
||||||
|
static const uchar popCountTable[] =
|
||||||
|
{
|
||||||
|
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
|
||||||
|
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
|
||||||
|
};
|
||||||
|
ResultType result = 0;
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
result += popCountTable[a[i]];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct DNAmming2
|
||||||
|
{
|
||||||
|
typedef False is_kdtree_distance;
|
||||||
|
typedef False is_vector_space_distance;
|
||||||
|
|
||||||
|
typedef T ElementType;
|
||||||
|
typedef int ResultType;
|
||||||
|
typedef ElementType CentersType;
|
||||||
|
|
||||||
|
/** This is popcount_3() from:
|
||||||
|
* http://en.wikipedia.org/wiki/Hamming_weight */
|
||||||
|
unsigned int popcnt32(uint32_t n) const
|
||||||
|
{
|
||||||
|
n = ((n >> 1) | n) & 0x55555555;
|
||||||
|
n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
|
||||||
|
return (((n + (n >> 4))& 0x0F0F0F0F)* 0x01010101) >> 24;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef FLANN_PLATFORM_64_BIT
|
||||||
|
unsigned int popcnt64(uint64_t n) const
|
||||||
|
{
|
||||||
|
n = ((n >> 1) | n) & 0x5555555555555555;
|
||||||
|
n = (n & 0x3333333333333333) + ((n >> 2) & 0x3333333333333333);
|
||||||
|
return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0f)* 0x0101010101010101) >> 56;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename Iterator1, typename Iterator2>
|
||||||
|
ResultType operator()(const Iterator1 a, const Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
|
||||||
|
{
|
||||||
|
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
|
||||||
|
|
||||||
|
#ifdef FLANN_PLATFORM_64_BIT
|
||||||
|
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
|
||||||
|
const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
|
||||||
|
ResultType result = 0;
|
||||||
|
size /= long_word_size_;
|
||||||
|
for(size_t i = 0; i < size; ++i ) {
|
||||||
|
result += popcnt64(*pa ^ *pb);
|
||||||
|
++pa;
|
||||||
|
++pb;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
|
||||||
|
const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
|
||||||
|
ResultType result = 0;
|
||||||
|
size /= long_word_size_;
|
||||||
|
for(size_t i = 0; i < size; ++i ) {
|
||||||
|
result += popcnt32(*pa ^ *pb);
|
||||||
|
++pa;
|
||||||
|
++pb;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Iterator1>
|
||||||
|
ResultType operator()(const Iterator1 a, ZeroIterator<unsigned char> b, size_t size, ResultType /*worst_dist*/ = -1) const
|
||||||
|
{
|
||||||
|
CV_DbgAssert(!(size % long_word_size_) && "vectors size must be multiple of long words size (i.e. 8)");
|
||||||
|
|
||||||
|
(void)b;
|
||||||
|
#ifdef FLANN_PLATFORM_64_BIT
|
||||||
|
const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
|
||||||
|
ResultType result = 0;
|
||||||
|
size /= long_word_size_;
|
||||||
|
for(size_t i = 0; i < size; ++i ) {
|
||||||
|
result += popcnt64(*pa);
|
||||||
|
++pa;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
|
||||||
|
ResultType result = 0;
|
||||||
|
size /= long_word_size_;
|
||||||
|
for(size_t i = 0; i < size; ++i ) {
|
||||||
|
result += popcnt32(*pa);
|
||||||
|
++pa;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
#ifdef FLANN_PLATFORM_64_BIT
|
||||||
|
static const size_t long_word_size_= sizeof(uint64_t)/sizeof(unsigned char);
|
||||||
|
#else
|
||||||
|
static const size_t long_word_size_= sizeof(uint32_t)/sizeof(unsigned char);
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
struct HistIntersectionDistance
|
struct HistIntersectionDistance
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user