From cc22a73d0fb88925617245bd03ed71b9bf358e9c Mon Sep 17 00:00:00 2001 From: Rachel A Date: Mon, 1 Mar 2021 10:57:22 -0800 Subject: [PATCH 01/10] EXR alpha support for 4 channel reading and writing. Issue https://github.com/opencv/opencv/issues/16115. --- modules/imgcodecs/src/grfmt_exr.cpp | 180 ++++++++++++++--------- modules/imgcodecs/src/grfmt_exr.hpp | 4 +- modules/imgcodecs/test/test_exr.impl.hpp | 158 +++++++++++++++++++- 3 files changed, 268 insertions(+), 74 deletions(-) diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp index 1eceb4f5cd..9667b8ca03 100644 --- a/modules/imgcodecs/src/grfmt_exr.cpp +++ b/modules/imgcodecs/src/grfmt_exr.cpp @@ -84,12 +84,13 @@ ExrDecoder::ExrDecoder() { m_signature = "\x76\x2f\x31\x01"; m_file = 0; - m_red = m_green = m_blue = 0; + m_red = m_green = m_blue = m_alpha = 0; m_type = ((Imf::PixelType)0); m_iscolor = false; m_bit_depth = 0; m_isfloat = false; m_ischroma = false; + m_hasalpha = false; m_native_depth = false; } @@ -113,7 +114,7 @@ void ExrDecoder::close() int ExrDecoder::type() const { - return CV_MAKETYPE((m_isfloat ? CV_32F : CV_32S), m_iscolor ? 3 : 1); + return CV_MAKETYPE((m_isfloat ? CV_32F : CV_32S), ((m_iscolor && m_hasalpha) ? 4 : m_iscolor ? 3 : m_hasalpha ? 
2 : 1)); } @@ -141,6 +142,11 @@ bool ExrDecoder::readHeader() m_red = channels.findChannel( "R" ); m_green = channels.findChannel( "G" ); m_blue = channels.findChannel( "B" ); + m_alpha = channels.findChannel( "A" ); + + if( m_alpha ) // alpha channel supported in RGB, Y, and YC scenarios + m_hasalpha = true; + if( m_red || m_green || m_blue ) { m_iscolor = true; @@ -178,7 +184,8 @@ bool ExrDecoder::readHeader() bool ExrDecoder::readData( Mat& img ) { m_native_depth = CV_MAT_DEPTH(type()) == img.depth(); - bool color = img.channels() > 1; + bool color = img.channels() > 2; // output mat has 3+ channels; Y or YA are the 1 and 2 channel scenario + bool alphasupported = ( img.channels() % 2 == 0 ); // even number of channels indicates alpha int channels = 0; uchar* data = img.ptr(); size_t step = img.step; @@ -187,18 +194,22 @@ bool ExrDecoder::readData( Mat& img ) bool rgbtogray = ( !m_ischroma && m_iscolor && !color ); bool result = true; FrameBuffer frame; - int xsample[3] = {1, 1, 1}; + const int defaultchannels = 3; + int xsample[defaultchannels] = {1, 1, 1}; char *buffer; - size_t xstep = 0; + CV_Assert(m_type == FLOAT); + const size_t floatsize = sizeof(float); + size_t xstep = m_native_depth ? floatsize : 1; // 4 bytes if native depth (FLOAT), otherwise converting to 1 byte U8 depth size_t ystep = 0; - - xstep = m_native_depth ? 4 : 1; + const int channelstoread = ( (m_iscolor && alphasupported) ? 4 : + ( (m_iscolor && !m_ischroma) || color) ? 3 : alphasupported ? 
2 : 1 ); // number of channels to read may exceed channels in output img + size_t xStride = floatsize * channelstoread; AutoBuffer copy_buffer; if( !justcopy ) { - copy_buffer.allocate(sizeof(float) * m_width * 3); + copy_buffer.allocate(floatsize * m_width * defaultchannels); buffer = copy_buffer.data(); ystep = 0; } @@ -215,49 +226,49 @@ bool ExrDecoder::readData( Mat& img ) if( m_blue ) { frame.insert( "BY", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep, - 12, ystep, m_blue->xSampling, m_blue->ySampling, 0.0 )); - xsample[0] = m_blue->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep, + xStride, ystep, m_blue->xSampling, m_blue->ySampling, 0.0 )); + xsample[0] = m_blue->xSampling; } else { frame.insert( "BY", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep, + xStride, ystep, 1, 1, 0.0 )); } if( m_green ) { frame.insert( "Y", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 4, - 12, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); - xsample[1] = m_green->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + floatsize, + xStride, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); + xsample[1] = m_green->xSampling; } else { frame.insert( "Y", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 4, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + floatsize, + xStride, ystep, 1, 1, 0.0 )); } if( m_red ) { frame.insert( "RY", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 8, - 12, ystep, m_red->xSampling, m_red->ySampling, 0.0 )); - xsample[2] = m_red->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + (floatsize * 2), + xStride, ystep, m_red->xSampling, m_red->ySampling, 
0.0 )); + xsample[2] = m_red->xSampling; } else { frame.insert( "RY", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 8, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + (floatsize * 2), + xStride, ystep, 1, 1, 0.0 )); } } else { frame.insert( "Y", Slice( m_type, - buffer - m_datawindow.min.x * 4 - m_datawindow.min.y * ystep, - 4, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); - xsample[0] = m_green->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep, + xStride, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); + xsample[0] = m_green->xSampling; } } else @@ -265,67 +276,85 @@ bool ExrDecoder::readData( Mat& img ) if( m_blue ) { frame.insert( "B", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep, - 12, ystep, m_blue->xSampling, m_blue->ySampling, 0.0 )); - xsample[0] = m_blue->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep, + xStride, ystep, m_blue->xSampling, m_blue->ySampling, 0.0 )); + xsample[0] = m_blue->xSampling; } else { frame.insert( "B", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep, + xStride, ystep, 1, 1, 0.0 )); } if( m_green ) { frame.insert( "G", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 4, - 12, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); - xsample[1] = m_green->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + floatsize, + xStride, ystep, m_green->xSampling, m_green->ySampling, 0.0 )); + xsample[1] = m_green->xSampling; } else { frame.insert( "G", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 4, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + floatsize, + xStride, ystep, 1, 
1, 0.0 )); } if( m_red ) { frame.insert( "R", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 8, - 12, ystep, m_red->xSampling, m_red->ySampling, 0.0 )); - xsample[2] = m_red->ySampling; + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + (floatsize * 2), + xStride, ystep, m_red->xSampling, m_red->ySampling, 0.0 )); + xsample[2] = m_red->xSampling; } else { frame.insert( "R", Slice( m_type, - buffer - m_datawindow.min.x * 12 - m_datawindow.min.y * ystep + 8, - 12, ystep, 1, 1, 0.0 )); + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + (floatsize * 2), + xStride, ystep, 1, 1, 0.0 )); } } + if( justcopy && m_hasalpha && alphasupported ) + { // alpha preserved only in justcopy scenario where alpha is desired (alphasupported) + // and present in original file (m_hasalpha) + CV_Assert(channelstoread == img.channels()); + int offset = (channelstoread - 1) * floatsize; + frame.insert( "A", Slice( m_type, + buffer - m_datawindow.min.x * xStride - m_datawindow.min.y * ystep + offset, + xStride, ystep, m_alpha->xSampling, m_alpha->ySampling, 0.0 )); + } + for (FrameBuffer::Iterator it = frame.begin(); it != frame.end(); it++) { channels++; } + CV_Assert(channels == channelstoread); + + if( (channels != channelstoread) || (!justcopy && channels > defaultchannels) ) + { // safety checking what ought to be true here + close(); + return false; + } + m_file->setFrameBuffer( frame ); if( justcopy ) { m_file->readPixels( m_datawindow.min.y, m_datawindow.max.y ); - if( color ) + if( m_iscolor ) { if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) ) - UpSample( data, 3, step / xstep, xsample[0], m_blue->ySampling ); + UpSample( data, channelstoread, step / xstep, m_blue->xSampling, m_blue->ySampling ); if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) ) - UpSample( data + xstep, 3, step / xstep, xsample[1], m_green->ySampling ); + UpSample( data + xstep, channelstoread, step / 
xstep, m_green->xSampling, m_green->ySampling ); if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) ) - UpSample( data + 2 * xstep, 3, step / xstep, xsample[2], m_red->ySampling ); + UpSample( data + 2 * xstep, channelstoread, step / xstep, m_red->xSampling, m_red->ySampling ); } else if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) ) - UpSample( data, 1, step / xstep, xsample[0], m_green->ySampling ); + UpSample( data, channelstoread, step / xstep, m_green->xSampling, m_green->ySampling ); if( chromatorgb ) - ChromaToBGR( (float *)data, m_height, step / xstep ); + ChromaToBGR( (float *)data, m_height, channelstoread, step / xstep ); } else { @@ -347,7 +376,7 @@ bool ExrDecoder::readData( Mat& img ) else { if( chromatorgb ) - ChromaToBGR( (float *)buffer, 1, step ); + ChromaToBGR( (float *)buffer, 1, defaultchannels, step ); if( m_type == FLOAT ) { @@ -372,11 +401,11 @@ bool ExrDecoder::readData( Mat& img ) if( color ) { if( m_blue && (m_blue->xSampling != 1 || m_blue->ySampling != 1) ) - UpSampleY( data, 3, step / xstep, m_blue->ySampling ); + UpSampleY( data, defaultchannels, step / xstep, m_blue->ySampling ); if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) ) - UpSampleY( data + xstep, 3, step / xstep, m_green->ySampling ); + UpSampleY( data + xstep, defaultchannels, step / xstep, m_green->ySampling ); if( m_red && (m_red->xSampling != 1 || m_red->ySampling != 1) ) - UpSampleY( data + 2 * xstep, 3, step / xstep, m_red->ySampling ); + UpSampleY( data + 2 * xstep, defaultchannels, step / xstep, m_red->ySampling ); } else if( m_green && (m_green->xSampling != 1 || m_green->ySampling != 1) ) UpSampleY( data, 1, step / xstep, m_green->ySampling ); @@ -457,7 +486,7 @@ void ExrDecoder::UpSampleY( uchar *data, int xstep, int ystep, int ysample ) /** // algorithm from ImfRgbaYca.cpp */ -void ExrDecoder::ChromaToBGR( float *data, int numlines, int step ) +void ExrDecoder::ChromaToBGR( float *data, int numlines, int xstep, 
int ystep ) { for( int y = 0; y < numlines; y++ ) { @@ -466,15 +495,15 @@ void ExrDecoder::ChromaToBGR( float *data, int numlines, int step ) double b, Y, r; if( m_type == FLOAT ) { - b = data[y * step + x * 3]; - Y = data[y * step + x * 3 + 1]; - r = data[y * step + x * 3 + 2]; + b = data[y * ystep + x * xstep]; + Y = data[y * ystep + x * xstep + 1]; + r = data[y * ystep + x * xstep + 2]; } else { - b = ((unsigned *)data)[y * step + x * 3]; - Y = ((unsigned *)data)[y * step + x * 3 + 1]; - r = ((unsigned *)data)[y * step + x * 3 + 2]; + b = ((unsigned *)data)[y * ystep + x * xstep]; + Y = ((unsigned *)data)[y * ystep + x * xstep + 1]; + r = ((unsigned *)data)[y * ystep + x * xstep + 2]; } r = (r + 1) * Y; b = (b + 1) * Y; @@ -482,18 +511,18 @@ void ExrDecoder::ChromaToBGR( float *data, int numlines, int step ) if( m_type == FLOAT ) { - data[y * step + x * 3] = (float)b; - data[y * step + x * 3 + 1] = (float)Y; - data[y * step + x * 3 + 2] = (float)r; + data[y * ystep + x * xstep] = (float)b; + data[y * ystep + x * xstep + 1] = (float)Y; + data[y * ystep + x * xstep + 2] = (float)r; } else { int t = cvRound(b); - ((unsigned *)data)[y * step + x * 3 + 0] = (unsigned)MAX(t, 0); + ((unsigned *)data)[y * ystep + x * xstep + 0] = (unsigned)MAX(t, 0); t = cvRound(Y); - ((unsigned *)data)[y * step + x * 3 + 1] = (unsigned)MAX(t, 0); + ((unsigned *)data)[y * ystep + x * xstep + 1] = (unsigned)MAX(t, 0); t = cvRound(r); - ((unsigned *)data)[y * step + x * 3 + 2] = (unsigned)MAX(t, 0); + ((unsigned *)data)[y * ystep + x * xstep + 2] = (unsigned)MAX(t, 0); } } } @@ -571,7 +600,6 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) int depth = img.depth(); CV_Assert( depth == CV_32F ); int channels = img.channels(); - CV_Assert( channels == 3 || channels == 1 ); bool result = false; Header header( width, height ); Imf::PixelType type = FLOAT; @@ -594,7 +622,7 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) } } - if( channels == 3 ) + 
if( channels == 3 || channels == 4 ) { header.channels().insert( "R", Channel( type ) ); header.channels().insert( "G", Channel( type ) ); @@ -607,6 +635,11 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) //printf("gray\n"); } + if( channels % 2 == 0 ) + { // even number of channels indicates Alpha + header.channels().insert( "A", Channel( type ) ); + } + OutputFile file( m_filename.c_str(), header ); FrameBuffer frame; @@ -629,14 +662,19 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) size = 4; } - if( channels == 3 ) + if( channels == 3 || channels == 4 ) { - frame.insert( "B", Slice( type, buffer, size * 3, bufferstep )); - frame.insert( "G", Slice( type, buffer + size, size * 3, bufferstep )); - frame.insert( "R", Slice( type, buffer + size * 2, size * 3, bufferstep )); + frame.insert( "B", Slice( type, buffer, size * channels, bufferstep )); + frame.insert( "G", Slice( type, buffer + size, size * channels, bufferstep )); + frame.insert( "R", Slice( type, buffer + size * 2, size * channels, bufferstep )); } else - frame.insert( "Y", Slice( type, buffer, size, bufferstep )); + frame.insert( "Y", Slice( type, buffer, size * channels, bufferstep )); + + if( channels % 2 == 0 ) + { // even channel count indicates Alpha channel + frame.insert( "A", Slice( type, buffer + size * (channels - 1), size * channels, bufferstep )); + } file.setFrameBuffer( frame ); diff --git a/modules/imgcodecs/src/grfmt_exr.hpp b/modules/imgcodecs/src/grfmt_exr.hpp index ec08028e22..99acd775c2 100644 --- a/modules/imgcodecs/src/grfmt_exr.hpp +++ b/modules/imgcodecs/src/grfmt_exr.hpp @@ -81,7 +81,7 @@ protected: void UpSample( uchar *data, int xstep, int ystep, int xsample, int ysample ); void UpSampleX( float *data, int xstep, int xsample ); void UpSampleY( uchar *data, int xstep, int ystep, int ysample ); - void ChromaToBGR( float *data, int numlines, int step ); + void ChromaToBGR( float *data, int numlines, int xstep, int ystep ); void 
RGBToGray( float *in, float *out ); InputFile *m_file; @@ -91,11 +91,13 @@ protected: const Channel *m_red; const Channel *m_green; const Channel *m_blue; + const Channel *m_alpha; Chromaticities m_chroma; int m_bit_depth; bool m_native_depth; bool m_iscolor; bool m_isfloat; + bool m_hasalpha; private: ExrDecoder(const ExrDecoder &); // copy disabled diff --git a/modules/imgcodecs/test/test_exr.impl.hpp b/modules/imgcodecs/test/test_exr.impl.hpp index 1f78a8f38f..ae5af53c78 100644 --- a/modules/imgcodecs/test/test_exr.impl.hpp +++ b/modules/imgcodecs/test/test_exr.impl.hpp @@ -7,7 +7,7 @@ namespace opencv_test { namespace { TEST(Imgcodecs_EXR, readWrite_32FC1) -{ +{ // Y channels const string root = cvtest::TS::ptr()->get_data_path(); const string filenameInput = root + "readwrite/test32FC1.exr"; const string filenameOutput = cv::tempfile(".exr"); @@ -31,7 +31,7 @@ TEST(Imgcodecs_EXR, readWrite_32FC1) } TEST(Imgcodecs_EXR, readWrite_32FC3) -{ +{ // RGB channels const string root = cvtest::TS::ptr()->get_data_path(); const string filenameInput = root + "readwrite/test32FC3.exr"; const string filenameOutput = cv::tempfile(".exr"); @@ -113,5 +113,159 @@ TEST(Imgcodecs_EXR, readWrite_32FC3_half) EXPECT_EQ(0, remove(filenameOutput.c_str())); } +// Note: YC to GRAYSCALE (IMREAD_GRAYSCALE | IMREAD_ANYDEPTH) +// outputs a black image, +// as does Y to RGB (IMREAD_COLOR | IMREAD_ANYDEPTH). +// This behavior predates adding EXR alpha support issue +// 16115. 
+ +TEST(Imgcodecs_EXR, read_YA_ignore_alpha) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YA.exr"; + + const Mat img = cv::imread(filenameInput, IMREAD_GRAYSCALE | IMREAD_ANYDEPTH); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC1, img.type()); + + // Writing Y covered by test 32FC1 +} + +TEST(Imgcodecs_EXR, read_YA_unchanged) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YA.exr"; + + const Mat img = cv::imread(filenameInput, IMREAD_UNCHANGED); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC2, img.type()); + + // Cannot test writing, 2 channel writing not supported by loadsave +} + +TEST(Imgcodecs_EXR, read_YC_changeDepth) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YRYBY.exr"; + + const Mat img = cv::imread(filenameInput, IMREAD_COLOR); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_8UC3, img.type()); + + // Cannot test writing, EXR encoder doesn't support 8U depth +} + +TEST(Imgcodecs_EXR, readwrite_YCA_ignore_alpha) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YRYBYA.exr"; + const string filenameOutput = cv::tempfile(".exr"); + + const Mat img = cv::imread(filenameInput, IMREAD_COLOR | IMREAD_ANYDEPTH); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC3, img.type()); + + ASSERT_TRUE(cv::imwrite(filenameOutput, img)); + const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED); + ASSERT_EQ(img2.type(), img.type()); + ASSERT_EQ(img2.size(), img.size()); + EXPECT_LE(cvtest::norm(img, img2, NORM_INF | NORM_RELATIVE), 1e-3); + EXPECT_EQ(0, remove(filenameOutput.c_str())); +} + +TEST(Imgcodecs_EXR, read_YC_unchanged) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YRYBY.exr"; + + const Mat img = 
cv::imread(filenameInput, IMREAD_UNCHANGED); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC3, img.type()); + + // Writing YC covered by test readwrite_YCA_ignore_alpha +} + +TEST(Imgcodecs_EXR, readwrite_YCA_unchanged) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_YRYBYA.exr"; + const string filenameOutput = cv::tempfile(".exr"); + + const Mat img = cv::imread(filenameInput, IMREAD_UNCHANGED); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC4, img.type()); + + ASSERT_TRUE(cv::imwrite(filenameOutput, img)); + const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED); + ASSERT_EQ(img2.type(), img.type()); + ASSERT_EQ(img2.size(), img.size()); + EXPECT_LE(cvtest::norm(img, img2, NORM_INF | NORM_RELATIVE), 1e-3); + EXPECT_EQ(0, remove(filenameOutput.c_str())); +} + +TEST(Imgcodecs_EXR, readwrite_RGBA_togreyscale) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_GeneratedRGBA.exr"; + const string filenameOutput = cv::tempfile(".exr"); + + const Mat img = cv::imread(filenameInput, IMREAD_GRAYSCALE | IMREAD_ANYDEPTH); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC1, img.type()); + + ASSERT_TRUE(cv::imwrite(filenameOutput, img)); + const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED); + ASSERT_EQ(img2.type(), img.type()); + ASSERT_EQ(img2.size(), img.size()); + EXPECT_LE(cvtest::norm(img, img2, NORM_INF | NORM_RELATIVE), 1e-3); + EXPECT_EQ(0, remove(filenameOutput.c_str())); +} + +TEST(Imgcodecs_EXR, read_RGBA_ignore_alpha) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_GeneratedRGBA.exr"; + + const Mat img = cv::imread(filenameInput, IMREAD_COLOR | IMREAD_ANYDEPTH); + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC3, img.type()); + + // Writing RGB covered by test 32FC3 +} + +TEST(Imgcodecs_EXR, read_RGBA_unchanged) +{ + const string root = 
cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/test_GeneratedRGBA.exr"; + const string filenameOutput = cv::tempfile(".exr"); + +#ifndef GENERATE_DATA + const Mat img = cv::imread(filenameInput, IMREAD_UNCHANGED); +#else + const Size sz(64, 32); + Mat img(sz, CV_32FC4, Scalar(0.5, 0.1, 1, 1)); + img(Rect(10, 5, sz.width - 30, sz.height - 20)).setTo(Scalar(1, 0, 0, 1)); + img(Rect(10, 20, sz.width - 30, sz.height - 20)).setTo(Scalar(1, 1, 0, 0)); + ASSERT_TRUE(cv::imwrite(filenameInput, img)); +#endif + + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_32FC4, img.type()); + + ASSERT_TRUE(cv::imwrite(filenameOutput, img)); + const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED); + ASSERT_EQ(img2.type(), img.type()); + ASSERT_EQ(img2.size(), img.size()); + EXPECT_LE(cvtest::norm(img, img2, NORM_INF | NORM_RELATIVE), 1e-3); + EXPECT_EQ(0, remove(filenameOutput.c_str())); +} }} // namespace From cbfd38bd41e91433b7a23348ae65d3adff2bc20b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 23 Feb 2021 00:22:06 +0000 Subject: [PATCH 02/10] core: rework code locality - to reduce binaries size of FFmpeg Windows wrapper - MinGW linker doesn't support -ffunction-sections (used for FFmpeg Windows wrapper) - move code to improve locality with its used dependencies - move UMat::dot() to matmul.dispatch.cpp (Mat::dot() is already there) - move UMat::inv() to lapack.cpp - move UMat::mul() to arithm.cpp - move UMat:eye() to matrix_operations.cpp (near setIdentity() implementation) - move normalize(): convert_scale.cpp => norm.cpp - move convertAndUnrollScalar(): arithm.cpp => copy.cpp - move scalarToRawData(): array.cpp => copy.cpp - move transpose(): matrix_operations.cpp => matrix_transform.cpp - move flip(), rotate(): copy.cpp => matrix_transform.cpp (rotate90 uses flip and transpose) - add 'OPENCV_CORE_EXCLUDE_C_API' CMake variable to exclude compilation of C-API functions from the core module - matrix_wrap.cpp: add compile-time 
checks for CUDA/OpenGL calls - the steps above allow to reduce FFmpeg wrapper size for ~1.5Mb (initial size of OpenCV part is about 3Mb) backport is done to improve merge experience (less conflicts) backport of commit: 65eb9467567598c08049bb190a4f3d3cbfabdcd0 --- modules/core/CMakeLists.txt | 4 + modules/core/src/arithm.cpp | 40 +- modules/core/src/array.cpp | 89 +-- modules/core/src/convert_c.cpp | 3 + modules/core/src/convert_scale.dispatch.cpp | 140 ---- modules/core/src/copy.cpp | 557 ++------------ modules/core/src/datastructs.cpp | 3 + modules/core/src/dxt.cpp | 4 + modules/core/src/lapack.cpp | 25 +- modules/core/src/mathfuncs.cpp | 7 + modules/core/src/matmul.dispatch.cpp | 73 ++ modules/core/src/matrix_c.cpp | 4 +- modules/core/src/matrix_operations.cpp | 296 +------- modules/core/src/matrix_transform.cpp | 770 ++++++++++++++++++++ modules/core/src/matrix_wrap.cpp | 61 ++ modules/core/src/norm.cpp | 174 ++++- modules/core/src/persistence_c.cpp | 42 -- modules/core/src/rand.cpp | 6 + modules/core/src/stat_c.cpp | 4 + modules/core/src/umatrix.cpp | 94 --- 20 files changed, 1251 insertions(+), 1145 deletions(-) create mode 100644 modules/core/src/matrix_transform.cpp diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 8da28d275f..a84d7fc3ad 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -112,6 +112,10 @@ ocv_target_link_libraries(${the_module} PRIVATE "${OPENCV_HAL_LINKER_LIBS}" ) +if(OPENCV_CORE_EXCLUDE_C_API) + ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1") +endif() + ocv_add_accuracy_tests() ocv_add_perf_tests() diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 760bbcb088..41b281c8de 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -57,24 +57,6 @@ namespace cv * logical operations * \****************************************************************************************/ -void convertAndUnrollScalar( const Mat& 
sc, int buftype, uchar* scbuf, size_t blocksize ) -{ - int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); - size_t esz = CV_ELEM_SIZE(buftype); - getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); - // unroll the scalar - if( scn < cn ) - { - CV_Assert( scn == 1 ); - size_t esz1 = CV_ELEM_SIZE1(buftype); - for( size_t i = esz1; i < esz; i++ ) - scbuf[i] = scbuf[i - esz1]; - } - for( size_t i = esz; i < blocksize*esz; i++ ) - scbuf[i] = scbuf[i - esz]; -} - - enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4, OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8, OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14, @@ -1041,9 +1023,7 @@ static BinaryFuncC* getRecipTab() return recipTab; } -} - -void cv::multiply(InputArray src1, InputArray src2, +void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1052,7 +1032,7 @@ void cv::multiply(InputArray src1, InputArray src2, true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? 
OCL_OP_MUL : OCL_OP_MUL_SCALE); } -void cv::divide(InputArray src1, InputArray src2, +void divide(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1060,7 +1040,7 @@ void cv::divide(InputArray src1, InputArray src2, arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE); } -void cv::divide(double scale, InputArray src2, +void divide(double scale, InputArray src2, OutputArray dst, int dtype) { CV_INSTRUMENT_REGION(); @@ -1068,13 +1048,17 @@ void cv::divide(double scale, InputArray src2, arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE); } +UMat UMat::mul(InputArray m, double scale) const +{ + UMat dst; + multiply(*this, m, dst, scale); + return dst; +} + /****************************************************************************************\ * addWeighted * \****************************************************************************************/ -namespace cv -{ - static BinaryFuncC* getAddWeightedTab() { static BinaryFuncC addWeightedTab[] = @@ -1879,6 +1863,9 @@ void cv::inRange(InputArray _src, InputArray _lowerb, } } + +#ifndef OPENCV_EXCLUDE_C_API + /****************************************************************************************\ * Earlier API: cvAdd etc. * \****************************************************************************************/ @@ -2141,4 +2128,5 @@ cvMaxS( const void* srcarr1, double value, void* dstarr ) cv::max( src1, value, dst ); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. 
*/ diff --git a/modules/core/src/array.cpp b/modules/core/src/array.cpp index f2a79b5a69..1a5ea0100f 100644 --- a/modules/core/src/array.cpp +++ b/modules/core/src/array.cpp @@ -48,6 +48,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + #define CV_ORIGIN_TL 0 #define CV_ORIGIN_BL 1 @@ -3223,51 +3225,50 @@ template<> void DefaultDeleter::operator ()(CvMemStorage* obj) con template<> void DefaultDeleter::operator ()(CvFileStorage* obj) const { cvReleaseFileStorage(&obj); } -template static inline -void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) -{ - int i = 0; - for(; i < cn; i++) - buf[i] = saturate_cast(s.val[i]); - for(; i < unroll_to; i++) - buf[i] = buf[i-cn]; -} - -void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) -{ - CV_INSTRUMENT_REGION(); - - const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - CV_Assert(cn <= 4); - switch(depth) - { - case CV_8U: - scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); - break; - case CV_8S: - scalarToRawData_(s, (schar*)_buf, cn, unroll_to); - break; - case CV_16U: - scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); - break; - case CV_16S: - scalarToRawData_(s, (short*)_buf, cn, unroll_to); - break; - case CV_32S: - scalarToRawData_(s, (int*)_buf, cn, unroll_to); - break; - case CV_32F: - scalarToRawData_(s, (float*)_buf, cn, unroll_to); - break; - case CV_64F: - scalarToRawData_(s, (double*)_buf, cn, unroll_to); - break; - default: - CV_Error(CV_StsUnsupportedFormat,""); - } -} - } // cv:: +/* universal functions */ +CV_IMPL void +cvRelease( void** struct_ptr ) +{ + CvTypeInfo* info; + + if( !struct_ptr ) + CV_Error( CV_StsNullPtr, "NULL double pointer" ); + + if( *struct_ptr ) + { + info = cvTypeOf( *struct_ptr ); + if( !info ) + CV_Error( CV_StsError, "Unknown object type" ); + if( !info->release ) + CV_Error( CV_StsError, "release function pointer is NULL" ); + + info->release( struct_ptr ); + *struct_ptr = 0; + } +} + + +void* cvClone( 
const void* struct_ptr ) +{ + void* struct_copy = 0; + CvTypeInfo* info; + + if( !struct_ptr ) + CV_Error( CV_StsNullPtr, "NULL structure pointer" ); + + info = cvTypeOf( struct_ptr ); + if( !info ) + CV_Error( CV_StsError, "Unknown object type" ); + if( !info->clone ) + CV_Error( CV_StsError, "clone function pointer is NULL" ); + + struct_copy = info->clone( struct_ptr ); + return struct_copy; +} + + +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/convert_c.cpp b/modules/core/src/convert_c.cpp index efe4de740a..96beffccc6 100644 --- a/modules/core/src/convert_c.cpp +++ b/modules/core/src/convert_c.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API CV_IMPL void cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 ) @@ -132,3 +133,5 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr, CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() ); cv::normalize( src, dst, a, b, norm_type, dst.type(), mask ); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/convert_scale.dispatch.cpp b/modules/core/src/convert_scale.dispatch.cpp index 83376aa61d..6902ecc24b 100644 --- a/modules/core/src/convert_scale.dispatch.cpp +++ b/modules/core/src/convert_scale.dispatch.cpp @@ -9,7 +9,6 @@ #include "convert_scale.simd.hpp" #include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content - namespace cv { @@ -117,143 +116,4 @@ void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double bet } } -//================================================================================================== - -#ifdef HAVE_OPENCL - -static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, - double scale, double delta ) -{ - UMat src = _src.getUMat(); - - if( _mask.empty() ) - src.convertTo( _dst, dtype, scale, delta ); - else if (src.channels() 
<= 4) - { - const ocl::Device & dev = ocl::Device::getDefault(); - - int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), - ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), - rowsPerWI = dev.isIntel() ? 4 : 1; - - float fscale = static_cast(scale), fdelta = static_cast(delta); - bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, - haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), - haveDelta = std::fabs(delta) > DBL_EPSILON, - doubleSupport = dev.doubleFPConfig() > 0; - - if (!haveScale && !haveDelta && stype == dtype) - { - _src.copyTo(_dst, _mask); - return true; - } - if (haveZeroScale) - { - _dst.setTo(Scalar(delta), _mask); - return true; - } - - if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) - return false; - - char cvt[2][40]; - String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" - " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", - ocl::typeToStr(stype), ocl::typeToStr(dtype), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, - rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), - ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", - haveScale ? " -D HAVE_SCALE" : "", - haveDelta ? 
" -D HAVE_DELTA" : "", - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); - - ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); - if (k.empty()) - return false; - - UMat mask = _mask.getUMat(), dst = _dst.getUMat(); - - ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), - maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), - dstarg = ocl::KernelArg::ReadWrite(dst); - - if (haveScale) - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fscale, fdelta); - else - k.args(srcarg, maskarg, dstarg, fscale); - } - else - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fdelta); - else - k.args(srcarg, maskarg, dstarg); - } - - size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; - return k.run(2, globalsize, NULL, false); - } - else - { - UMat temp; - src.convertTo( temp, dtype, scale, delta ); - temp.copyTo( _dst, _mask ); - } - - return true; -} - -#endif - -void normalize(InputArray _src, InputOutputArray _dst, double a, double b, - int norm_type, int rtype, InputArray _mask) -{ - CV_INSTRUMENT_REGION(); - - double scale = 1, shift = 0; - int type = _src.type(), depth = CV_MAT_DEPTH(type); - - if( rtype < 0 ) - rtype = _dst.fixedType() ? _dst.depth() : depth; - - if( norm_type == CV_MINMAX ) - { - double smin = 0, smax = 0; - double dmin = MIN( a, b ), dmax = MAX( a, b ); - minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); - scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); - if( rtype == CV_32F ) - { - scale = (float)scale; - shift = (float)dmin - (float)(smin*scale); - } - else - shift = dmin - smin*scale; - } - else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) - { - scale = norm( _src, norm_type, _mask ); - scale = scale > DBL_EPSILON ? 
a/scale : 0.; - shift = 0; - } - else - CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) - - Mat src = _src.getMat(); - if( _mask.empty() ) - src.convertTo( _dst, rtype, scale, shift ); - else - { - Mat temp; - src.convertTo( temp, rtype, scale, shift ); - temp.copyTo( _dst, _mask ); - } -} - } // namespace diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 798fde74d4..5262eb1b9c 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -53,6 +53,75 @@ namespace cv { +template static inline +void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) +{ + int i = 0; + for(; i < cn; i++) + buf[i] = saturate_cast(s.val[i]); + for(; i < unroll_to; i++) + buf[i] = buf[i-cn]; +} + +void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) +{ + CV_INSTRUMENT_REGION(); + + const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(cn <= 4); + switch(depth) + { + case CV_8U: + scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); + break; + case CV_8S: + scalarToRawData_(s, (schar*)_buf, cn, unroll_to); + break; + case CV_16U: + scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); + break; + case CV_16S: + scalarToRawData_(s, (short*)_buf, cn, unroll_to); + break; + case CV_32S: + scalarToRawData_(s, (int*)_buf, cn, unroll_to); + break; + case CV_32F: + scalarToRawData_(s, (float*)_buf, cn, unroll_to); + break; + case CV_64F: + scalarToRawData_(s, (double*)_buf, cn, unroll_to); + break; +#if CV_VERSION_MAJOR >= 4 + case CV_16F: + scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to); + break; +#endif + default: + CV_Error(CV_StsUnsupportedFormat,""); + } +} + +void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) +{ + int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); + size_t esz = CV_ELEM_SIZE(buftype); + BinaryFunc cvtFn = getConvertFunc(sc.depth(), 
buftype); + CV_Assert(cvtFn); + cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); + // unroll the scalar + if( scn < cn ) + { + CV_Assert( scn == 1 ); + size_t esz1 = CV_ELEM_SIZE1(buftype); + for( size_t i = esz1; i < esz; i++ ) + scbuf[i] = scbuf[i - esz1]; + } + for( size_t i = esz; i < blocksize*esz; i++ ) + scbuf[i] = scbuf[i - esz]; +} + + template static void copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size) { @@ -594,490 +663,6 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) return *this; } -#if CV_SIMD128 -template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ - typedef typename V::lane_type T; - int end = (int)(size.width*esz); - int width = (end + 1)/2; - int width_1 = width & -v_uint8x16::nlanes; - int i, j; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - V t0, t1; - - t0 = v_load((T*)((uchar*)src + i)); - t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes)); - t0 = v_reverse(t0); - t1 = v_reverse(t1); - v_store((T*)(dst + j - v_uint8x16::nlanes), t0); - v_store((T*)(dst + i), t1); - } - if (isAligned(src, dst)) - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - T t0, t1; - - t0 = *((T*)((uchar*)src + i)); - t1 = *((T*)((uchar*)src + j - sizeof(T))); - *((T*)(dst + j - sizeof(T))) = t0; - *((T*)(dst + i)) = t1; - } - } - else - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - for (int k = 0; k < (int)sizeof(T); k++) - { - uchar t0, t1; - - t0 = *((uchar*)src + i + k); - t1 = *((uchar*)src + j + k - sizeof(T)); - *(dst + j + k - sizeof(T)) = t0; - *(dst + i + k) = t1; - } - } - } - } -} - -template CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t 
dstep, Size size, size_t esz ) -{ - int end = (int)(size.width*esz); - int width = (end + 1)/2; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(src, dst)); - CV_Assert(isAligned(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) - { - T1 t0, t1; - T2 t2, t3; - - t0 = *((T1*)((uchar*)src + i)); - t2 = *((T2*)((uchar*)src + i + sizeof(T1))); - t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2))); - t3 = *((T2*)((uchar*)src + j - sizeof(T2))); - *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0; - *((T2*)(dst + j - sizeof(T2))) = t2; - *((T1*)(dst + i)) = t1; - *((T2*)(dst + i + sizeof(T1))) = t3; - } - } -} -#endif - -static void -flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; -#endif - if (esz == 2 * v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes ) - { -#if CV_SIMD256 - v_uint8x32 t0, t1; - - t0 = v256_load((uchar*)src + i); - t1 = v256_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); -#else - v_uint8x16 t0, t1, t2, t3; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + i + v_uint8x16::nlanes); - t2 = v_load((uchar*)src + j); - t3 = v_load((uchar*)src + j + v_uint8x16::nlanes); - v_store(dst + j, t0); - v_store(dst + j + v_uint8x16::nlanes, t1); - v_store(dst + i, t2); - v_store(dst + i + v_uint8x16::nlanes, t3); -#endif - } - } - } - else if (esz == v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i 
+= v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - v_uint8x16 t0, t1; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); - } - } - } - else if (esz == 8 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 4 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 2 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 1) - { - flipHoriz_single(src, sstep, dst, dstep, size, esz); - } - else if (esz == 24 -#if CV_STRONG_ALIGNMENT - && isAligned(alignmentMark) -#endif - ) - { - int end = (int)(size.width*esz); - int width = (end + 1)/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) ) - { - v_uint8x16 t0, t1; - uint64_t t2, t3; - - t0 = v_load((uchar*)src + i); - t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes)); - t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t)); - t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t))); - v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0); - *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2; - v_store(dst + i, t1); - *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3; - } - } - } -#if !CV_STRONG_ALIGNMENT - else if (esz == 12) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } - else if (esz == 6) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } - else if (esz == 3) - { - flipHoriz_double(src, sstep, dst, dstep, size, esz); - } -#endif - else -#endif // CV_SIMD - { - int i, j, limit = (int)(((size.width + 1)/2)*esz); - AutoBuffer _tab(size.width*esz); - int* tab = _tab.data(); - - for( i = 0; i < 
size.width; i++ ) - for( size_t k = 0; k < esz; k++ ) - tab[i*esz + k] = (int)((size.width - i - 1)*esz + k); - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0; i < limit; i++ ) - { - j = tab[i]; - uchar t0 = src[i], t1 = src[j]; - dst[i] = t1; dst[j] = t0; - } - } - } -} - -static void -flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz ) -{ - const uchar* src1 = src0 + (size.height - 1)*sstep; - uchar* dst1 = dst0 + (size.height - 1)*dstep; - size.width *= (int)esz; - - for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep, - dst0 += dstep, dst1 -= dstep ) - { - int i = 0; -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - if (isAligned(src0, src1, dst0, dst1)) -#endif - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_int32 t0 = vx_load((int*)(src0 + i)); - v_int32 t1 = vx_load((int*)(src1 + i)); - vx_store((int*)(dst0 + i), t1); - vx_store((int*)(dst1 + i), t0); - } - } -#if CV_STRONG_ALIGNMENT - else - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_uint8 t0 = vx_load(src0 + i); - v_uint8 t1 = vx_load(src1 + i); - vx_store(dst0 + i, t1); - vx_store(dst1 + i, t0); - } - } -#endif -#endif - - if (isAligned(src0, src1, dst0, dst1)) - { - for( ; i <= size.width - 16; i += 16 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = t0; - - t0 = ((int*)(src0 + i))[1]; - t1 = ((int*)(src1 + i))[1]; - - ((int*)(dst0 + i))[1] = t1; - ((int*)(dst1 + i))[1] = t0; - - t0 = ((int*)(src0 + i))[2]; - t1 = ((int*)(src1 + i))[2]; - - ((int*)(dst0 + i))[2] = t1; - ((int*)(dst1 + i))[2] = t0; - - t0 = ((int*)(src0 + i))[3]; - t1 = ((int*)(src1 + i))[3]; - - ((int*)(dst0 + i))[3] = t1; - ((int*)(dst1 + i))[3] = t0; - } - - for( ; i <= size.width - 4; i += 4 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = 
t0; - } - } - - for( ; i < size.width; i++ ) - { - uchar t0 = src0[i]; - uchar t1 = src1[i]; - - dst0[i] = t1; - dst1[i] = t0; - } - } -} - -#ifdef HAVE_OPENCL - -enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS }; - -static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) -{ - CV_Assert(flipCode >= -1 && flipCode <= 1); - - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4); - - bool doubleSupport = dev.doubleFPConfig() > 0; - if (!doubleSupport && depth == CV_64F) - kercn = cn; - - if (cn > 4) - return false; - - const char * kernelName; - if (flipCode == 0) - kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; - else if (flipCode > 0) - kernelName = "arithm_flip_cols", flipType = FLIP_COLS; - else - kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; - - int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; - kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn; - - ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, - format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", - kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), - kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); - if (k.empty()) - return false; - - Size size = _src.size(); - _dst.create(size, type); - UMat src = _src.getUMat(), dst = _dst.getUMat(); - - int cols = size.width * cn / kercn, rows = size.height; - cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; - rows = flipType & FLIP_ROWS ? 
(rows + 1) >> 1 : rows; - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), - ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); - - size_t maxWorkGroupSize = dev.maxWorkGroupSize(); - CV_Assert(maxWorkGroupSize % 4 == 0); - - size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, - localsize[2] = { maxWorkGroupSize / 4, 4 }; - return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); -} - -#endif - -#if defined HAVE_IPP -static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - - // Details: https://github.com/opencv/opencv/issues/12943 - if (flip_mode <= 0 /* swap rows */ - && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 - && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ - ) - return false; - - IppiAxis ippMode; - if(flip_mode < 0) - ippMode = ippAxsBoth; - else if(flip_mode == 0) - ippMode = ippAxsHorizontal; - else - ippMode = ippAxsVertical; - - try - { - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - - CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); - } - catch(const ::ipp::IwException &) - { - return false; - } - - return true; -#else - CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); - return false; -#endif -} -#endif - - -void flip( InputArray _src, OutputArray _dst, int flip_mode ) -{ - CV_INSTRUMENT_REGION(); - - CV_Assert( _src.dims() <= 2 ); - Size size = _src.size(); - - if (flip_mode < 0) - { - if (size.width == 1) - flip_mode = 0; - if (size.height == 1) - flip_mode = 1; - } - - if ((size.width == 1 && flip_mode > 0) || - (size.height == 1 && flip_mode == 0)) - { - return _src.copyTo(_dst); - } - - CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) - - Mat src = _src.getMat(); - int type = src.type(); - _dst.create( size, type ); - Mat dst = _dst.getMat(); - - CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); - - size_t esz = 
CV_ELEM_SIZE(type); - - if( flip_mode <= 0 ) - flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - else - flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - - if( flip_mode < 0 ) - flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); -} - -void rotate(InputArray _src, OutputArray _dst, int rotateMode) -{ - CV_Assert(_src.dims() <= 2); - - switch (rotateMode) - { - case ROTATE_90_CLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 1); - break; - case ROTATE_180: - flip(_src, _dst, -1); - break; - case ROTATE_90_COUNTERCLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 0); - break; - default: - break; - } -} #if defined HAVE_OPENCL && !defined __APPLE__ @@ -1499,6 +1084,9 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, } } + +#ifndef OPENCV_EXCLUDE_C_API + /* dst = src */ CV_IMPL void cvCopy( const void* srcarr, void* dstarr, const void* maskarr ) @@ -1614,4 +1202,5 @@ cvRepeat( const CvArr* srcarr, CvArr* dstarr ) cv::repeat(src, dst.rows/src.rows, dst.cols/src.cols, dst); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/datastructs.cpp b/modules/core/src/datastructs.cpp index 61adf3493e..cd9196a130 100644 --- a/modules/core/src/datastructs.cpp +++ b/modules/core/src/datastructs.cpp @@ -40,6 +40,8 @@ //M*/ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + /* default alignment for dynamic data strucutures, resided in storages. */ #define CV_STRUCT_ALIGN ((int)sizeof(double)) @@ -3585,4 +3587,5 @@ void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr ) } +#endif // OPENCV_EXCLUDE_C_API /* End of file. 
*/ diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index b307703a32..e378f31e66 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -4640,6 +4640,9 @@ int cv::getOptimalDFTSize( int size0 ) return optimalDFTSizeTab[b]; } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows ) { @@ -4695,4 +4698,5 @@ cvGetOptimalDFTSize( int size0 ) return cv::getOptimalDFTSize(size0); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index 486b7a5aba..9bca6a8211 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -753,8 +753,6 @@ SVBkSb( int m, int n, const double* w, size_t wstep, (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 ); } -} - /****************************************************************************************\ * Determinant of the matrix * \****************************************************************************************/ @@ -764,7 +762,7 @@ SVBkSb( int m, int n, const double* w, size_t wstep, m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \ m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0))) -double cv::determinant( InputArray _mat ) +double determinant( InputArray _mat ) { CV_INSTRUMENT_REGION(); @@ -842,7 +840,7 @@ double cv::determinant( InputArray _mat ) #define Df( y, x ) ((float*)(dstdata + y*dststep))[x] #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x] -double cv::invert( InputArray _src, OutputArray _dst, int method ) +double invert( InputArray _src, OutputArray _dst, int method ) { CV_INSTRUMENT_REGION(); @@ -1069,13 +1067,19 @@ double cv::invert( InputArray _src, OutputArray _dst, int method ) return result; } +UMat UMat::inv(int method) const +{ + UMat m; + invert(*this, m, method); + return m; +} /****************************************************************************************\ * Solving a linear system * 
\****************************************************************************************/ -bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method ) +bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method ) { CV_INSTRUMENT_REGION(); @@ -1374,7 +1378,7 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix /////////////// -bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) +bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) { CV_INSTRUMENT_REGION(); @@ -1396,7 +1400,7 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) const bool evecNeeded = _evects.needed(); const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly; _evals.create(n, 1, type); - cv::Mat evals = _evals.getMat(); + Mat evals = _evals.getMat(); if ( type == CV_64F ) { Eigen::MatrixXd src_eig, zeros_eig; @@ -1448,9 +1452,6 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) #endif } -namespace cv -{ - static void _SVDcompute( InputArray _aarr, OutputArray _w, OutputArray _u, OutputArray _vt, int flags ) { @@ -1598,6 +1599,9 @@ void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL double cvDet( const CvArr* arr ) { @@ -1789,3 +1793,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr, cv::SVD::backSubst(w, u, v, rhs, dst); CV_Assert( dst.data == dst0.data ); } +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index a4e5263aa8..9fdf7d7702 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -1637,6 +1637,9 @@ void patchNaNs( InputOutputArray _a, double _val ) } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); } CV_IMPL 
float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); } @@ -1720,6 +1723,7 @@ CV_IMPL int cvCheckArr( const CvArr* arr, int flags, return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal ); } +#endif // OPENCV_EXCLUDE_C_API /* Finds real roots of cubic, quadratic or linear equation. @@ -2015,6 +2019,8 @@ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters ) } +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL int cvSolveCubic( const CvMat* coeffs, CvMat* roots ) { @@ -2034,6 +2040,7 @@ void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated } +#endif // OPENCV_EXCLUDE_C_API // Common constants for dispatched code diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp index a9b82aee88..e81064ec16 100644 --- a/modules/core/src/matmul.dispatch.cpp +++ b/modules/core/src/matmul.dispatch.cpp @@ -999,8 +999,79 @@ double Mat::dot(InputArray _mat) const return r; } + +#ifdef HAVE_OPENCL + +static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) +{ + UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); + + int type = src1.type(), depth = CV_MAT_DEPTH(type), + kercn = ocl::predictOptimalVectorWidth(src1, src2); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( !doubleSupport && depth == CV_64F ) + return false; + + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + int ddepth = std::max(CV_32F, depth); + + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + char cvt[40]; + ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, + format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " + "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", + 
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), + ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), + ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), + (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", + _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn)); + if (k.empty()) + return false; + + UMat db(1, dbsize, ddepth); + + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), + src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), + dbarg = ocl::KernelArg::PtrWriteOnly(db); + + k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); + + size_t globalsize = dbsize * wgs; + if (k.run(1, &globalsize, &wgs, false)) + { + res = sum(db.getMat(ACCESS_READ))[0]; + return true; + } + return false; +} + +#endif + +double UMat::dot(InputArray m) const +{ + CV_INSTRUMENT_REGION(); + + CV_Assert(m.sameSize(*this) && m.type() == type()); + +#ifdef HAVE_OPENCL + double r = 0; + CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) +#endif + + return getMat(ACCESS_READ).dot(m); +} + } // namespace cv:: + +#ifndef OPENCV_EXCLUDE_C_API /****************************************************************************************\ * Earlier API * \****************************************************************************************/ @@ -1225,4 +1296,6 @@ cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr, CV_Assert(dst0.data == dst.data); } +#endif // OPENCV_EXCLUDE_C_API + /* End of file. 
*/ diff --git a/modules/core/src/matrix_c.cpp b/modules/core/src/matrix_c.cpp index 2fead4100c..baa61bb66f 100644 --- a/modules/core/src/matrix_c.cpp +++ b/modules/core/src/matrix_c.cpp @@ -6,6 +6,7 @@ #include "opencv2/core/mat.hpp" #include "opencv2/core/types_c.h" +#ifndef OPENCV_EXCLUDE_C_API // glue CvMatND cvMatND(const cv::Mat& m) @@ -360,7 +361,6 @@ cvSort( const CvArr* _src, CvArr* _dst, CvArr* _idx, int flags ) } } - CV_IMPL int cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels, CvTermCriteria termcrit, int attempts, CvRNG*, @@ -389,3 +389,5 @@ cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels, *_compactness = compactness; return 1; } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp index 6f863b8871..ca8edc4771 100644 --- a/modules/core/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -226,6 +226,23 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s ) } } + +namespace cv { + +UMat UMat::eye(int rows, int cols, int type) +{ + return UMat::eye(Size(cols, rows), type); +} + +UMat UMat::eye(Size size, int type) +{ + UMat m(size, type); + setIdentity(m); + return m; +} + +} // namespace + //////////////////////////////////////////// trace /////////////////////////////////////////// cv::Scalar cv::trace( InputArray _m ) @@ -260,285 +277,6 @@ cv::Scalar cv::trace( InputArray _m ) return cv::sum(m.diag()); } -////////////////////////////////////// transpose ///////////////////////////////////////// - -namespace cv -{ - -template static void -transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) -{ - int i=0, j, m = sz.width, n = sz.height; - - #if CV_ENABLE_UNROLLED - for(; i <= m - 4; i += 4 ) - { - T* d0 = (T*)(dst + dstep*i); - T* d1 = (T*)(dst + dstep*(i+1)); - T* d2 = (T*)(dst + dstep*(i+2)); - T* d3 = (T*)(dst + dstep*(i+3)); - - for( j = 0; j <= n - 4; j += 4 ) - { - const T* s0 = 
(const T*)(src + i*sizeof(T) + sstep*j); - const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); - const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); - const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); - - d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; - d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1]; - d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2]; - d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3]; - } - - for( ; j < n; j++ ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); - d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3]; - } - } - #endif - for( ; i < m; i++ ) - { - T* d0 = (T*)(dst + dstep*i); - j = 0; - #if CV_ENABLE_UNROLLED - for(; j <= n - 4; j += 4 ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); - const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); - const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); - const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); - - d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; - } - #endif - for( ; j < n; j++ ) - { - const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); - d0[j] = s0[0]; - } - } -} - -template static void -transposeI_( uchar* data, size_t step, int n ) -{ - for( int i = 0; i < n; i++ ) - { - T* row = (T*)(data + step*i); - uchar* data1 = data + i*sizeof(T); - for( int j = i+1; j < n; j++ ) - std::swap( row[j], *(T*)(data1 + step*j) ); - } -} - -typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ); -typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n ); - -#define DEF_TRANSPOSE_FUNC(suffix, type) \ -static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \ -{ transpose_(src, sstep, dst, dstep, sz); } \ -\ -static void transposeI_##suffix( uchar* data, size_t step, int n ) \ -{ transposeI_(data, step, n); } - -DEF_TRANSPOSE_FUNC(8u, 
uchar) -DEF_TRANSPOSE_FUNC(16u, ushort) -DEF_TRANSPOSE_FUNC(8uC3, Vec3b) -DEF_TRANSPOSE_FUNC(32s, int) -DEF_TRANSPOSE_FUNC(16uC3, Vec3s) -DEF_TRANSPOSE_FUNC(32sC2, Vec2i) -DEF_TRANSPOSE_FUNC(32sC3, Vec3i) -DEF_TRANSPOSE_FUNC(32sC4, Vec4i) -DEF_TRANSPOSE_FUNC(32sC6, Vec6i) -DEF_TRANSPOSE_FUNC(32sC8, Vec8i) - -static TransposeFunc transposeTab[] = -{ - 0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0, - transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4, - 0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8 -}; - -static TransposeInplaceFunc transposeInplaceTab[] = -{ - 0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0, - transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4, - 0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8 -}; - -#ifdef HAVE_OPENCL - -static bool ocl_transpose( InputArray _src, OutputArray _dst ) -{ - const ocl::Device & dev = ocl::Device::getDefault(); - const int TILE_DIM = 32, BLOCK_ROWS = 8; - int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), - rowsPerWI = dev.isIntel() ? 4 : 1; - - UMat src = _src.getUMat(); - _dst.create(src.cols, src.rows, type); - UMat dst = _dst.getUMat(); - - String kernelName("transpose"); - bool inplace = dst.u == src.u; - - if (inplace) - { - CV_Assert(dst.cols == dst.rows); - kernelName += "_inplace"; - } - else - { - // check required local memory size - size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); - if (required_local_memory > ocl::Device::getDefault().localMemSize()) - return false; - } - - ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, - format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", - ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), - cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? 
" -D INPLACE" : "")); - if (k.empty()) - return false; - - if (inplace) - k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); - else - k.args(ocl::KernelArg::ReadOnly(src), - ocl::KernelArg::WriteOnlyNoSize(dst)); - - size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; - size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; - - if (inplace && dev.isIntel()) - { - localsize[0] = 16; - localsize[1] = dev.maxWorkGroupSize() / localsize[0]; - } - - return k.run(2, globalsize, localsize, false); -} - -#endif - -#ifdef HAVE_IPP -static bool ipp_transpose( Mat &src, Mat &dst ) -{ - CV_INSTRUMENT_REGION_IPP(); - - int type = src.type(); - typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); - typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); - IppiTranspose ippiTranspose = 0; - IppiTransposeI ippiTranspose_I = 0; - - if (dst.data == src.data && dst.cols == dst.rows) - { - CV_SUPPRESS_DEPRECATED_START - ippiTranspose_I = - type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : - type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : - type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : - type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : - type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : - type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : - type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : - type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : - type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : - type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : - type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : - type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : - type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : - type == CV_32FC3 ? 
(IppiTransposeI)ippiTranspose_32f_C3IR : - type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; - CV_SUPPRESS_DEPRECATED_END - } - else - { - ippiTranspose = - type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : - type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : - type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R : - type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : - type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R : - type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R : - type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R : - type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R : - type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R : - type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R : - type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R : - type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R : - type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R : - type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R : - type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0; - } - - IppiSize roiSize = { src.cols, src.rows }; - if (ippiTranspose != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - else if (ippiTranspose_I != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - return false; -} -#endif - -} - - -void cv::transpose( InputArray _src, OutputArray _dst ) -{ - CV_INSTRUMENT_REGION(); - - int type = _src.type(), esz = CV_ELEM_SIZE(type); - CV_Assert( _src.dims() <= 2 && esz <= 32 ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_transpose(_src, _dst)) - - Mat src = _src.getMat(); - if( src.empty() ) - { - _dst.release(); - return; - } - - _dst.create(src.cols, src.rows, src.type()); - Mat dst = _dst.getMat(); - - // handle the case of single-column/single-row matrices, stored in STL vectors. 
- if( src.rows != dst.cols || src.cols != dst.rows ) - { - CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) ); - src.copyTo(dst); - return; - } - - CV_IPP_RUN_FAST(ipp_transpose(src, dst)) - - if( dst.data == src.data ) - { - TransposeInplaceFunc func = transposeInplaceTab[esz]; - CV_Assert( func != 0 ); - CV_Assert( dst.cols == dst.rows ); - func( dst.ptr(), dst.step, dst.rows ); - } - else - { - TransposeFunc func = transposeTab[esz]; - CV_Assert( func != 0 ); - func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() ); - } -} - ////////////////////////////////////// completeSymm ///////////////////////////////////////// diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp new file mode 100644 index 0000000000..37bc273b4d --- /dev/null +++ b/modules/core/src/matrix_transform.cpp @@ -0,0 +1,770 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "opencl_kernels_core.hpp" + +namespace cv { + +////////////////////////////////////// transpose ///////////////////////////////////////// + +template static void +transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) +{ + int i=0, j, m = sz.width, n = sz.height; + + #if CV_ENABLE_UNROLLED + for(; i <= m - 4; i += 4 ) + { + T* d0 = (T*)(dst + dstep*i); + T* d1 = (T*)(dst + dstep*(i+1)); + T* d2 = (T*)(dst + dstep*(i+2)); + T* d3 = (T*)(dst + dstep*(i+3)); + + for( j = 0; j <= n - 4; j += 4 ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); + const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); + const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); + const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); + + d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; + d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; 
d1[j+3] = s3[1]; + d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2]; + d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3]; + } + + for( ; j < n; j++ ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); + d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3]; + } + } + #endif + for( ; i < m; i++ ) + { + T* d0 = (T*)(dst + dstep*i); + j = 0; + #if CV_ENABLE_UNROLLED + for(; j <= n - 4; j += 4 ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j); + const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1)); + const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2)); + const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3)); + + d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0]; + } + #endif + for( ; j < n; j++ ) + { + const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep); + d0[j] = s0[0]; + } + } +} + +template static void +transposeI_( uchar* data, size_t step, int n ) +{ + for( int i = 0; i < n; i++ ) + { + T* row = (T*)(data + step*i); + uchar* data1 = data + i*sizeof(T); + for( int j = i+1; j < n; j++ ) + std::swap( row[j], *(T*)(data1 + step*j) ); + } +} + +typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ); +typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n ); + +#define DEF_TRANSPOSE_FUNC(suffix, type) \ +static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \ +{ transpose_(src, sstep, dst, dstep, sz); } \ +\ +static void transposeI_##suffix( uchar* data, size_t step, int n ) \ +{ transposeI_(data, step, n); } + +DEF_TRANSPOSE_FUNC(8u, uchar) +DEF_TRANSPOSE_FUNC(16u, ushort) +DEF_TRANSPOSE_FUNC(8uC3, Vec3b) +DEF_TRANSPOSE_FUNC(32s, int) +DEF_TRANSPOSE_FUNC(16uC3, Vec3s) +DEF_TRANSPOSE_FUNC(32sC2, Vec2i) +DEF_TRANSPOSE_FUNC(32sC3, Vec3i) +DEF_TRANSPOSE_FUNC(32sC4, Vec4i) +DEF_TRANSPOSE_FUNC(32sC6, Vec6i) +DEF_TRANSPOSE_FUNC(32sC8, Vec8i) + +static TransposeFunc transposeTab[] = +{ 
+ 0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0, + transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4, + 0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8 +}; + +static TransposeInplaceFunc transposeInplaceTab[] = +{ + 0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0, + transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4, + 0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8 +}; + +#ifdef HAVE_OPENCL + +static bool ocl_transpose( InputArray _src, OutputArray _dst ) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + const int TILE_DIM = 32, BLOCK_ROWS = 8; + int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), + rowsPerWI = dev.isIntel() ? 4 : 1; + + UMat src = _src.getUMat(); + _dst.create(src.cols, src.rows, type); + UMat dst = _dst.getUMat(); + + String kernelName("transpose"); + bool inplace = dst.u == src.u; + + if (inplace) + { + CV_Assert(dst.cols == dst.rows); + kernelName += "_inplace"; + } + else + { + // check required local memory size + size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); + if (required_local_memory > ocl::Device::getDefault().localMemSize()) + return false; + } + + ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, + format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", + ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), + cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : "")); + if (k.empty()) + return false; + + if (inplace) + k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); + else + k.args(ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::WriteOnlyNoSize(dst)); + + size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; + size_t globalsize[2] = { (size_t)src.cols, inplace ? 
((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; + + if (inplace && dev.isIntel()) + { + localsize[0] = 16; + localsize[1] = dev.maxWorkGroupSize() / localsize[0]; + } + + return k.run(2, globalsize, localsize, false); +} + +#endif + +#ifdef HAVE_IPP +static bool ipp_transpose( Mat &src, Mat &dst ) +{ + CV_INSTRUMENT_REGION_IPP(); + + int type = src.type(); + typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); + typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); + IppiTranspose ippiTranspose = 0; + IppiTransposeI ippiTranspose_I = 0; + + if (dst.data == src.data && dst.cols == dst.rows) + { + CV_SUPPRESS_DEPRECATED_START + ippiTranspose_I = + type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : + type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : + type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : + type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : + type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : + type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : + type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : + type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : + type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : + type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : + type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : + type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : + type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : + type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR : + type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; + CV_SUPPRESS_DEPRECATED_END + } + else + { + ippiTranspose = + type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : + type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : + type == CV_8UC4 ? 
(IppiTranspose)ippiTranspose_8u_C4R : + type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : + type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R : + type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R : + type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R : + type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R : + type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R : + type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R : + type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R : + type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R : + type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R : + type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R : + type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0; + } + + IppiSize roiSize = { src.cols, src.rows }; + if (ippiTranspose != 0) + { + if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0) + return true; + } + else if (ippiTranspose_I != 0) + { + if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0) + return true; + } + return false; +} +#endif + + +void transpose( InputArray _src, OutputArray _dst ) +{ + CV_INSTRUMENT_REGION(); + + int type = _src.type(), esz = CV_ELEM_SIZE(type); + CV_Assert( _src.dims() <= 2 && esz <= 32 ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_transpose(_src, _dst)) + + Mat src = _src.getMat(); + if( src.empty() ) + { + _dst.release(); + return; + } + + _dst.create(src.cols, src.rows, src.type()); + Mat dst = _dst.getMat(); + + // handle the case of single-column/single-row matrices, stored in STL vectors. 
+ if( src.rows != dst.cols || src.cols != dst.rows ) + { + CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) ); + src.copyTo(dst); + return; + } + + CV_IPP_RUN_FAST(ipp_transpose(src, dst)) + + if( dst.data == src.data ) + { + TransposeInplaceFunc func = transposeInplaceTab[esz]; + CV_Assert( func != 0 ); + CV_Assert( dst.cols == dst.rows ); + func( dst.ptr(), dst.step, dst.rows ); + } + else + { + TransposeFunc func = transposeTab[esz]; + CV_Assert( func != 0 ); + func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() ); + } +} + + +#if CV_SIMD128 +template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) +{ + typedef typename V::lane_type T; + int end = (int)(size.width*esz); + int width = (end + 1)/2; + int width_1 = width & -v_uint8x16::nlanes; + int i, j; + +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(src, dst)); +#endif + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) + { + V t0, t1; + + t0 = v_load((T*)((uchar*)src + i)); + t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes)); + t0 = v_reverse(t0); + t1 = v_reverse(t1); + v_store((T*)(dst + j - v_uint8x16::nlanes), t0); + v_store((T*)(dst + i), t1); + } + if (isAligned(src, dst)) + { + for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) + { + T t0, t1; + + t0 = *((T*)((uchar*)src + i)); + t1 = *((T*)((uchar*)src + j - sizeof(T))); + *((T*)(dst + j - sizeof(T))) = t0; + *((T*)(dst + i)) = t1; + } + } + else + { + for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) + { + for (int k = 0; k < (int)sizeof(T); k++) + { + uchar t0, t1; + + t0 = *((uchar*)src + i + k); + t1 = *((uchar*)src + j + k - sizeof(T)); + *(dst + j + k - sizeof(T)) = t0; + *(dst + i + k) = t1; + } + } + } + } +} + +template CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) 
+{ + int end = (int)(size.width*esz); + int width = (end + 1)/2; + +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(src, dst)); + CV_Assert(isAligned(src, dst)); +#endif + + for( ; size.height--; src += sstep, dst += dstep ) + { + for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) + { + T1 t0, t1; + T2 t2, t3; + + t0 = *((T1*)((uchar*)src + i)); + t2 = *((T2*)((uchar*)src + i + sizeof(T1))); + t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2))); + t3 = *((T2*)((uchar*)src + j - sizeof(T2))); + *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0; + *((T2*)(dst + j - sizeof(T2))) = t2; + *((T1*)(dst + i)) = t1; + *((T2*)(dst + i + sizeof(T1))) = t3; + } + } +} +#endif + +static void +flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) +{ +#if CV_SIMD +#if CV_STRONG_ALIGNMENT + size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; +#endif + if (esz == 2 * v_uint8x16::nlanes) + { + int end = (int)(size.width*esz); + int width = end/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes ) + { +#if CV_SIMD256 + v_uint8x32 t0, t1; + + t0 = v256_load((uchar*)src + i); + t1 = v256_load((uchar*)src + j); + v_store(dst + j, t0); + v_store(dst + i, t1); +#else + v_uint8x16 t0, t1, t2, t3; + + t0 = v_load((uchar*)src + i); + t1 = v_load((uchar*)src + i + v_uint8x16::nlanes); + t2 = v_load((uchar*)src + j); + t3 = v_load((uchar*)src + j + v_uint8x16::nlanes); + v_store(dst + j, t0); + v_store(dst + j + v_uint8x16::nlanes, t1); + v_store(dst + i, t2); + v_store(dst + i + v_uint8x16::nlanes, t3); +#endif + } + } + } + else if (esz == v_uint8x16::nlanes) + { + int end = (int)(size.width*esz); + int width = end/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= 
v_uint8x16::nlanes ) + { + v_uint8x16 t0, t1; + + t0 = v_load((uchar*)src + i); + t1 = v_load((uchar*)src + j); + v_store(dst + j, t0); + v_store(dst + i, t1); + } + } + } + else if (esz == 8 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 4 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 2 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 1) + { + flipHoriz_single(src, sstep, dst, dstep, size, esz); + } + else if (esz == 24 +#if CV_STRONG_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) + { + int end = (int)(size.width*esz); + int width = (end + 1)/2; + + for( ; size.height--; src += sstep, dst += dstep ) + { + for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) ) + { + v_uint8x16 t0, t1; + uint64_t t2, t3; + + t0 = v_load((uchar*)src + i); + t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes)); + t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t)); + t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t))); + v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0); + *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2; + v_store(dst + i, t1); + *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3; + } + } + } +#if !CV_STRONG_ALIGNMENT + else if (esz == 12) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } + else if (esz == 6) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } + else if (esz == 3) + { + flipHoriz_double(src, sstep, dst, dstep, size, esz); + } +#endif + else +#endif // CV_SIMD + { + int i, j, limit = (int)(((size.width + 1)/2)*esz); + AutoBuffer _tab(size.width*esz); + int* tab = _tab.data(); + + for( i = 0; i < size.width; i++ ) + for( 
size_t k = 0; k < esz; k++ ) + tab[i*esz + k] = (int)((size.width - i - 1)*esz + k); + + for( ; size.height--; src += sstep, dst += dstep ) + { + for( i = 0; i < limit; i++ ) + { + j = tab[i]; + uchar t0 = src[i], t1 = src[j]; + dst[i] = t1; dst[j] = t0; + } + } + } +} + +static void +flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz ) +{ + const uchar* src1 = src0 + (size.height - 1)*sstep; + uchar* dst1 = dst0 + (size.height - 1)*dstep; + size.width *= (int)esz; + + for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep, + dst0 += dstep, dst1 -= dstep ) + { + int i = 0; +#if CV_SIMD +#if CV_STRONG_ALIGNMENT + if (isAligned(src0, src1, dst0, dst1)) +#endif + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_int32 t0 = vx_load((int*)(src0 + i)); + v_int32 t1 = vx_load((int*)(src1 + i)); + vx_store((int*)(dst0 + i), t1); + vx_store((int*)(dst1 + i), t0); + } + } +#if CV_STRONG_ALIGNMENT + else + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_uint8 t0 = vx_load(src0 + i); + v_uint8 t1 = vx_load(src1 + i); + vx_store(dst0 + i, t1); + vx_store(dst1 + i, t0); + } + } +#endif +#endif + + if (isAligned(src0, src1, dst0, dst1)) + { + for( ; i <= size.width - 16; i += 16 ) + { + int t0 = ((int*)(src0 + i))[0]; + int t1 = ((int*)(src1 + i))[0]; + + ((int*)(dst0 + i))[0] = t1; + ((int*)(dst1 + i))[0] = t0; + + t0 = ((int*)(src0 + i))[1]; + t1 = ((int*)(src1 + i))[1]; + + ((int*)(dst0 + i))[1] = t1; + ((int*)(dst1 + i))[1] = t0; + + t0 = ((int*)(src0 + i))[2]; + t1 = ((int*)(src1 + i))[2]; + + ((int*)(dst0 + i))[2] = t1; + ((int*)(dst1 + i))[2] = t0; + + t0 = ((int*)(src0 + i))[3]; + t1 = ((int*)(src1 + i))[3]; + + ((int*)(dst0 + i))[3] = t1; + ((int*)(dst1 + i))[3] = t0; + } + + for( ; i <= size.width - 4; i += 4 ) + { + int t0 = ((int*)(src0 + i))[0]; + int t1 = ((int*)(src1 + i))[0]; + + ((int*)(dst0 + i))[0] = t1; + ((int*)(dst1 + i))[0] = t0; + } + } + + for( ; i < 
size.width; i++ ) + { + uchar t0 = src0[i]; + uchar t1 = src1[i]; + + dst0[i] = t1; + dst1[i] = t0; + } + } +} + +#ifdef HAVE_OPENCL + +enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS }; + +static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) +{ + CV_Assert(flipCode >= -1 && flipCode <= 1); + + const ocl::Device & dev = ocl::Device::getDefault(); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), + flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4); + + bool doubleSupport = dev.doubleFPConfig() > 0; + if (!doubleSupport && depth == CV_64F) + kercn = cn; + + if (cn > 4) + return false; + + const char * kernelName; + if (flipCode == 0) + kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; + else if (flipCode > 0) + kernelName = "arithm_flip_cols", flipType = FLIP_COLS; + else + kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; + + int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; + kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn; + + ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, + format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", + kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), + kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); + if (k.empty()) + return false; + + Size size = _src.size(); + _dst.create(size, type); + UMat src = _src.getUMat(), dst = _dst.getUMat(); + + int cols = size.width * cn / kercn, rows = size.height; + cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; + rows = flipType & FLIP_ROWS ? 
(rows + 1) >> 1 : rows; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), + ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); + + size_t maxWorkGroupSize = dev.maxWorkGroupSize(); + CV_Assert(maxWorkGroupSize % 4 == 0); + + size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, + localsize[2] = { maxWorkGroupSize / 4, 4 }; + return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); +} + +#endif + +#if defined HAVE_IPP +static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + + // Details: https://github.com/opencv/opencv/issues/12943 + if (flip_mode <= 0 /* swap rows */ + && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 + && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ + ) + return false; + + IppiAxis ippMode; + if(flip_mode < 0) + ippMode = ippAxsBoth; + else if(flip_mode == 0) + ippMode = ippAxsHorizontal; + else + ippMode = ippAxsVertical; + + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); + } + catch(const ::ipp::IwException &) + { + return false; + } + + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); + return false; +#endif +} +#endif + + +void flip( InputArray _src, OutputArray _dst, int flip_mode ) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( _src.dims() <= 2 ); + Size size = _src.size(); + + if (flip_mode < 0) + { + if (size.width == 1) + flip_mode = 0; + if (size.height == 1) + flip_mode = 1; + } + + if ((size.width == 1 && flip_mode > 0) || + (size.height == 1 && flip_mode == 0)) + { + return _src.copyTo(_dst); + } + + CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) + + Mat src = _src.getMat(); + int type = src.type(); + _dst.create( size, type ); + Mat dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); + + size_t esz = 
CV_ELEM_SIZE(type); + + if( flip_mode <= 0 ) + flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + else + flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + + if( flip_mode < 0 ) + flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); +} + +void rotate(InputArray _src, OutputArray _dst, int rotateMode) +{ + CV_Assert(_src.dims() <= 2); + + switch (rotateMode) + { + case ROTATE_90_CLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 1); + break; + case ROTATE_180: + flip(_src, _dst, -1); + break; + case ROTATE_90_COUNTERCLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 0); + break; + default: + break; + } +} + +} // namespace diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index 0d439759cc..53e0d24470 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ -316,6 +316,7 @@ void _InputArray::getUMatVector(std::vector& umv) const cuda::GpuMat _InputArray::getGpuMat() const { +#ifdef HAVE_CUDA int k = kind(); if (k == CUDA_GPU_MAT) @@ -339,14 +340,22 @@ cuda::GpuMat _InputArray::getGpuMat() const return cuda::GpuMat(); CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem"); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } void _InputArray::getGpuMatVector(std::vector& gpumv) const { +#ifdef HAVE_CUDA int k = kind(); if (k == STD_VECTOR_CUDA_GPU_MAT) { gpumv = *(std::vector*)obj; } +#else + CV_UNUSED(gpumv); + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } ogl::Buffer _InputArray::getOGlBuffer() const { @@ -457,11 +466,15 @@ Size _InputArray::size(int i) const if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA const std::vector& vv = *(const std::vector*)obj; if (i < 0) return vv.empty() ? 
Size() : Size((int)vv.size(), 1); CV_Assert(i < (int)vv.size()); return vv[i].size(); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == STD_VECTOR_UMAT ) @@ -795,6 +808,7 @@ int _InputArray::type(int i) const if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA const std::vector& vv = *(const std::vector*)obj; if (vv.empty()) { @@ -803,6 +817,9 @@ int _InputArray::type(int i) const } CV_Assert(i < (int)vv.size()); return vv[i >= 0 ? i : 0].type(); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER ) @@ -1164,22 +1181,34 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == _sz); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_sz.height, _sz.width}; create(2, sizes, mtype, 
i, allowTransposed, fixedDepthMask); @@ -1206,22 +1235,34 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_rows, _cols}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); @@ -1644,20 +1685,32 @@ void _OutputArray::release() const if( k == CUDA_GPU_MAT ) { +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == CUDA_HOST_MEM ) { +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER ) { +#ifdef HAVE_OPENGL 
((ogl::Buffer*)obj)->release(); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == NONE ) @@ -1688,8 +1741,12 @@ void _OutputArray::release() const } if (k == STD_VECTOR_CUDA_GPU_MAT) { +#ifdef HAVE_CUDA ((std::vector*)obj)->clear(); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type"); } @@ -1797,9 +1854,13 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const ((UMat*)obj)->setTo(arr, mask); else if( k == CUDA_GPU_MAT ) { +#ifdef HAVE_CUDA Mat value = arr.getMat(); CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) ); ((cuda::GpuMat*)obj)->setTo(Scalar(Vec(value.ptr())), mask); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } else CV_Error(Error::StsNotImplemented, ""); diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index b95cd99bd8..601082783e 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -205,13 +205,10 @@ int normL1_(const uchar* a, const uchar* b, int n) return d; } -}} //cv::hal +} //cv::hal //================================================================================================== -namespace cv -{ - template int normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn) { @@ -591,12 +588,10 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result); #endif return false; -} -#endif +} // ipp_norm() +#endif // HAVE_IPP -} // cv:: - -double cv::norm( InputArray _src, int normType, InputArray _mask ) +double norm( InputArray _src, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -769,9 +764,6 @@ double cv::norm( 
InputArray _src, int normType, InputArray _mask ) //================================================================================================== #ifdef HAVE_OPENCL - -namespace cv { - static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) { #ifdef __ANDROID__ @@ -826,15 +818,10 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr result /= (s2 + DBL_EPSILON); return true; -} - -} - -#endif +} // ocl_norm() +#endif // HAVE_OPENCL #ifdef HAVE_IPP -namespace cv -{ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result) { CV_INSTRUMENT_REGION_IPP(); @@ -1060,12 +1047,11 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result); #endif return false; -} -} -#endif +} // ipp_norm +#endif // HAVE_IPP -double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) +double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -1234,12 +1220,12 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m return result.d; } -cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const +cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const { return cv::hal::normHamming(a, b, size); } -double cv::PSNR(InputArray _src1, InputArray _src2) +double PSNR(InputArray _src1, InputArray _src2) { CV_INSTRUMENT_REGION(); @@ -1249,3 +1235,141 @@ double cv::PSNR(InputArray _src1, InputArray _src2) double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels())); return 20*log10(255./(diff+DBL_EPSILON)); } + + +#ifdef HAVE_OPENCL +static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, 
int dtype, + double scale, double delta ) +{ + UMat src = _src.getUMat(); + + if( _mask.empty() ) + src.convertTo( _dst, dtype, scale, delta ); + else if (src.channels() <= 4) + { + const ocl::Device & dev = ocl::Device::getDefault(); + + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), + ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), + rowsPerWI = dev.isIntel() ? 4 : 1; + + float fscale = static_cast(scale), fdelta = static_cast(delta); + bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, + haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), + haveDelta = std::fabs(delta) > DBL_EPSILON, + doubleSupport = dev.doubleFPConfig() > 0; + + if (!haveScale && !haveDelta && stype == dtype) + { + _src.copyTo(_dst, _mask); + return true; + } + if (haveZeroScale) + { + _dst.setTo(Scalar(delta), _mask); + return true; + } + + if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) + return false; + + char cvt[2][40]; + String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" + " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", + ocl::typeToStr(stype), ocl::typeToStr(dtype), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, + rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + haveScale ? " -D HAVE_SCALE" : "", + haveDelta ? 
" -D HAVE_DELTA" : "", + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); + + ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); + if (k.empty()) + return false; + + UMat mask = _mask.getUMat(), dst = _dst.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), + dstarg = ocl::KernelArg::ReadWrite(dst); + + if (haveScale) + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fscale, fdelta); + else + k.args(srcarg, maskarg, dstarg, fscale); + } + else + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fdelta); + else + k.args(srcarg, maskarg, dstarg); + } + + size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; + return k.run(2, globalsize, NULL, false); + } + else + { + UMat temp; + src.convertTo( temp, dtype, scale, delta ); + temp.copyTo( _dst, _mask ); + } + + return true; +} // ocl_normalize +#endif // HAVE_OPENCL + +void normalize(InputArray _src, InputOutputArray _dst, double a, double b, + int norm_type, int rtype, InputArray _mask) +{ + CV_INSTRUMENT_REGION(); + + double scale = 1, shift = 0; + int type = _src.type(), depth = CV_MAT_DEPTH(type); + + if( rtype < 0 ) + rtype = _dst.fixedType() ? _dst.depth() : depth; + + if( norm_type == CV_MINMAX ) + { + double smin = 0, smax = 0; + double dmin = MIN( a, b ), dmax = MAX( a, b ); + minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); + scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); + if( rtype == CV_32F ) + { + scale = (float)scale; + shift = (float)dmin - (float)(smin*scale); + } + else + shift = dmin - smin*scale; + } + else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) + { + scale = norm( _src, norm_type, _mask ); + scale = scale > DBL_EPSILON ? 
a/scale : 0.; + shift = 0; + } + else + CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) + + Mat src = _src.getMat(); + if( _mask.empty() ) + src.convertTo( _dst, rtype, scale, shift ); + else + { + Mat temp; + src.convertTo( temp, rtype, scale, shift ); + temp.copyTo( _dst, _mask ); + } +} + +} // namespace diff --git a/modules/core/src/persistence_c.cpp b/modules/core/src/persistence_c.cpp index 9ec70190df..904164c783 100644 --- a/modules/core/src/persistence_c.cpp +++ b/modules/core/src/persistence_c.cpp @@ -1378,48 +1378,6 @@ cvTypeOf( const void* struct_ptr ) } -/* universal functions */ -CV_IMPL void -cvRelease( void** struct_ptr ) -{ - CvTypeInfo* info; - - if( !struct_ptr ) - CV_Error( CV_StsNullPtr, "NULL double pointer" ); - - if( *struct_ptr ) - { - info = cvTypeOf( *struct_ptr ); - if( !info ) - CV_Error( CV_StsError, "Unknown object type" ); - if( !info->release ) - CV_Error( CV_StsError, "release function pointer is NULL" ); - - info->release( struct_ptr ); - *struct_ptr = 0; - } -} - - -void* cvClone( const void* struct_ptr ) -{ - void* struct_copy = 0; - CvTypeInfo* info; - - if( !struct_ptr ) - CV_Error( CV_StsNullPtr, "NULL structure pointer" ); - - info = cvTypeOf( struct_ptr ); - if( !info ) - CV_Error( CV_StsError, "Unknown object type" ); - if( !info->clone ) - CV_Error( CV_StsError, "clone function pointer is NULL" ); - - struct_copy = info->clone( struct_ptr ); - return struct_copy; -} - - /* reads matrix, image, sequence, graph etc. 
*/ CV_IMPL void* cvRead( CvFileStorage* fs, CvFileNode* node, CvAttrList* list ) diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index 8c66cdcc07..2ae5664245 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -867,6 +867,9 @@ void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng ) func( dst, rng, iterFactor ); } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 ) { @@ -884,6 +887,9 @@ CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor ) cv::randShuffle( dst, iter_factor, &rng ); } +#endif // OPENCV_EXCLUDE_C_API + + // Mersenne Twister random number generator. // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c diff --git a/modules/core/src/stat_c.cpp b/modules/core/src/stat_c.cpp index d7355b9f94..8b6f0f09e4 100644 --- a/modules/core/src/stat_c.cpp +++ b/modules/core/src/stat_c.cpp @@ -5,6 +5,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL CvScalar cvSum( const CvArr* srcarr ) { cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1)); @@ -117,3 +119,5 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr ) return !maskarr ? 
cv::norm(a, b, normType) : cv::norm(a, b, normType, mask); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index f21cf7b7e2..936348f779 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -1259,88 +1259,6 @@ UMat UMat::t() const return m; } -UMat UMat::inv(int method) const -{ - UMat m; - invert(*this, m, method); - return m; -} - -UMat UMat::mul(InputArray m, double scale) const -{ - UMat dst; - multiply(*this, m, dst, scale); - return dst; -} - -#ifdef HAVE_OPENCL - -static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) -{ - UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); - - int type = src1.type(), depth = CV_MAT_DEPTH(type), - kercn = ocl::predictOptimalVectorWidth(src1, src2); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( !doubleSupport && depth == CV_64F ) - return false; - - int dbsize = ocl::Device::getDefault().maxComputeUnits(); - size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - int ddepth = std::max(CV_32F, depth); - - int wgs2_aligned = 1; - while (wgs2_aligned < (int)wgs) - wgs2_aligned <<= 1; - wgs2_aligned >>= 1; - - char cvt[40]; - ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, - format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " - "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), - ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), - ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), - (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", - _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", - _src2.isContinuous() ? 
" -D HAVE_SRC2_CONT" : "", kercn)); - if (k.empty()) - return false; - - UMat db(1, dbsize, ddepth); - - ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), - src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), - dbarg = ocl::KernelArg::PtrWriteOnly(db); - - k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); - - size_t globalsize = dbsize * wgs; - if (k.run(1, &globalsize, &wgs, false)) - { - res = sum(db.getMat(ACCESS_READ))[0]; - return true; - } - return false; -} - -#endif - -double UMat::dot(InputArray m) const -{ - CV_INSTRUMENT_REGION(); - - CV_Assert(m.sameSize(*this) && m.type() == type()); - -#ifdef HAVE_OPENCL - double r = 0; - CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) -#endif - - return getMat(ACCESS_READ).dot(m); -} - UMat UMat::zeros(int rows, int cols, int type) { return UMat(rows, cols, type, Scalar::all(0)); @@ -1371,18 +1289,6 @@ UMat UMat::ones(int ndims, const int* sz, int type) return UMat(ndims, sz, type, Scalar(1)); } -UMat UMat::eye(int rows, int cols, int type) -{ - return UMat::eye(Size(cols, rows), type); -} - -UMat UMat::eye(Size size, int type) -{ - UMat m(size, type); - setIdentity(m); - return m; -} - } /* End of file. 
*/ From 75ad74c893a02728821d8432fe73e89e35f49ec0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 2 Mar 2021 23:56:27 +0000 Subject: [PATCH 03/10] ffmpeg/3.4: update FFmpeg wrapper 2021.03 - FFmpeg 3.4.8 --- 3rdparty/ffmpeg/ffmpeg.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake index 531d301fa8..a1ada4eeaa 100644 --- a/3rdparty/ffmpeg/ffmpeg.cmake +++ b/3rdparty/ffmpeg/ffmpeg.cmake @@ -1,8 +1,8 @@ -# Binaries branch name: ffmpeg/3.4_20200907 -# Binaries were created for OpenCV: 03bee14372f5537daa56c62e771ec16181ca1f98 -ocv_update(FFMPEG_BINARIES_COMMIT "2a96257b743695a47f8012aab1ffb995a1dee8b4") -ocv_update(FFMPEG_FILE_HASH_BIN32 "5e68a3ff82f43ac6524e50e448a34c9c") -ocv_update(FFMPEG_FILE_HASH_BIN64 "205db629d893e7d4865fd1459807ff47") +# Binaries branch name: ffmpeg/3.4_20210302 +# Binaries were created for OpenCV: 2ab1f3f166fccc3a01497209cc01c5cea44ff201 +ocv_update(FFMPEG_BINARIES_COMMIT "e99214251d9f3cde7c48abd46b2259bddc9885b6") +ocv_update(FFMPEG_FILE_HASH_BIN32 "fad5ada9be36120bba8966709e7953a8") +ocv_update(FFMPEG_FILE_HASH_BIN64 "650e2272728491923e566f784f79cfef") ocv_update(FFMPEG_FILE_HASH_CMAKE "3b90f67f4b429e77d3da36698cef700c") function(download_win_ffmpeg script_var) From a42d4da003357751a3579bef1568dcbf803f8bb7 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Wed, 3 Mar 2021 22:42:47 +0530 Subject: [PATCH 04/10] Added Spatial Attention Module in Darknet Importer --- modules/dnn/src/darknet/darknet_io.cpp | 31 ++++++++++++++++++++++ modules/dnn/test/test_darknet_importer.cpp | 5 ++++ 2 files changed, 36 insertions(+) diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index e3c978a8c0..4915538ff7 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -558,6 +558,29 @@ namespace cv { fused_layer_names.push_back(last_layer); } + void setSAM(int from) + { + 
cv::dnn::LayerParams eltwise_param; + eltwise_param.name = "SAM-name"; + eltwise_param.type = "Eltwise"; + + eltwise_param.set("operation", "prod"); + eltwise_param.set("output_channels_mode", "same"); + + darknet::LayerParameter lp; + std::string layer_name = cv::format("sam_%d", layer_id); + lp.layer_name = layer_name; + lp.layer_type = eltwise_param.type; + lp.layerParams = eltwise_param; + lp.bottom_indexes.push_back(last_layer); + lp.bottom_indexes.push_back(fused_layer_names.at(from)); + last_layer = layer_name; + net->layers.push_back(lp); + + layer_id++; + fused_layer_names.push_back(last_layer); + } + void setUpsample(int scaleFactor) { cv::dnn::LayerParams param; @@ -837,6 +860,14 @@ namespace cv { from = from < 0 ? from + layers_counter : from; setParams.setScaleChannels(from); } + else if (layer_type == "sam") + { + std::string bottom_layer = getParam(layer_params, "from", ""); + CV_Assert(!bottom_layer.empty()); + int from = std::atoi(bottom_layer.c_str()); + from = from < 0 ? 
from + layers_counter : from; + setParams.setSAM(from); + } else if (layer_type == "upsample") { int scaleFactor = getParam(layer_params, "stride", 1); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 00638f83c5..8a633fa566 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -770,6 +770,11 @@ TEST_P(Test_Darknet_layers, relu) testDarknetLayer("relu"); } +TEST_P(Test_Darknet_layers, sam) +{ + testDarknetLayer("sam", true); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets()); }} // namespace From 94533e12ebd6e131723a53e2d31e97986fea6e5b Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Thu, 4 Mar 2021 13:05:01 +0300 Subject: [PATCH 05/10] Determine layout --- modules/dnn/src/ie_ngraph.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 84b984ac97..aa3be70e05 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -769,8 +769,14 @@ static InferenceEngine::Layout estimateLayout(const Mat& m) { if (m.dims == 4) return InferenceEngine::Layout::NCHW; + else if (m.dims == 3) + return InferenceEngine::Layout::CHW; else if (m.dims == 2) return InferenceEngine::Layout::NC; + else if (m.dims == 1) + return InferenceEngine::Layout::C; + else if (m.dims == 5) + return InferenceEngine::Layout::NCDHW; else return InferenceEngine::Layout::ANY; } From 125cc79c179f364eeecda72b24e6bb2da2f1bd1e Mon Sep 17 00:00:00 2001 From: APrigarina Date: Thu, 4 Mar 2021 14:04:50 +0300 Subject: [PATCH 06/10] fix false positive detection --- modules/objdetect/src/qrcode.cpp | 38 +++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 449e6e6d32..929807b34e 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ 
-235,9 +235,11 @@ vector QRDetect::searchHorizontalLines() vector QRDetect::separateVerticalLines(const vector &list_lines) { CV_TRACE_FUNCTION(); - - for (int coeff_epsilon = 1; coeff_epsilon < 10; coeff_epsilon++) + const double min_dist_between_points = 10.0; + const double max_ratio = 1.0; + for (int coeff_epsilon_i = 1; coeff_epsilon_i < 101; ++coeff_epsilon_i) { + const float coeff_epsilon = coeff_epsilon_i * 0.1f; vector point2f_result = extractVerticalLines(list_lines, eps_horizontal * coeff_epsilon); if (!point2f_result.empty()) { @@ -247,9 +249,23 @@ vector QRDetect::separateVerticalLines(const vector &list_lines) point2f_result, 3, labels, TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), 3, KMEANS_PP_CENTERS, centers); - if (compactness == 0) + double min_dist = std::numeric_limits::max(); + for (size_t i = 0; i < centers.size(); i++) + { + double dist = norm(centers[i] - centers[(i+1) % centers.size()]); + if (dist < min_dist) + { + min_dist = dist; + } + } + if (min_dist < min_dist_between_points) + { continue; - if (compactness > 0) + } + double mean_compactness = compactness / point2f_result.size(); + double ratio = mean_compactness / min_dist; + + if (ratio < max_ratio) { return point2f_result; } @@ -456,7 +472,6 @@ bool QRDetect::localization() vector list_lines_y = separateVerticalLines(list_lines_x); if( list_lines_y.empty() ) { return false; } - vector centers; Mat labels; kmeans(list_lines_y, 3, labels, TermCriteria( TermCriteria::EPS + TermCriteria::COUNT, 10, 0.1), @@ -464,7 +479,7 @@ bool QRDetect::localization() fixationPoints(localization_points); - bool suare_flag = false, local_points_flag = false; + bool square_flag = false, local_points_flag = false; double triangle_sides[3]; double triangle_perim, square_area, img_square_area; if (localization_points.size() == 3) @@ -482,14 +497,14 @@ bool QRDetect::localization() if (square_area > (img_square_area * 0.2)) { - suare_flag = true; + square_flag = true; } } else { 
local_points_flag = true; } - if ((suare_flag || local_points_flag) && purpose == SHRINKING) + if ((square_flag || local_points_flag) && purpose == SHRINKING) { localization_points.clear(); bin_barcode = resized_bin_barcode.clone(); @@ -1970,6 +1985,13 @@ bool QRDecode::createSpline(vector > &spline_lines) } } } + for (int i = 0; i < NUM_SIDES; i++) + { + if (spline_lines[i].size() == 0) + { + return false; + } + } return true; } From 7894cd3c73df5b90139c54c664225b913c5f049c Mon Sep 17 00:00:00 2001 From: Anastasia Murzova Date: Sun, 28 Feb 2021 19:55:43 +0300 Subject: [PATCH 07/10] Aligned TF Reshape layer behaviour --- modules/dnn/src/tensorflow/tf_importer.cpp | 70 +++++++++++++++++----- modules/dnn/test/test_tf_importer.cpp | 10 ++++ 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index c03ac8a943..53d62fc9f7 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -295,6 +295,22 @@ DataLayout getDataLayout( return it != data_layouts.end() ? 
it->second : DATA_LAYOUT_UNKNOWN; } +static +bool hasAllOnes(const Mat &inputs, int startPos, int endPos) +{ + CV_CheckLE(inputs.dims, 2, ""); + CV_CheckGE(startPos, 0, ""); + CV_CheckLE(startPos, endPos, ""); + CV_CheckLT((size_t)endPos, inputs.total(), ""); + + for (int i = startPos; i < endPos; i++) + { + if (inputs.at(i) != 1 || inputs.at(i)!= -1) + return false; + } + return true; +} + void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) { if (hasLayerAttr(layer, "strides")) @@ -490,6 +506,9 @@ protected: std::map sharedWeights; std::map layer_id; + +private: + void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); }; TFImporter::TFImporter(Net& net, const char *model, const char *config) @@ -895,6 +914,17 @@ void TFImporter::populateNet() CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); } +void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order, 4)); + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); +} + void TFImporter::parseNode(const tensorflow::NodeDef& layer_) { tensorflow::NodeDef layer = layer_; @@ -1276,37 +1306,49 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer_) if (value_id.find(layer.input(1)) != value_id.end()) { Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); - if (newShape.total() == 4) + int newShapeSize = newShape.total(); + bool hasSwap = false; + if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) { // NHWC->NCHW std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + hasSwap = true; } if (inpLayout == DATA_LAYOUT_NHWC) { - if (newShape.total() != 4 || newShape.at(1) == 1) + if (newShapeSize 
>= 2 || newShape.at(1) == 1) { - LayerParams permLP; int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - permLP.set("order", DictValue::arrayInt(order, 4)); - - std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); - inpLayout = DATA_LAYOUT_NCHW; + addPermuteLayer(order, name + "/nhwc", inpId); + if (newShapeSize < 4) + { + inpLayout = DATA_LAYOUT_NCHW; + } + else + { + inpLayout = DATA_LAYOUT_NHWC; + } } } - layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShape.total())); + layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); int id = dstNet.addLayer(name, "Reshape", layerParams); layer_id[name] = id; // one input only connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : inpLayout; + inpId = Pin(name); + + if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && + newShapeSize == 4 && !hasSwap) + { + int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. + addPermuteLayer(order, name + "/nchw", inpId); + inpLayout = DATA_LAYOUT_NCHW; + } + + data_layouts[name] = newShapeSize == 2 ? 
DATA_LAYOUT_PLANAR : inpLayout; } else { diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 6163e89fa7..6a1a44f03a 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -457,6 +457,16 @@ TEST_P(Test_TensorFlow_layers, unfused_flatten) runTensorFlowNet("unfused_flatten_unknown_batch"); } +TEST_P(Test_TensorFlow_layers, reshape_layer) +{ + runTensorFlowNet("reshape_layer"); +} + +TEST_P(Test_TensorFlow_layers, reshape_nchw) +{ + runTensorFlowNet("reshape_nchw"); +} + TEST_P(Test_TensorFlow_layers, leaky_relu) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) From 625d4fc8843435a505adeec752d5f17e1c153cef Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 5 Mar 2021 12:54:51 +0000 Subject: [PATCH 08/10] cmake: update Python linters handling - exclude from getBuildInformation() - fix pylint version --- CMakeLists.txt | 12 ++++++------ cmake/FindPylint.cmake | 2 +- cmake/OpenCVPylint.cmake | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65933cdeef..f6a2da5310 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1000,6 +1000,12 @@ if(COMMAND ocv_pylint_finalize) ocv_pylint_add_directory_recurse(${CMAKE_CURRENT_LIST_DIR}/samples/python/tutorial_code) ocv_pylint_finalize() endif() +if(TARGET check_pylint) + message(STATUS "Registered 'check_pylint' target: using ${PYLINT_EXECUTABLE} (ver: ${PYLINT_VERSION}), checks: ${PYLINT_TOTAL_TARGETS}") +endif() +if(TARGET check_flake8) + message(STATUS "Registered 'check_flake8' target: using ${FLAKE8_EXECUTABLE} (ver: ${FLAKE8_VERSION})") +endif() if(OPENCV_GENERATE_SETUPVARS) include(cmake/OpenCVGenSetupVars.cmake) @@ -1633,12 +1639,6 @@ endif() status("") status(" Python (for build):" PYTHON_DEFAULT_AVAILABLE THEN "${PYTHON_DEFAULT_EXECUTABLE}" ELSE NO) -if(PYLINT_FOUND AND PYLINT_EXECUTABLE) - status(" Pylint:" PYLINT_FOUND THEN "${PYLINT_EXECUTABLE} 
(ver: ${PYLINT_VERSION}, checks: ${PYLINT_TOTAL_TARGETS})" ELSE NO) -endif() -if(FLAKE8_FOUND AND FLAKE8_EXECUTABLE) - status(" Flake8:" FLAKE8_FOUND THEN "${FLAKE8_EXECUTABLE} (ver: ${FLAKE8_VERSION})" ELSE NO) -endif() # ========================== java ========================== if(BUILD_JAVA) diff --git a/cmake/FindPylint.cmake b/cmake/FindPylint.cmake index 5731ba493a..ef4b4394ff 100644 --- a/cmake/FindPylint.cmake +++ b/cmake/FindPylint.cmake @@ -16,7 +16,7 @@ if(PYLINT_EXECUTABLE AND NOT DEFINED PYLINT_VERSION) execute_process(COMMAND ${PYLINT_EXECUTABLE} --version RESULT_VARIABLE _result OUTPUT_VARIABLE PYLINT_VERSION_RAW) if(NOT _result EQUAL 0) ocv_clear_vars(PYLINT_EXECUTABLE PYLINT_VERSION) - elseif(PYLINT_VERSION_RAW MATCHES "pylint([^,]*) ([0-9\\.]+[0-9])") + elseif(PYLINT_VERSION_RAW MATCHES "pylint([^,\n]*) ([0-9\\.]+[0-9])") set(PYLINT_VERSION "${CMAKE_MATCH_2}") else() set(PYLINT_VERSION "unknown") diff --git a/cmake/OpenCVPylint.cmake b/cmake/OpenCVPylint.cmake index 50da730946..928926d340 100644 --- a/cmake/OpenCVPylint.cmake +++ b/cmake/OpenCVPylint.cmake @@ -122,7 +122,6 @@ function(ocv_pylint_finalize) list(LENGTH PYLINT_TARGET_ID __total) set(PYLINT_TOTAL_TARGETS "${__total}" CACHE INTERNAL "") - message(STATUS "Pylint: registered ${__total} targets. Build 'check_pylint' target to run checks (\"cmake --build . --target check_pylint\" or \"make check_pylint\")") configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/pylint.cmake.in" "${CMAKE_BINARY_DIR}/pylint.cmake" @ONLY) add_custom_target(check_pylint From 640f188ca269c7cc7134c70725789e556a7a9733 Mon Sep 17 00:00:00 2001 From: Mradul Agrawal <69335152+theroyalpekka@users.noreply.github.com> Date: Fri, 5 Mar 2021 19:25:52 +0530 Subject: [PATCH 09/10] Merge pull request #19583 from theroyalpekka:patch-1 * Update polynom_solver.cpp This pull request is in the response to Issue #19526. I have fixed the problem with the cube root calculation of 2*R. 
The Issue was in the usage of pow function with negative values of R, but if it is calculated for only positive values of R then changing x0 according to the parity of R, the Issue is resolved. Kindly consider it, Thanks! * add cv::cubeRoot(double) Co-authored-by: Alexander Alekhin --- modules/calib3d/src/polynom_solver.cpp | 14 +++++++++++--- modules/core/include/opencv2/core/base.hpp | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/modules/calib3d/src/polynom_solver.cpp b/modules/calib3d/src/polynom_solver.cpp index beb91cafc0..5025199dd3 100644 --- a/modules/calib3d/src/polynom_solver.cpp +++ b/modules/calib3d/src/polynom_solver.cpp @@ -65,7 +65,8 @@ int solve_deg3(double a, double b, double c, double d, return 3; } else { - x0 = pow(2 * R, 1 / 3.0) - b_a_3; + double cube_root = cv::cubeRoot(2 * R); + x0 = cube_root - b_a_3; return 1; } } @@ -82,8 +83,15 @@ int solve_deg3(double a, double b, double c, double d, } // D > 0, only one real root - double AD = pow(fabs(R) + sqrt(D), 1.0 / 3.0) * (R > 0 ? 1 : (R < 0 ? -1 : 0)); - double BD = (AD == 0) ? 0 : -Q / AD; + double AD = 0.; + double BD = 0.; + double R_abs = fabs(R); + if (R_abs > DBL_EPSILON) + { + AD = cv::cubeRoot(R_abs + sqrt(D)); + AD = (R >= 0) ? AD : -AD; + BD = -Q / AD; + } // Calculate the only real root x0 = AD + BD - b_a_3; diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index 546140e9f1..12504974d9 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -587,6 +587,21 @@ _AccTp normInf(const _Tp* a, const _Tp* b, int n) */ CV_EXPORTS_W float cubeRoot(float val); +/** @overload + +cubeRoot with argument of `double` type calls `std::cbrt(double)` (C++11) or falls back on `pow()` for C++98 compilation mode. 
+*/ +static inline +double cubeRoot(double val) +{ +#ifdef CV_CXX11 + return std::cbrt(val); +#else + double v = pow(abs(val), 1/3.); // pow doesn't support negative inputs with fractional exponents + return val >= 0 ? v : -v; +#endif +} + /** @brief Calculates the angle of a 2D vector in degrees. The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured From 04a9ff88d80fd2757b0545e7420884a4394394af Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Sat, 6 Mar 2021 20:22:21 +0300 Subject: [PATCH 10/10] Merge pull request #19622 from terfendail:ref_doc * Updated cpp reference implementations for a few intrinsics to address wide universal intrinsics as well * Updated cpp reference implementations for a few more universal intrinsics --- .../include/opencv2/core/hal/intrin_cpp.hpp | 478 ++++++++---------- 1 file changed, 216 insertions(+), 262 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 859bfd72dc..5878dced7f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -559,27 +559,6 @@ template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) return c; \ } -//! @brief Helper macro -//! 
@ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \ -inline v_reg func(const v_reg& a) \ -{ \ - v_reg c; \ - for( int i = 0; i < 4; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ -} \ -inline v_reg func(const v_reg& a) \ -{ \ - v_reg c; \ - for( int i = 0; i < 2; i++ ) \ - { \ - c.s[i] = cfunc(a.s[i]); \ - c.s[i + 2] = 0; \ - } \ - return c; \ -} - /** @brief Square root of elements Only for floating point types.*/ @@ -598,26 +577,6 @@ Only for floating point types.*/ OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, typename V_TypeTraits<_Tp>::abs_type) -/** @brief Round elements - -Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound) - -/** @brief Floor elements - -Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor) - -/** @brief Ceil elements - -Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil) - -/** @brief Truncate elements - -Only for floating point types.*/ -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int) - //! @brief Helper macro //! 
@ingroup core_hal_intrin_impl #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ @@ -855,9 +814,9 @@ inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, /** @overload For 32-bit floating point values */ -inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) +template inline v_reg v_absdiff(const v_reg& a, const v_reg& b) { - v_float32x4 c; + v_reg c; for( int i = 0; i < c.nlanes; i++ ) c.s[i] = _absdiff(a.s[i], b.s[i]); return c; @@ -866,9 +825,9 @@ inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) /** @overload For 64-bit floating point values */ -inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) +template inline v_reg v_absdiff(const v_reg& a, const v_reg& b) { - v_float64x2 c; + v_reg c; for( int i = 0; i < c.nlanes; i++ ) c.s[i] = _absdiff(a.s[i], b.s[i]); return c; @@ -1238,14 +1197,17 @@ template inline typename V_TypeTraits<_Tp>::sum_type v_redu result[3] = d[0] + d[1] + d[2] + d[3] @endcode */ -inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, - const v_float32x4& c, const v_float32x4& d) +template inline v_reg v_reduce_sum4(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - v_float32x4 r; - r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; - r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; - r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; - r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; + v_reg r; + for(int i = 0; i < (n/4); i++) + { + r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3]; + r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3]; + r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3]; + r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3]; + } return r; } @@ -1965,9 +1927,11 @@ inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) return v_reg<_Tp, n>::all(a.s[i]); } -/** @brief Round +/** @brief Round elements -Rounds each value. 
Input type is float vector ==> output type is int vector.*/ +Rounds each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_round(const v_reg& a) { v_reg c; @@ -1988,9 +1952,11 @@ template inline v_reg v_round(const v_reg& a, const return c; } -/** @brief Floor +/** @brief Floor elements -Floor each value. Input type is float vector ==> output type is int vector.*/ +Floor each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_floor(const v_reg& a) { v_reg c; @@ -1999,9 +1965,11 @@ template inline v_reg v_floor(const v_reg& a) return c; } -/** @brief Ceil +/** @brief Ceil elements -Ceil each value. Input type is float vector ==> output type is int vector.*/ +Ceil each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_ceil(const v_reg& a) { v_reg c; @@ -2010,9 +1978,11 @@ template inline v_reg v_ceil(const v_reg& a) return c; } -/** @brief Trunc +/** @brief Truncate elements -Truncate each value. Input type is float vector ==> output type is int vector.*/ +Truncate each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. 
+*/ template inline v_reg v_trunc(const v_reg& a) { v_reg c; @@ -2036,7 +2006,7 @@ template inline v_reg v_round(const v_reg& a) /** @overload */ template inline v_reg v_floor(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { c.s[i] = cvFloor(a.s[i]); @@ -2048,7 +2018,7 @@ template inline v_reg v_floor(const v_reg& a) /** @overload */ template inline v_reg v_ceil(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { c.s[i] = cvCeil(a.s[i]); @@ -2060,10 +2030,10 @@ template inline v_reg v_ceil(const v_reg& a) /** @overload */ template inline v_reg v_trunc(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { - c.s[i] = cvCeil(a.s[i]); + c.s[i] = (int)(a.s[i]); c.s[i+n] = 0; } return c; @@ -2105,11 +2075,10 @@ template inline v_reg v_cvt_f32(const v_reg& a, co /** @brief Convert to double Supported input type is cv::v_int32x4. */ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < (n/2); i++ ) c.s[i] = (double)a.s[i]; return c; } @@ -2117,23 +2086,21 @@ CV_INLINE v_reg v_cvt_f64(const v_reg& a) /** @brief Convert to double high part of vector Supported input type is cv::v_int32x4. */ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; return c; } /** @brief Convert to double Supported input type is cv::v_float32x4. 
*/ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < (n/2); i++ ) c.s[i] = (double)a.s[i]; return c; } @@ -2141,33 +2108,19 @@ CV_INLINE v_reg v_cvt_f64(const v_reg& a) /** @brief Convert to double high part of vector Supported input type is cv::v_float32x4. */ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; return c; } /** @brief Convert to double Supported input type is cv::v_int64x2. */ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -/** @brief Convert to double high part of vector - -Supported input type is cv::v_int64x2. 
*/ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; v_reg c; for( int i = 0; i < n; i++ ) c.s[i] = (double)a.s[i]; @@ -2221,36 +2174,15 @@ template inline v_reg v_lut(const float* tab, const v_reg inline v_reg v_lut(const double* tab, const v_reg& idx) +template inline v_reg v_lut(const double* tab, const v_reg& idx) { - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < n/2; i++ ) c.s[i] = tab[idx.s[i]]; return c; } -inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - - template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, v_reg& x, v_reg& y) { @@ -2330,16 +2262,23 @@ b2 {A3 B3 C3 D3} b3 {A4 B4 C4 D4} @endcode */ -template -inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) +template +inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + const v_reg<_Tp, n>& a2, const v_reg<_Tp, n>& a3, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1, + v_reg<_Tp, n>& b2, v_reg<_Tp, n>& b3 ) { - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); + for (int i = 0; i < n / 4; i++) + { + b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4]; + b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4]; + b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] = a1.s[1 + i*4]; + b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 
+ i*4] = a3.s[1 + i*4]; + b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4]; + b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4]; + b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4]; + b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4]; + } } //! @brief Helper macro @@ -2384,92 +2323,92 @@ OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \ +template inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \ v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } +{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); } //! @name Reinterpret //! @{ //! @brief Convert vector to different type without modifying underlying data. -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64) //! @} //! @brief Helper macro //! 
@ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \ +template inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \ +{ return a << shift; } //! @name Left shift //! @{ //! @brief Shift left -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) +OPENCV_HAL_IMPL_C_SHIFTL(ushort) +OPENCV_HAL_IMPL_C_SHIFTL(short) +OPENCV_HAL_IMPL_C_SHIFTL(unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(int) +OPENCV_HAL_IMPL_C_SHIFTL(uint64) +OPENCV_HAL_IMPL_C_SHIFTL(int64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \ +{ return a >> shift; } //! @name Right shift //! @{ //! @brief Shift right -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) +OPENCV_HAL_IMPL_C_SHIFTR(ushort) +OPENCV_HAL_IMPL_C_SHIFTR(short) +OPENCV_HAL_IMPL_C_SHIFTR(unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(int) +OPENCV_HAL_IMPL_C_SHIFTR(uint64) +OPENCV_HAL_IMPL_C_SHIFTR(int64) //! @} //! @brief Helper macro //! 
@ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \ { \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ return c; \ } //! @name Rounding shift //! @{ //! @brief Rounding shift right -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) +OPENCV_HAL_IMPL_C_RSHIFTR(ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(short) +OPENCV_HAL_IMPL_C_RSHIFTR(unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(int) +OPENCV_HAL_IMPL_C_RSHIFTR(uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(int64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ { \ c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ + c.s[i+n] = cast<_Tpn>(b.s[i]); \ } \ return c; \ } @@ -2485,26 +2424,26 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. 
-OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! 
@ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ + c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ } \ return c; \ } @@ -2520,22 +2459,22 @@ template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. 
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ { \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + for( int i = 0; i < n; i++ ) \ ptr[i] = cast<_Tpn>(a.s[i]); \ } @@ -2550,23 +2489,23 @@ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. 
-OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ { \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + for( int i = 0; i < n; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ } //! @name Pack and store with rounding shift @@ -2580,14 +2519,14 @@ template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec //! 
- pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, ushort, pack_u, saturate_cast) //! @} //! @cond IGNORED @@ -2622,9 +2561,9 @@ b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF} } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); return mask; } @@ -2645,12 +2584,12 @@ d {0 0xFFFF.. 
0 0xFFFF..} } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); + _pack_b(mask.s + 2*n, c, d); return mask; } @@ -2674,15 +2613,16 @@ h {0 0xFFFF..} 0xFF 0 0xFF 0 0 0xFF 0 0xFF } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d, + const v_reg& e, const v_reg& f, + const v_reg& g, const v_reg& h) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); + _pack_b(mask.s + 2*n, c, d); + _pack_b(mask.s + 4*n, e, f); + _pack_b(mask.s + 6*n, g, h); return mask; } //! @} @@ -2697,54 +2637,68 @@ Scheme: {D0 D1 D2 D3} x |V3| ==================== {R0 R1 R2 R3}, where: -R0 = A0V0 + A1V1 + A2V2 + A3V3, -R1 = B0V0 + B1V1 + B2V2 + B3V3 +R0 = A0V0 + B0V1 + C0V2 + D0V3, +R1 = A1V0 + B1V1 + C1V2 + D1V3 ... 
@endcode */ -inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) +template +inline v_reg v_matmul(const v_reg& v, + const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); + v_reg res; + for (int i = 0; i < n / 4; i++) + { + res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4]; + res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4]; + res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4]; + res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4]; + } + return res; } /** @brief Matrix multiplication and add Scheme: @code -{A0 A1 A2 } |V0| |D0| -{B0 B1 B2 } |V1| |D1| -{C0 C1 C2 } x |V2| + |D2| -==================== +{A0 A1 A2 A3} |V0| |D0| +{B0 B1 B2 B3} |V1| |D1| +{C0 C1 C2 C3} x |V2| + |D2| +==================== |D3| {R0 R1 R2 R3}, where: -R0 = A0V0 + A1V1 + A2V2 + D0, -R1 = B0V0 + B1V1 + B2V2 + D1 +R0 = A0V0 + B0V1 + C0V2 + D0, +R1 = A1V0 + B1V1 + C1V2 + D1 ... 
@endcode */ -inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) +template +inline v_reg v_matmuladd(const v_reg& v, + const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); + v_reg res; + for (int i = 0; i < n / 4; i++) + { + res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4]; + res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4]; + res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4]; + res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4]; + } + return res; } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +template inline v_reg v_dotprod_expand(const v_reg& a, const v_reg& b) { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +template inline v_reg v_dotprod_expand(const v_reg& a, const v_reg& b, + const v_reg& c) { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +template inline v_reg v_dotprod_expand_fast(const v_reg& a, const v_reg& b) { return v_dotprod_expand(a, b); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) 
+template inline v_reg v_dotprod_expand_fast(const v_reg& a, const v_reg& b, + const v_reg& c) { return v_dotprod_expand(a, b, c); } ////// FP16 support /////// @@ -2760,8 +2714,8 @@ v_load_expand(const float16_t* ptr) return v; } -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +template inline void +v_pack_store(float16_t* ptr, const v_reg& v) { for( int i = 0; i < v.nlanes; i++ ) {