From 2695039a791f8a8c38760bc8144ce704ed0f0dd9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 14 Nov 2011 14:34:36 +0000 Subject: [PATCH] moved GpuMat's operations implementation to core module --- modules/core/CMakeLists.txt | 130 ++++- modules/core/include/opencv2/core/gpumat.hpp | 22 +- .../src/cuda/matrix_operations.cu | 50 +- modules/core/src/gpumat.cpp | 499 +++++++++++++++++- modules/gpu/include/opencv2/gpu/gpu.hpp | 4 - modules/gpu/src/cuda/internal_shared.hpp | 32 -- modules/gpu/src/cuda/safe_call.hpp | 60 +-- modules/gpu/src/cudastream.cpp | 50 +- modules/gpu/src/error.cpp | 16 - modules/gpu/src/initialization.cpp | 374 ------------- .../opencv2/gpu/device/border_interpolate.hpp | 1 - modules/gpu/src/opencv2/gpu/device/color.hpp | 1 - modules/gpu/src/opencv2/gpu/device/common.hpp | 100 ++++ .../src/opencv2/gpu/device/datamov_utils.hpp | 2 +- .../gpu/device/detail/color_detail.hpp | 2 +- .../gpu/device/detail/transform_detail.hpp | 2 +- .../gpu/device/detail/type_traits_detail.hpp | 2 +- .../gpu/device/detail/utility_detail.hpp | 2 - .../gpu/device/detail/vec_distance_detail.hpp | 1 - .../src/opencv2/gpu/device/dynamic_smem.hpp | 61 +-- .../gpu/src/opencv2/gpu/device/emulation.hpp | 1 - .../gpu/src/opencv2/gpu/device/filters.hpp | 1 - .../gpu/src/opencv2/gpu/device/funcattrib.hpp | 1 - .../gpu/src/opencv2/gpu/device/functional.hpp | 1 - modules/gpu/src/opencv2/gpu/device/limits.hpp | 2 +- .../src/opencv2/gpu/device/saturate_cast.hpp | 2 +- .../gpu/src/opencv2/gpu/device/transform.hpp | 2 +- .../src/opencv2/gpu/device/type_traits.hpp | 1 - .../gpu/src/opencv2/gpu/device/utility.hpp | 1 - .../src/opencv2/gpu/device/vec_distance.hpp | 1 - .../gpu/src/opencv2/gpu/device/vec_math.hpp | 1 - .../gpu/src/opencv2/gpu/device/vec_traits.hpp | 2 +- modules/gpu/src/opencv2/gpu/device/warp.hpp | 2 - .../src/opencv2/gpu/device/warp_reduce.hpp | 2 - 34 files changed, 825 insertions(+), 606 deletions(-) rename modules/{gpu => core}/src/cuda/matrix_operations.cu (79%) create mode 100644 modules/gpu/src/opencv2/gpu/device/common.hpp diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 88c457f0ae..edd7a17277 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -3,4 +3,132 @@ if(ZLIB_FOUND) else() include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../3rdparty/zlib") endif() -define_opencv_module(core ${ZLIB_LIBRARY}) + +#define_opencv_module(core ${ZLIB_LIBRARY}) + +set(name "core") + +project(opencv_${name}) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_BINARY_DIR}") + +file(GLOB lib_srcs "src/*.cpp") +file(GLOB lib_int_hdrs "src/*.h*") +file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") +file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.h*") + +if(COMMAND get_module_external_sources) + get_module_external_sources(${name}) +endif() + +source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) +source_group("Include" FILES ${lib_hdrs}) +source_group("Include\\detail" FILES ${lib_hdrs_detail}) +list(APPEND lib_hdrs ${lib_hdrs_detail}) + +if (HAVE_CUDA) + file(GLOB lib_cuda "src/cuda/*.cu") + source_group("Cuda" FILES "${lib_cuda}") + + include_directories(${CUDA_INCLUDE_DIRS}) + include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/src") + include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/src/cuda") + + set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_10,code=sm_10 + -gencode arch=compute_11,code=sm_11 + -gencode arch=compute_12,code=sm_12 + -gencode arch=compute_13,code=sm_13 + -gencode arch=compute_20,code=sm_20 + -gencode arch=compute_20,code=sm_21) + + if (UNIX OR APPLE) + set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fPIC;") + #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-fPIC") + endif() + + #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-keep") + #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;") + + if (APPLE) + set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;") + endif() + + CUDA_COMPILE(cuda_objs ${lib_cuda}) + #CUDA_BUILD_CLEAN_TARGET() +endif() + +set(the_target "opencv_${name}") +add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${cuda_objs}) + +# For dynamic link numbering convenions +if(NOT ANDROID) + # Android SDK build scripts can include only .so files into final .apk + # As result we should not set version properties for Android + set_target_properties(${the_target} PROPERTIES + VERSION ${OPENCV_VERSION} + SOVERSION ${OPENCV_SOVERSION} + ) +endif() + +set_target_properties(${the_target} PROPERTIES OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}" ) + +if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_target} PROPERTIES FOLDER "modules") +endif() + +if (BUILD_SHARED_LIBS) + if(MSVC) + set_target_properties(${the_target} PROPERTIES DEFINE_SYMBOL CVAPI_EXPORTS) + else() + add_definitions(-DCVAPI_EXPORTS) + endif() +endif() + +# Additional target properties +set_target_properties(${the_target} PROPERTIES + DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" + ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH} + RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} + INSTALL_NAME_DIR lib + ) + +# Add the required libraries for linking: +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ZLIB_LIBRARY}) + +if (HAVE_CUDA) + target_link_libraries(${the_target} ${CUDA_LIBRARIES}) + + unset(CUDA_npp_LIBRARY CACHE) + find_cuda_helper_libs(npp) + target_link_libraries(${the_target} ${CUDA_npp_LIBRARY}) +endif() + +if(MSVC) + if(CMAKE_CROSSCOMPILING) + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:secchk" + ) + endif() + set_target_properties(${the_target} PROPERTIES + LINK_FLAGS "/NODEFAULTLIB:libc /DEBUG" + ) +endif() + +# Dependencies of this target: +add_dependencies(${the_target} ${ZLIB_LIBRARY}) + +install(TARGETS ${the_target} + RUNTIME DESTINATION bin COMPONENT main + LIBRARY DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main + ARCHIVE DESTINATION ${OPENCV_LIB_INSTALL_PATH} COMPONENT main) + +install(FILES ${lib_hdrs} + DESTINATION ${OPENCV_INCLUDE_PREFIX}/opencv2/${name} + COMPONENT main) + +add_opencv_precompiled_headers(${the_target}) + +define_opencv_test(${name}) +define_opencv_perf_test(${name}) diff --git a/modules/core/include/opencv2/core/gpumat.hpp b/modules/core/include/opencv2/core/gpumat.hpp index 502655b9ea..99ec0402a1 100644 --- a/modules/core/include/opencv2/core/gpumat.hpp +++ b/modules/core/include/opencv2/core/gpumat.hpp @@ -212,27 +212,9 @@ namespace cv { namespace gpu CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m); CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m); - class CV_EXPORTS GpuFuncTable - { - public: - virtual ~GpuFuncTable() {} + //////////////////////////////// Error handling //////////////////////// - virtual void copy(const Mat& src, GpuMat& dst) const = 0; - virtual void copy(const GpuMat& src, Mat& dst) const = 0; - virtual void copy(const GpuMat& src, GpuMat& dst) const = 0; - - virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0; - - virtual void convert(const GpuMat& src, GpuMat& dst) const = 0; - virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0; - - virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0; - - virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0; - virtual void free(void* devPtr) const = 0; - }; - - CV_EXPORTS void setGpuFuncTable(const GpuFuncTable* funcTbl); + CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/cuda/matrix_operations.cu b/modules/core/src/cuda/matrix_operations.cu similarity index 79% rename from modules/gpu/src/cuda/matrix_operations.cu rename to modules/core/src/cuda/matrix_operations.cu index 980ff1e3bd..09fd40c58c 100644 --- a/modules/gpu/src/cuda/matrix_operations.cu +++ b/modules/core/src/cuda/matrix_operations.cu @@ -40,7 +40,6 @@ // //M*/ -#include "internal_shared.hpp" #include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/transform.hpp" #include "opencv2/gpu/device/functional.hpp" @@ -75,7 +74,7 @@ namespace cv { namespace gpu { namespace device } template - void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream) + void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream) { dim3 threadsPerBlock(16,16, 1); dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1); @@ -88,9 +87,9 @@ namespace cv { namespace gpu { namespace device cudaSafeCall ( cudaDeviceSynchronize() ); } - void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream) + void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream) { - typedef void (*CopyToFunc)(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream); + typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream); static CopyToFunc tab[8] = { @@ -106,7 +105,8 @@ namespace cv { namespace gpu { namespace device CopyToFunc func = tab[depth]; - if (func == 0) cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__); + if (func == 0) + cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__); func(mat_src, mat_dst, mask, channels, stream); } @@ -188,7 +188,7 @@ namespace cv { namespace gpu { namespace device } } template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream) + void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream) { writeScalar(scalar); @@ -202,16 +202,16 @@ namespace cv { namespace gpu { namespace device cudaSafeCall ( cudaDeviceSynchronize() ); } - template void set_to_gpu(const DevMem2Db& mat, const uchar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const schar* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const ushort* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const short* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const int* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const float* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const double* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const uchar* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const schar* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const ushort* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const short* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const int* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const float* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const double* scalar, DevMem2Db mask, int channels, cudaStream_t stream); template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream) + void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream) { writeScalar(scalar); @@ -225,13 +225,13 @@ namespace cv { namespace gpu { namespace device cudaSafeCall ( cudaDeviceSynchronize() ); } - template void set_to_gpu(const DevMem2Db& mat, const uchar* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const schar* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const ushort* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const short* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const int* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const float* scalar, int channels, cudaStream_t stream); - template void set_to_gpu(const DevMem2Db& mat, const double* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const uchar* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const schar* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const ushort* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const short* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const int* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const float* scalar, int channels, cudaStream_t stream); + template void set_to_gpu(DevMem2Db mat, const double* scalar, int channels, cudaStream_t stream); /////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// @@ -298,7 +298,7 @@ namespace cv { namespace gpu { namespace device }; template - void cvt_(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta, cudaStream_t stream) + void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream) { cudaSafeCall( cudaSetDoubleForDevice(&alpha) ); cudaSafeCall( cudaSetDoubleForDevice(&beta) ); @@ -306,11 +306,9 @@ namespace cv { namespace gpu { namespace device ::cv::gpu::device::transform((DevMem2D_)src, (DevMem2D_)dst, op, stream); } - void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, - cudaStream_t stream = 0) + void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream) { - typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, double alpha, double beta, - cudaStream_t stream); + typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream); static const caller_t tab[8][8] = { diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 2dffee4c28..19fd671405 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -43,6 +43,14 @@ #include "precomp.hpp" #include "opencv2/core/gpumat.hpp" +#include +#include + +#ifdef HAVE_CUDA + #include + #include +#endif + using namespace std; using namespace cv; using namespace cv::gpu; @@ -283,6 +291,31 @@ cv::Mat::Mat(const GpuMat& m) : flags(0), dims(0), rows(0), cols(0), data(0), re m.download(*this); } +namespace +{ + class CV_EXPORTS GpuFuncTable + { + public: + virtual ~GpuFuncTable() {} + + virtual void copy(const Mat& src, GpuMat& dst) const = 0; + virtual void copy(const GpuMat& src, Mat& dst) const = 0; + virtual void copy(const GpuMat& src, GpuMat& dst) const = 0; + + virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0; + + virtual void convert(const GpuMat& src, GpuMat& dst) const = 0; + virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0; + + virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0; + + virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0; + virtual void free(void* devPtr) const = 0; + }; +} + +#ifndef HAVE_CUDA + namespace { void throw_nogpu() @@ -308,20 +341,460 @@ namespace void free(void*) const {} }; - const GpuFuncTable* g_funcTbl = 0; - const GpuFuncTable* gpuFuncTable() { static EmptyFuncTable empty; - return g_funcTbl ? g_funcTbl : ∅ + return ∅ } } -void cv::gpu::setGpuFuncTable(const GpuFuncTable* funcTbl) +#else // HAVE_CUDA + +namespace cv { namespace gpu { namespace device { - g_funcTbl = funcTbl; + void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream); + + template + void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream); + + template + void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream); + + void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream); +}}} + +namespace +{ +#if defined(__GNUC__) + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) +#else /* defined(__CUDACC__) || defined(__MSVC__) */ + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) + #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__) +#endif + + inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") + { + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); + } + + inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") + { + if (err < 0) + { + std::ostringstream msg; + msg << "NPP API Call Error: " << err; + cv::gpu::error(msg.str().c_str(), file, line, func); + } + } } +namespace +{ + template void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream) + { + Scalar_ sf = s; + ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream); + } + + template void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) + { + Scalar_ sf = s; + ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream); + } +} + +namespace cv { namespace gpu +{ + CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) + { + ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream); + } + + CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst) + { + ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0); + } + + CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0) + { + ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream); + } + + CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream) + { + typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream); + + static const caller_t callers[] = + { + kernelSetCaller, kernelSetCaller, kernelSetCaller, kernelSetCaller, kernelSetCaller, + kernelSetCaller, kernelSetCaller + }; + + callers[src.depth()](src, s, stream); + } + + CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream) + { + typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream); + + static const caller_t callers[] = + { + kernelSetCaller, kernelSetCaller, kernelSetCaller, kernelSetCaller, kernelSetCaller, + kernelSetCaller, kernelSetCaller + }; + + callers[src.depth()](src, s, mask, stream); + } + + CV_EXPORTS void setTo(GpuMat& src, Scalar s) + { + setTo(src, s, 0); + } + + CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask) + { + setTo(src, s, mask, 0); + } +}} + +namespace +{ + ////////////////////////////////////////////////////////////////////////// + // Convert + + template struct NPPTypeTraits; + template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; + template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; + template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; + template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; + template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; + + template struct NppConvertFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef typename NPPTypeTraits::npp_type dst_t; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI); + }; + template struct NppConvertFunc + { + typedef typename NPPTypeTraits::npp_type dst_t; + + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); + }; + + template::func_ptr func> struct NppCvt + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef typename NPPTypeTraits::npp_type dst_t; + + static void cvt(const GpuMat& src, GpuMat& dst) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppCvt + { + typedef typename NPPTypeTraits::npp_type dst_t; + + static void cvt(const GpuMat& src, GpuMat& dst) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, NPP_RND_NEAR) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + ////////////////////////////////////////////////////////////////////////// + // Set + + template struct NppSetFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI); + }; + template struct NppSetFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); + }; + + template::func_ptr func> struct NppSet + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, Scalar s) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + Scalar_ nppS = s; + + nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppSet + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, Scalar s) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + Scalar_ nppS = s; + + nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + template struct NppSetMaskFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); + }; + template struct NppSetMaskFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); + }; + + template::func_ptr func> struct NppSetMask + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, Scalar s, const GpuMat& mask) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + Scalar_ nppS = s; + + nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppSetMask + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, Scalar s, const GpuMat& mask) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + Scalar_ nppS = s; + + nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + class CudaFuncTable : public GpuFuncTable + { + public: + void copy(const Mat& src, GpuMat& dst) const + { + cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) ); + } + void copy(const GpuMat& src, Mat& dst) const + { + cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) ); + } + void copy(const GpuMat& src, GpuMat& dst) const + { + cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) ); + } + + void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const + { + ::cv::gpu::copyWithMask(src, dst, mask); + } + + void convert(const GpuMat& src, GpuMat& dst) const + { + typedef void (*caller_t)(const GpuMat& src, GpuMat& dst); + static const caller_t callers[7][7][7] = + { + { + /* 8U -> 8U */ {0, 0, 0, 0}, + /* 8U -> 8S */ {::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo, ::cv::gpu::convertTo}, + /* 8U -> 16U */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt::cvt}, + /* 8U -> 16S */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt::cvt}, + /* 8U -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8U -> 32F */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 8S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8S -> 8S */ {0,0,0,0}, + /* 8S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8S -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 8S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 16U -> 8U */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt::cvt}, + /* 16U -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16U -> 16U */ {0,0,0,0}, + /* 16U -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16U -> 32S */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16U -> 32F */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16U -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 16S -> 8U */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,NppCvt::cvt}, + /* 16S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16S -> 16S */ {0,0,0,0}, + /* 16S -> 32S */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16S -> 32F */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 16S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 32S -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32S -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32S -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32S -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32S -> 32S */ {0,0,0,0}, + /* 32S -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32S -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 32F -> 8U */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32F -> 16U */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32F -> 16S */ {NppCvt::cvt,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 32F -> 32F */ {0,0,0,0}, + /* 32F -> 64F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo} + }, + { + /* 64F -> 8U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 8S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 16U */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 16S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 32S */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 32F */ {::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo,::cv::gpu::convertTo}, + /* 64F -> 64F */ {0,0,0,0} + } + }; + + caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1]; + CV_DbgAssert(func != 0); + + func(src, dst); + } + + void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const + { + ::cv::gpu::convertTo(src, dst, alpha, beta); + } + + void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const + { + NppiSize sz; + sz.width = m.cols; + sz.height = m.rows; + + if (mask.empty()) + { + if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) + { + cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) ); + return; + } + + if (m.depth() == CV_8U) + { + int cn = m.channels(); + + if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) + { + int val = saturate_cast(s[0]); + cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) ); + return; + } + } + + typedef void (*caller_t)(GpuMat& src, Scalar s); + static const caller_t callers[7][4] = + { + {NppSet::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet::set}, + {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}, + {NppSet::set, NppSet::set, ::cv::gpu::setTo, NppSet::set}, + {NppSet::set, NppSet::set, ::cv::gpu::setTo, NppSet::set}, + {NppSet::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet::set}, + {NppSet::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSet::set}, + {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo} + }; + + callers[m.depth()][m.channels() - 1](m, s); + } + else + { + typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask); + + static const caller_t callers[7][4] = + { + {NppSetMask::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask::set}, + {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo}, + {NppSetMask::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask::set}, + {NppSetMask::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask::set}, + {NppSetMask::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask::set}, + {NppSetMask::set, ::cv::gpu::setTo, ::cv::gpu::setTo, NppSetMask::set}, + {::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo, ::cv::gpu::setTo} + }; + + callers[m.depth()][m.channels() - 1](m, s, mask); + } + } + + void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const + { + cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) ); + } + + void free(void* devPtr) const + { + cudaFree(devPtr); + } + }; + + const GpuFuncTable* gpuFuncTable() + { + static CudaFuncTable funcTable; + return &funcTable; + } +} + +#endif // HAVE_CUDA + void cv::gpu::GpuMat::upload(const Mat& m) { CV_DbgAssert(!m.empty()); @@ -458,3 +931,19 @@ void cv::gpu::GpuMat::release() step = rows = cols = 0; refcount = 0; } + +void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func) +{ + int code = CV_GpuApiCallError; + + if (uncaught_exception()) + { + const char* errorStr = cvErrorStr(code); + const char* function = func ? func : "unknown function"; + + cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line; + cerr.flush(); + } + else + cv::error( cv::Exception(code, error_string, func, file, line) ); +} diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index ffa32fbe37..b18d4730da 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -139,10 +139,6 @@ private: int minorVersion_; }; -//////////////////////////////// Error handling //////////////////////// - -CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); - //////////////////////////////// CudaMem //////////////////////////////// // CudaMem is limited cv::Mat with page locked memory allocation. // Page locked memory is only needed for async and faster coping to GPU. diff --git a/modules/gpu/src/cuda/internal_shared.hpp b/modules/gpu/src/cuda/internal_shared.hpp index c64fb1337a..ecce14b88a 100644 --- a/modules/gpu/src/cuda/internal_shared.hpp +++ b/modules/gpu/src/cuda/internal_shared.hpp @@ -49,36 +49,6 @@ #include "opencv2/gpu/devmem2d.hpp" #include "safe_call.hpp" -#ifndef CV_PI -#define CV_PI 3.1415926535897932384626433832795 -#endif - -#ifndef CV_PI_F - #ifndef CV_PI - #define CV_PI_F 3.14159265f - #else - #define CV_PI_F ((float)CV_PI) - #endif -#endif - -#ifdef __CUDACC__ - -namespace cv { namespace gpu { namespace device -{ - typedef unsigned char uchar; - typedef unsigned short ushort; - typedef signed char schar; - typedef unsigned int uint; - - template static inline void bindTexture(const textureReference* tex, const DevMem2D_& img) - { - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) ); - } -}}} - -#endif - namespace cv { namespace gpu { enum @@ -94,8 +64,6 @@ namespace cv { namespace gpu // Returns true if the GPU analogue exists, false otherwise. bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType); - static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } - class NppStreamHandler { public: diff --git a/modules/gpu/src/cuda/safe_call.hpp b/modules/gpu/src/cuda/safe_call.hpp index a48b7a237a..0685a3ee2b 100644 --- a/modules/gpu/src/cuda/safe_call.hpp +++ b/modules/gpu/src/cuda/safe_call.hpp @@ -69,36 +69,36 @@ namespace cv { namespace gpu void ncvError(int err, const char *file, const int line, const char *func = ""); void cufftError(int err, const char *file, const int line, const char *func = ""); void cublasError(int err, const char *file, const int line, const char *func = ""); - - static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") - { - if (cudaSuccess != err) - cv::gpu::error(cudaGetErrorString(err), file, line, func); - } - - static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") - { - if (err < 0) - cv::gpu::nppError(err, file, line, func); - } - - static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "") - { - if (NCV_SUCCESS != err) - cv::gpu::ncvError(err, file, line, func); - } - - static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "") - { - if (CUFFT_SUCCESS != err) - cv::gpu::cufftError(err, file, line, func); - } - - static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "") - { - if (CUBLAS_STATUS_SUCCESS != err) - cv::gpu::cublasError(err, file, line, func); - } }} +static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") +{ + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); +} + +static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") +{ + if (err < 0) + cv::gpu::nppError(err, file, line, func); +} + +static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "") +{ + if (NCV_SUCCESS != err) + cv::gpu::ncvError(err, file, line, func); +} + +static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "") +{ + if (CUFFT_SUCCESS != err) + cv::gpu::cufftError(err, file, line, func); +} + +static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "") +{ + if (CUBLAS_STATUS_SUCCESS != err) + cv::gpu::cublasError(err, file, line, func); +} + #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */ \ No newline at end of file diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp index 5cab5bb221..11ccda4452 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/gpu/src/cudastream.cpp @@ -71,19 +71,13 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; } #include "opencv2/gpu/stream_accessor.hpp" -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { - void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0); - - template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream); - template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - - void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0); -}}} - -using namespace ::cv::gpu::device; + void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); + void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream); + void setTo(GpuMat& src, Scalar s, cudaStream_t stream); + void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream); +}} struct Stream::Impl { @@ -99,20 +93,6 @@ namespace size_t bwidth = src.cols * src.elemSize(); cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) ); }; - - template - void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream) - { - Scalar_ sf = s; - set_to_gpu(src, sf.val, src.channels(), stream); - } - - template - void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream) - { - Scalar_ sf = s; - set_to_gpu(src, sf.val, mask, src.channels(), stream); - } } CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl ? stream.impl->stream : 0; }; @@ -208,13 +188,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) } } - typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream); - static const set_caller_t set_callers[] = - { - kernelSet, kernelSet, kernelSet, kernelSet, - kernelSet, kernelSet, kernelSet - }; - set_callers[src.depth()](src, s, impl->stream); + setTo(src, s, impl->stream); } void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) @@ -224,13 +198,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) CV_Assert(mask.type() == CV_8UC1); - typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream); - static const set_caller_t set_callers[] = - { - kernelSetMask, kernelSetMask, kernelSetMask, kernelSetMask, - kernelSetMask, kernelSetMask, kernelSetMask - }; - set_callers[src.depth()](src, val, mask, impl->stream); + setTo(src, val, mask, impl->stream); } void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) @@ -258,7 +226,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, psrc = &(temp = src); dst.create( src.size(), rtype ); - convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream); + convertTo(src, dst, alpha, beta, impl->stream); } cv::gpu::Stream::operator bool() const diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp index 74aa4a87ba..37cd5b4d1a 100644 --- a/modules/gpu/src/error.cpp +++ b/modules/gpu/src/error.cpp @@ -220,22 +220,6 @@ namespace cv { namespace gpu { - void error(const char *error_string, const char *file, const int line, const char *func) - { - int code = CV_GpuApiCallError; - - if (uncaught_exception()) - { - const char* errorStr = cvErrorStr(code); - const char* function = func ? func : "unknown function"; - - cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line; - cerr.flush(); - } - else - cv::error( cv::Exception(code, error_string, func, file, line) ); - } - void nppError(int code, const char *file, const int line, const char *func) { string msg = getErrorString(code, npp_errors, npp_error_num); diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 49c185c517..b13c173a36 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -271,379 +271,5 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) setDevice(prev_device_id); } -//////////////////////////////////////////////////////////////////// -// GpuFuncTable - -namespace cv { namespace gpu { namespace device -{ - void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0); - - template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream); - template - void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); - - void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0); -}}} - -namespace -{ - ////////////////////////////////////////////////////////////////////////// - // Convert - - template struct NPPTypeTraits; - template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; - - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI); - }; - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); - }; - - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - static void cvt(const GpuMat& src, GpuMat& dst) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type dst_t; - - static void cvt(const GpuMat& src, GpuMat& dst) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, NPP_RND_NEAR) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - void convertToKernelCaller(const GpuMat& src, GpuMat& dst) - { - ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0); - } - - ////////////////////////////////////////////////////////////////////////// - // Set - - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, Scalar s) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, Scalar s) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template - void kernelSet(GpuMat& src, Scalar s) - { - Scalar_ sf = s; - ::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), 0); - } - - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, Scalar s, const GpuMat& mask) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, Scalar s, const GpuMat& mask) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Scalar_ nppS = s; - - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template - void kernelSetMask(GpuMat& src, Scalar s, const GpuMat& mask) - { - Scalar_ sf = s; - ::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), 0); - } - - class CudaFuncTable : public GpuFuncTable - { - public: - void copy(const Mat& src, GpuMat& dst) const - { - cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) ); - } - void copy(const GpuMat& src, Mat& dst) const - { - cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) ); - } - void copy(const GpuMat& src, GpuMat& dst) const - { - cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) ); - } - - void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const - { - ::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels()); - } - - void convert(const GpuMat& src, GpuMat& dst) const - { - typedef void (*caller_t)(const GpuMat& src, GpuMat& dst); - static const caller_t callers[7][7][7] = - { - { - /* 8U -> 8U */ {0, 0, 0, 0}, - /* 8U -> 8S */ {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller}, - /* 8U -> 16U */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - /* 8U -> 16S */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - /* 8U -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8U -> 32F */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 8S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8S -> 8S */ {0,0,0,0}, - /* 8S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8S -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 8S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 16U -> 8U */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - /* 16U -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16U -> 16U */ {0,0,0,0}, - /* 16U -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16U -> 32S */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16U -> 32F */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 16S -> 8U */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - /* 16S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16S -> 16S */ {0,0,0,0}, - /* 16S -> 32S */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16S -> 32F */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 16S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 32S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32S -> 32S */ {0,0,0,0}, - /* 32S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 32F -> 8U */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32F -> 16U */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32F -> 16S */ {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 32F -> 32F */ {0,0,0,0}, - /* 32F -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller} - }, - { - /* 64F -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - /* 64F -> 64F */ {0,0,0,0} - } - }; - - caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1]; - CV_DbgAssert(func != 0); - - func(src, dst); - } - - void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const - { - ::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta); - } - - void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const - { - NppiSize sz; - sz.width = m.cols; - sz.height = m.rows; - - if (mask.empty()) - { - if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) - { - cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) ); - return; - } - - if (m.depth() == CV_8U) - { - int cn = m.channels(); - - if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) - { - int val = saturate_cast(s[0]); - cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) ); - return; - } - } - - typedef void (*caller_t)(GpuMat& src, Scalar s); - static const caller_t callers[7][4] = - { - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {kernelSet,kernelSet,kernelSet,kernelSet}, - {NppSet::set,NppSet::set,kernelSet,NppSet::set}, - {NppSet::set,NppSet::set,kernelSet,NppSet::set}, - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {kernelSet,kernelSet,kernelSet,kernelSet} - }; - - callers[m.depth()][m.channels() - 1](m, s); - } - else - { - typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask); - - static const caller_t callers[7][4] = - { - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask} - }; - - callers[m.depth()][m.channels() - 1](m, s, mask); - } - } - - void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const - { - cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) ); - } - - void free(void* devPtr) const - { - cudaFree(devPtr); - } - }; - - class Initializer - { - public: - Initializer() - { - static CudaFuncTable funcTable; - setGpuFuncTable(&funcTable); - } - }; - - Initializer init; -} - #endif diff --git a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp index 5bfd2a7279..f640e5e01f 100644 --- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp +++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ #define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ -#include "internal_shared.hpp" #include "saturate_cast.hpp" #include "vec_traits.hpp" #include "vec_math.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/color.hpp b/modules/gpu/src/opencv2/gpu/device/color.hpp index c012fe7085..177409bfc1 100644 --- a/modules/gpu/src/opencv2/gpu/device/color.hpp +++ b/modules/gpu/src/opencv2/gpu/device/color.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_COLOR_HPP__ #define __OPENCV_GPU_COLOR_HPP__ -#include "internal_shared.hpp" #include "detail/color_detail.hpp" namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/opencv2/gpu/device/common.hpp b/modules/gpu/src/opencv2/gpu/device/common.hpp new file mode 100644 index 0000000000..9633d0a1c1 --- /dev/null +++ b/modules/gpu/src/opencv2/gpu/device/common.hpp @@ -0,0 +1,100 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPU_COMMON_HPP__ +#define __OPENCV_GPU_COMMON_HPP__ + +#include +#include "opencv2/core/devmem2d.hpp" + +#ifndef CV_PI + #define CV_PI 3.1415926535897932384626433832795 +#endif + +#ifndef CV_PI_F + #ifndef CV_PI + #define CV_PI_F 3.14159265f + #else + #define CV_PI_F ((float)CV_PI) + #endif +#endif + +namespace cv { namespace gpu +{ + __host__ __device__ __forceinline__ int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + + namespace device + { + typedef unsigned char uchar; + typedef unsigned short ushort; + typedef signed char schar; + typedef unsigned int uint; + + template inline void bindTexture(const textureReference* tex, const DevMem2D_& img) + { + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) ); + } + } +}} + +#if defined(__GNUC__) + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) +#else /* defined(__CUDACC__) || defined(__MSVC__) */ + #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) +#endif + +namespace cv { namespace gpu +{ + void error(const char *error_string, const char *file, const int line, const char *func = ""); +}} + +static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") +{ + if (cudaSuccess != err) + cv::gpu::error(cudaGetErrorString(err), file, line, func); +} + +#endif // __OPENCV_GPU_COMMON_HPP__ diff --git a/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp b/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp index 8bdc5bfa15..50b9c7e49c 100644 --- a/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp +++ b/modules/gpu/src/opencv2/gpu/device/datamov_utils.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_DATAMOV_UTILS_HPP__ #define __OPENCV_GPU_DATAMOV_UTILS_HPP__ -#include "internal_shared.hpp" +#include "common.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp index 7dd4e8dfe3..79d55c51b0 100644 --- a/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp +++ b/modules/gpu/src/opencv2/gpu/device/detail/color_detail.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_COLOR_DETAIL_HPP__ #define __OPENCV_GPU_COLOR_DETAIL_HPP__ -#include "internal_shared.hpp" +#include "../common.hpp" #include "../vec_traits.hpp" #include "../saturate_cast.hpp" #include "../limits.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp index 5efc8c2fb8..1c499b9788 100644 --- a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp +++ b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__ #define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__ -#include "internal_shared.hpp" +#include "../common.hpp" #include "../vec_traits.hpp" #include "../functional.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp index 84af370062..bce1f03ee6 100644 --- a/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp +++ b/modules/gpu/src/opencv2/gpu/device/detail/type_traits_detail.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__ #define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__ -#include "internal_shared.hpp" +#include "../common.hpp" #include "../vec_traits.hpp" namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp index 39b599fb08..a0a3750c71 100644 --- a/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp +++ b/modules/gpu/src/opencv2/gpu/device/detail/utility_detail.hpp @@ -43,8 +43,6 @@ #ifndef __OPENCV_GPU_UTILITY_DETAIL_HPP__ #define __OPENCV_GPU_UTILITY_DETAIL_HPP__ -#include "internal_shared.hpp" - namespace cv { namespace gpu { namespace device { namespace utility_detail diff --git a/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp b/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp index 3538ca9d38..d6c525a74b 100644 --- a/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp +++ b/modules/gpu/src/opencv2/gpu/device/detail/vec_distance_detail.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__ #define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__ -#include "internal_shared.hpp" #include "../datamov_utils.hpp" namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp b/modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp index 5d1308ae4e..7ce6994fd7 100644 --- a/modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp +++ b/modules/gpu/src/opencv2/gpu/device/dynamic_smem.hpp @@ -43,41 +43,38 @@ #ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__ #define __OPENCV_GPU_DYNAMIC_SMEM_HPP__ -#include "internal_shared.hpp" - -BEGIN_OPENCV_DEVICE_NAMESPACE - -template struct DynamicSharedMem -{ - __device__ __forceinline__ operator T*() +namespace cv { namespace gpu { namespace device +{ + template struct DynamicSharedMem { - extern __shared__ int __smem[]; - return (T*)__smem; - } + __device__ __forceinline__ operator T*() + { + extern __shared__ int __smem[]; + return (T*)__smem; + } - __device__ __forceinline__ operator const T*() const + __device__ __forceinline__ operator const T*() const + { + extern __shared__ int __smem[]; + return (T*)__smem; + } + }; + + // specialize for double to avoid unaligned memory access compile errors + template<> struct DynamicSharedMem { - extern __shared__ int __smem[]; - return (T*)__smem; - } -}; + __device__ __forceinline__ operator double*() + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } -// specialize for double to avoid unaligned memory access compile errors -template<> struct DynamicSharedMem -{ - __device__ __forceinline__ operator double*() - { - extern __shared__ double __smem_d[]; - return (double*)__smem_d; - } - - __device__ __forceinline__ operator const double*() const - { - extern __shared__ double __smem_d[]; - return (double*)__smem_d; - } -}; - -END_OPENCV_DEVICE_NAMESPACE + __device__ __forceinline__ operator const double*() const + { + extern __shared__ double __smem_d[]; + return (double*)__smem_d; + } + }; +}}} #endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__ diff --git a/modules/gpu/src/opencv2/gpu/device/emulation.hpp b/modules/gpu/src/opencv2/gpu/device/emulation.hpp index e598986c67..1fd3d9f060 100644 --- a/modules/gpu/src/opencv2/gpu/device/emulation.hpp +++ b/modules/gpu/src/opencv2/gpu/device/emulation.hpp @@ -43,7 +43,6 @@ #ifndef OPENCV_GPU_EMULATION_HPP_ #define OPENCV_GPU_EMULATION_HPP_ -#include "internal_shared.hpp" #include "warp_reduce.hpp" namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/opencv2/gpu/device/filters.hpp b/modules/gpu/src/opencv2/gpu/device/filters.hpp index 5ecf051b25..87fcd32bd0 100644 --- a/modules/gpu/src/opencv2/gpu/device/filters.hpp +++ b/modules/gpu/src/opencv2/gpu/device/filters.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_FILTERS_HPP__ #define __OPENCV_GPU_FILTERS_HPP__ -#include "internal_shared.hpp" #include "saturate_cast.hpp" #include "vec_traits.hpp" #include "vec_math.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp b/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp index c91ca0284b..4be6dd337c 100644 --- a/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp +++ b/modules/gpu/src/opencv2/gpu/device/funcattrib.hpp @@ -45,7 +45,6 @@ #define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ #include -#include "internal_shared.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/opencv2/gpu/device/functional.hpp b/modules/gpu/src/opencv2/gpu/device/functional.hpp index 382c71bc0d..d21f728e31 100644 --- a/modules/gpu/src/opencv2/gpu/device/functional.hpp +++ b/modules/gpu/src/opencv2/gpu/device/functional.hpp @@ -44,7 +44,6 @@ #define __OPENCV_GPU_FUNCTIONAL_HPP__ #include -#include "internal_shared.hpp" #include "saturate_cast.hpp" #include "vec_traits.hpp" #include "type_traits.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/limits.hpp b/modules/gpu/src/opencv2/gpu/device/limits.hpp index 2559685160..396e9a310a 100644 --- a/modules/gpu/src/opencv2/gpu/device/limits.hpp +++ b/modules/gpu/src/opencv2/gpu/device/limits.hpp @@ -44,7 +44,7 @@ #define __OPENCV_GPU_LIMITS_GPU_HPP__ #include -#include "internal_shared.hpp" +#include "common.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp b/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp index 1fba68cbd9..35575a2b6d 100644 --- a/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp +++ b/modules/gpu/src/opencv2/gpu/device/saturate_cast.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_SATURATE_CAST_HPP__ #define __OPENCV_GPU_SATURATE_CAST_HPP__ -#include "internal_shared.hpp" +#include "common.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/opencv2/gpu/device/transform.hpp b/modules/gpu/src/opencv2/gpu/device/transform.hpp index e1d033f2de..f4ea1531e8 100644 --- a/modules/gpu/src/opencv2/gpu/device/transform.hpp +++ b/modules/gpu/src/opencv2/gpu/device/transform.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_TRANSFORM_HPP__ #define __OPENCV_GPU_TRANSFORM_HPP__ -#include "internal_shared.hpp" +#include "common.hpp" #include "utility.hpp" #include "detail/transform_detail.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/type_traits.hpp b/modules/gpu/src/opencv2/gpu/device/type_traits.hpp index 2dbecfb8a8..93c7f1b84b 100644 --- a/modules/gpu/src/opencv2/gpu/device/type_traits.hpp +++ b/modules/gpu/src/opencv2/gpu/device/type_traits.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__ #define __OPENCV_GPU_TYPE_TRAITS_HPP__ -#include "internal_shared.hpp" #include "detail/type_traits_detail.hpp" namespace cv { namespace gpu { namespace device diff --git a/modules/gpu/src/opencv2/gpu/device/utility.hpp b/modules/gpu/src/opencv2/gpu/device/utility.hpp index 0c417a72c7..21c9ff4bd2 100644 --- a/modules/gpu/src/opencv2/gpu/device/utility.hpp +++ b/modules/gpu/src/opencv2/gpu/device/utility.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_UTILITY_HPP__ #define __OPENCV_GPU_UTILITY_HPP__ -#include "internal_shared.hpp" #include "saturate_cast.hpp" #include "datamov_utils.hpp" #include "detail/utility_detail.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp b/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp index a27e425cd1..a1ead9f525 100644 --- a/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp +++ b/modules/gpu/src/opencv2/gpu/device/vec_distance.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_VEC_DISTANCE_HPP__ #define __OPENCV_GPU_VEC_DISTANCE_HPP__ -#include "internal_shared.hpp" #include "utility.hpp" #include "functional.hpp" #include "detail/vec_distance_detail.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/vec_math.hpp b/modules/gpu/src/opencv2/gpu/device/vec_math.hpp index 88e8909bd7..833abcbc36 100644 --- a/modules/gpu/src/opencv2/gpu/device/vec_math.hpp +++ b/modules/gpu/src/opencv2/gpu/device/vec_math.hpp @@ -43,7 +43,6 @@ #ifndef __OPENCV_GPU_VECMATH_HPP__ #define __OPENCV_GPU_VECMATH_HPP__ -#include "internal_shared.hpp" #include "saturate_cast.hpp" #include "vec_traits.hpp" #include "functional.hpp" diff --git a/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp b/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp index dd304edea7..7ead7cb503 100644 --- a/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp +++ b/modules/gpu/src/opencv2/gpu/device/vec_traits.hpp @@ -43,7 +43,7 @@ #ifndef __OPENCV_GPU_VEC_TRAITS_HPP__ #define __OPENCV_GPU_VEC_TRAITS_HPP__ -#include "internal_shared.hpp" +#include "common.hpp" namespace cv { namespace gpu { namespace device { diff --git a/modules/gpu/src/opencv2/gpu/device/warp.hpp b/modules/gpu/src/opencv2/gpu/device/warp.hpp index 78e1a34f6b..0ac67f47ae 100644 --- a/modules/gpu/src/opencv2/gpu/device/warp.hpp +++ b/modules/gpu/src/opencv2/gpu/device/warp.hpp @@ -43,8 +43,6 @@ #ifndef __OPENCV_GPU_DEVICE_WARP_HPP__ #define __OPENCV_GPU_DEVICE_WARP_HPP__ -#include "internal_shared.hpp" - namespace cv { namespace gpu { namespace device { struct Warp diff --git a/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp b/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp index f3ff01c79f..f7fa6444c1 100644 --- a/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp +++ b/modules/gpu/src/opencv2/gpu/device/warp_reduce.hpp @@ -44,8 +44,6 @@ #ifndef OPENCV_GPU_WARP_REDUCE_HPP__ #define OPENCV_GPU_WARP_REDUCE_HPP__ -#include "internal_shared.hpp" - namespace cv { namespace gpu { namespace device { template