1) NPP_staging as sources. Binaries removed.

2) NVidia tests for GPU
3) FD sample that uses NVidia's interface.
This commit is contained in:
Anatoly Baksheev
2011-02-04 15:15:25 +00:00
parent 811f6fbe92
commit 0747f2d863
52 changed files with 6042 additions and 1240 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
-784
View File
@@ -1,784 +0,0 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*/
#ifndef _npp_staging_h_
#define _npp_staging_h_
/**
* \file npp_staging.h
* NPP Staging Library (will become part of NPP next release)
*/
#ifdef __cplusplus
/** \defgroup ctassert Compile-time assert functionality
* @{
*/
/**
* Compile-time assert namespace
*/
namespace NppStCTprep
{
template <bool x>
struct CT_ASSERT_FAILURE;
template <>
struct CT_ASSERT_FAILURE<true> {};
template <int x>
struct assertTest{};
}
#define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
#define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
/**
* Performs compile-time assertion of a condition on the file scope
*/
#define NPPST_CT_ASSERT(X) \
typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
/*@}*/
#endif
/** \defgroup typedefs NPP Integral and compound types of guaranteed size
* @{
*/
typedef bool NppStBool; ///< Bool of size less than integer
typedef long long NppSt64s; ///< 64-bit signed integer
typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer
typedef int NppSt32s; ///< 32-bit signed integer
typedef unsigned int NppSt32u; ///< 32-bit unsigned integer
typedef short NppSt16s; ///< 16-bit signed short
typedef unsigned short NppSt16u; ///< 16-bit unsigned short
typedef char NppSt8s; ///< 8-bit signed char
typedef unsigned char NppSt8u; ///< 8-bit unsigned char
typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float
typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float
/**
* 2D Rectangle, 8-bit unsigned fields
* This struct contains position and size information of a rectangle in two space
*/
struct NppStRect8u
{
NppSt8u x; ///< x-coordinate of upper left corner
NppSt8u y; ///< y-coordinate of upper left corner
NppSt8u width; ///< Rectangle width
NppSt8u height; ///< Rectangle height
#ifdef __cplusplus
NppStRect8u() : x(0), y(0), width(0), height(0) {};
NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Rectangle, 32-bit signed fields
* This struct contains position and size information of a rectangle in two space
*/
struct NppStRect32s
{
NppSt32s x; ///< x-coordinate of upper left corner
NppSt32s y; ///< y-coordinate of upper left corner
NppSt32s width; ///< Rectangle width
NppSt32s height; ///< Rectangle height
#ifdef __cplusplus
NppStRect32s() : x(0), y(0), width(0), height(0) {};
NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Rectangle, 32-bit unsigned fields
* This struct contains position and size information of a rectangle in two space
*/
struct NppStRect32u
{
NppSt32u x; ///< x-coordinate of upper left corner
NppSt32u y; ///< y-coordinate of upper left corner
NppSt32u width; ///< Rectangle width
NppSt32u height; ///< Rectangle height
#ifdef __cplusplus
NppStRect32u() : x(0), y(0), width(0), height(0) {};
NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {}
#endif
};
/**
* 2D Size, 32-bit signed fields
* This struct typically represents the size of a a rectangular region in two space
*/
struct NppStSize32s
{
NppSt32s width; ///< Rectangle width
NppSt32s height; ///< Rectangle height
#ifdef __cplusplus
NppStSize32s() : width(0), height(0) {};
NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {}
#endif
};
/**
* 2D Size, 32-bit unsigned fields
* This struct typically represents the size of a a rectangular region in two space
*/
struct NppStSize32u
{
NppSt32u width; ///< Rectangle width
NppSt32u height; ///< Rectangle height
#ifdef __cplusplus
NppStSize32u() : width(0), height(0) {};
NppStSize32u(NppSt32u width, NppSt32u height) : width(width), height(height) {}
#endif
};
/**
* Error Status Codes
*
* Almost all NPP function return error-status information using
* these return codes.
* Negative return codes indicate errors, positive return codes indicate
* warnings, a return code of 0 indicates success.
*/
enum NppStStatus
{
//already present in NPP
__NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)
__NPP_ERROR = -1, ///< Unknown error
__NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error
__NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error
__NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned
__NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error
__NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error
__NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error
//to be added
NPP_INVALID_ROI, ///< Invalid region of interest argument
NPP_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
NPP_INVALID_SCALE, ///< Invalid scale parameter passed
NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error
};
/*@}*/
#ifdef __cplusplus
/** \defgroup ct_typesize_checks Client-side sizeof types compile-time check
* @{
*/
NPPST_CT_ASSERT(sizeof(NppStBool) <= 4);
NPPST_CT_ASSERT(sizeof(NppSt64s) == 8);
NPPST_CT_ASSERT(sizeof(NppSt64u) == 8);
NPPST_CT_ASSERT(sizeof(NppSt32s) == 4);
NPPST_CT_ASSERT(sizeof(NppSt32u) == 4);
NPPST_CT_ASSERT(sizeof(NppSt16s) == 2);
NPPST_CT_ASSERT(sizeof(NppSt16u) == 2);
NPPST_CT_ASSERT(sizeof(NppSt8s) == 1);
NPPST_CT_ASSERT(sizeof(NppSt8u) == 1);
NPPST_CT_ASSERT(sizeof(NppSt32f) == 4);
NPPST_CT_ASSERT(sizeof(NppSt64f) == 8);
NPPST_CT_ASSERT(sizeof(NppStRect8u) == sizeof(NppSt32u));
NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s));
NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u));
NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u));
/*@}*/
#endif
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup core_npp NPP Core
* Basic functions for CUDA streams management.
* WARNING: These functions couldn't be exported into DLL, so they can be used only with static version of NPP_staging
* @{
*/
/**
* Gets an active CUDA stream used by NPP (Not an API yet!)
* \return Current CUDA stream
*/
cudaStream_t nppStGetActiveCUDAstream();
/**
* Sets an active CUDA stream used by NPP (Not an API yet!)
* \param cudaStream [IN] cudaStream CUDA stream to become current
* \return CUDA stream used before
*/
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
/*@}*/
/** \defgroup nppi NPP Image Processing
* @{
*/
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NPP status code
*/
NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep,
NppSt32u *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep,
NppSt32s *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
NppSt32f *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep,
NppSt64u *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep,
NppSt64s *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep,
NppSt64f *d_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale,
NppStBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
*
* \return NPP status code
*/
NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep,
NppSt32u *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep,
NppSt32s *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
NppSt32f *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep,
NppSt64u *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep,
NppSt64s *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep,
NppSt64f *h_dst, NppSt32u dstStep,
NppStSize32u srcRoi, NppSt32u scale);
/**
* Computes standard deviation for each rectangular region of the input image using integral images.
*
* \param d_sum [IN] Integral image pointer (CUDA device memory)
* \param sumStep [IN] Integral image line step
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
* \param sqsumStep [IN] Squared integral image line step
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account decimated scale
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NPP status code
*/
NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep,
NppSt64u *d_sqsum, NppSt32u sqsumStep,
NppSt32f *d_norm, NppSt32u normStep,
NppStSize32u roi, NppStRect32u rect,
NppSt32f scaleArea, NppStBool readThruTexture);
/**
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
*
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
* \param sumStep [IN] Integral image line step
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
* \param sqsumStep [IN] Squared integral image line step
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account decimated scale
*
* \return NPP status code
*/
NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep,
NppSt64u *h_sqsum, NppSt32u sqsumStep,
NppSt32f *h_norm, NppSt32u normStep,
NppStSize32u roi, NppStRect32u rect,
NppSt32f scaleArea);
/**
* Transposes an image. 32-bit unsigned pixels, single channel
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStride [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride,
NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride,
NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride,
NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride,
NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride,
NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride,
NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStride [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride,
NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride,
NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride,
NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride,
NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride,
NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,
NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);
/**
* Calculates the size of the temporary buffer for integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for integral image creation
* \see nppiStIntegralGetSize_8u32u
*/
NppStStatus nppiStIntegralGetSize_32f32f(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Creates an integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,
NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates an integral image representation for the input image
* \see nppiStIntegral_8u32u_C1R
*/
NppStStatus nppiStIntegral_32f32f_C1R(NppSt32f *d_src, NppSt32u srcStep,
NppSt32f *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates an integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/**
* Creates an integral image representation for the input image. Host implementation
* \see nppiStIntegral_8u32u_C1R_host
*/
NppStStatus nppiStIntegral_32f32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,
NppSt32f *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/**
* Calculates the size of the temporary buffer for squared integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize);
/**
* Creates a squared integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep,
NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Creates a squared integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NPP status code
*/
NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,
NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);
/*@}*/
/** \defgroup npps NPP Signal Processing
* @{
*/
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
*
* \param srcLen [IN] Length of the input vector in elements
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
*
* \return NPP status code
*/
NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
* \see nppsStCompactGetSize_32u
*/
NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit float values
* \see nppsStCompactGetSize_32u
*/
NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values
*
* \param d_src [IN] Source vector pointer (CUDA device memory)
* \param srcLen [IN] Source vector length
* \param d_dst [OUT] Destination vector pointer (CUDA device memory)
* \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
* \param elemRemove [IN] The value to be removed
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
*
* \return NPP status code
*/
NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen,
NppSt32u *d_dst, NppSt32u *p_dstLen,
NppSt32u elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values
* \see nppsStCompact_32u
*/
NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen,
NppSt32s *d_dst, NppSt32u *p_dstLen,
NppSt32s elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values
* \see nppsStCompact_32u
*/
NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen,
NppSt32f *d_dst, NppSt32u *p_dstLen,
NppSt32f elemRemove,
NppSt8u *pBuffer, NppSt32u bufSize);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
*
* \param h_src [IN] Source vector pointer (CUDA device memory)
* \param srcLen [IN] Source vector length
* \param h_dst [OUT] Destination vector pointer (CUDA device memory)
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
* \param elemRemove [IN] The value to be removed
*
* \return NPP status code
*/
NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen,
NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
* \see nppsStCompact_32u_host
*/
NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen,
NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
* \see nppsStCompact_32u_host
*/
NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen,
NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove);
/*@}*/
#ifdef __cplusplus
}
#endif
#endif // _npp_staging_h_
+9 -12
View File
@@ -36,10 +36,12 @@ file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA AND MSVC)
file(GLOB ncv_srcs "src/nvidia/*.cpp")
file(GLOB ncv_hdrs "src/nvidia/*.h*")
file(GLOB ncv_cuda "src/nvidia/*.cu")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
endif()
if (HAVE_CUDA)
@@ -74,17 +76,13 @@ if (HAVE_CUDA)
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
endif()
include(FindNPP_staging.cmake)
include_directories(${NPPST_INC})
endif()
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET()
endif()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
@@ -117,8 +115,7 @@ set_target_properties(${the_target} PROPERTIES
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
target_link_libraries(${the_target} ${NPPST_LIB})
target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
-24
View File
@@ -1,24 +0,0 @@
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
set(BIT_SUFF 32)
else()
set(BIT_SUFF 64)
endif()
if (APPLE)
set(PLATFORM_SUFF Darwin)
elseif (UNIX)
set(PLATFORM_SUFF Linux)
else()
set(PLATFORM_SUFF Windows)
endif()
set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1)
find_library(NPPST_LIB
NAMES "${LIB_FILE}" "lib${LIB_FILE}"
PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging"
DOC "NPP staging library"
)
SET(NPPST_INC "${CMAKE_SOURCE_DIR}//3rdparty/NPP_staging")
+7 -7
View File
@@ -83,25 +83,25 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );
}
else if (src.elemSize() == 4)
{
NppStSize32u sz;
NcvSize32u sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<NppSt32u*>(src.ptr<NppSt32u>()), src.step,
dst.ptr<NppSt32u>(), dst.step, sz) );
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), src.step,
dst.ptr<Ncv32u>(), dst.step, sz) );
}
else // if (src.elemSize() == 8)
{
NppStSize32u sz;
NcvSize32u sz;
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<NppSt64u*>(src.ptr<NppSt64u>()), src.step,
dst.ptr<NppSt64u>(), dst.step, sz) );
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), src.step,
dst.ptr<Ncv64u>(), dst.step, sz) );
}
cudaSafeCall( cudaThreadSynchronize() );
+4 -4
View File
@@ -126,7 +126,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
minNeighbors,
scaleStep, 1,
flags,
*gpuAllocator, *cpuAllocator, devProp.major, devProp.minor, 0);
*gpuAllocator, *cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
@@ -146,8 +146,8 @@ private:
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice);
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned);
gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);
@@ -212,7 +212,7 @@ private:
roi.height = d_src.height();
Ncv32u numDetections;
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp.major, devProp.minor, 0);
*d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
+16 -12
View File
@@ -560,16 +560,19 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
sum.create(src.rows + 1, src.cols + 1, CV_32S);
NppStSize32u roiSize;
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
NppSt32u bufSize;
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
Ncv32u bufSize;
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step,
sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), src.step,
sum.ptr<Ncv32u>(), sum.step, roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
cudaSafeCall( cudaThreadSynchronize() );
}
@@ -600,19 +603,20 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
{
CV_Assert(src.type() == CV_8U);
NppStSize32u roiSize;
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
NppSt32u bufSize;
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize));
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
Ncv32u bufSize;
nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
GpuMat buf(1, bufSize, CV_8U);
sqsum.create(src.rows + 1, src.cols + 1, CV_64F);
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(
const_cast<NppSt8u*>(src.ptr<NppSt8u>(0)), src.step,
sqsum.ptr<NppSt64u>(0), sqsum.step, roiSize,
buf.ptr<NppSt8u>(0), bufSize));
nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), src.step,
sqsum.ptr<Ncv64u>(0), sqsum.step, roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
cudaSafeCall( cudaThreadSynchronize() );
}
@@ -57,8 +57,8 @@
#include <algorithm>
#include "npp.h"
#include "NCV.hpp"
#include "NPP_staging/NPP_staging.hpp"
#include "NCVRuntimeTemplates.hpp"
#include "NCVHaarObjectDetection.hpp"
@@ -970,8 +970,7 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
Ncv32f scaleArea,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream)
{
ncvAssertReturn(d_integralImage.memType() == d_weights.memType() &&
@@ -1077,15 +1076,15 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
Ncv32f scaleAreaPixels = scaleArea * ((haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER) *
(haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER));
NcvBool bTexCacheCascade = devPropMajor < 2;
NcvBool bTexCacheCascade = devProp.major < 2;
NcvBool bTexCacheIImg = true; //this works better even on Fermi so far
NcvBool bDoAtomicCompaction = devPropMajor >= 2 || (devPropMajor == 1 && devPropMinor >= 3);
NcvBool bDoAtomicCompaction = devProp.major >= 2 || (devProp.major == 1 && devProp.minor >= 3);
NCVVector<Ncv32u> *d_ptrNowData = &d_vecPixelMask;
NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;
Ncv32u szNppCompactTmpBuf;
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf);
nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf, devProp);
if (bDoAtomicCompaction)
{
szNppCompactTmpBuf = 0;
@@ -1185,11 +1184,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
}
else
{
NppStStatus nppSt;
NCVStatus nppSt;
nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), d_vecPixelMask.length(),
d_ptrNowData->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
ncvAssertReturn(nppSt == NPPST_SUCCESS, NCV_NPP_ERROR);
}
numDetections = *hp_numDet;
}
@@ -1240,11 +1239,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
}
else
{
NppStStatus nppSt;
NCVStatus nppSt;
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), d_vecPixelMask.length(),
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
ncvAssertReturnNcvStat(nppSt);
}
swap(d_ptrNowData, d_ptrNowTmp);
@@ -1310,11 +1309,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
}
else
{
NppStStatus nppSt;
NCVStatus nppSt;
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
ncvAssertReturnNcvStat(nppSt);
}
swap(d_ptrNowData, d_ptrNowTmp);
@@ -1371,11 +1370,11 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
}
else
{
NppStStatus nppSt;
NCVStatus nppSt;
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,
d_tmpBufCompact.ptr(), szNppCompactTmpBuf);
ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);
d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);
ncvAssertReturnNcvStat(nppSt);
}
swap(d_ptrNowData, d_ptrNowTmp);
@@ -1715,8 +1714,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream)
{
ncvAssertReturn(d_srcImg.memType() == d_dstRects.memType() &&
@@ -1773,12 +1771,12 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
NCVVectorAlloc<NcvRect32u> h_hypothesesIntermediate(cpuAllocator, d_srcImg.width() * d_srcImg.height());
ncvAssertReturn(h_hypothesesIntermediate.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NppStStatus nppStat;
NCVStatus nppStat;
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
nppStat = nppiStIntegralGetSize_8u32u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
nppStat = nppiStSqrIntegralGetSize_8u64u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral, devProp);
ncvAssertReturnNcvStat(nppStat);
nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral, devProp);
ncvAssertReturnNcvStat(nppStat);
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(gpuAllocator, std::max(szTmpBufIntegral, szTmpBufSqIntegral));
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
@@ -1786,15 +1784,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
nppStat = nppiStIntegral_8u32u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
d_integralImage.ptr(), d_integralImage.pitch(),
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
d_tmpIIbuf.ptr(), szTmpBufIntegral);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
d_tmpIIbuf.ptr(), szTmpBufIntegral, devProp);
ncvAssertReturnNcvStat(nppStat);
nppStat = nppiStSqrIntegral_8u64u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
NppStSize32u(d_srcImg.width(), d_srcImg.height()),
d_tmpIIbuf.ptr(), szTmpBufSqIntegral);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
NcvSize32u(d_srcImg.width(), d_srcImg.height()),
d_tmpIIbuf.ptr(), szTmpBufSqIntegral, devProp);
ncvAssertReturnNcvStat(nppStat);
NCV_SKIP_COND_END
@@ -1859,7 +1857,7 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
Ncv32u scale = scalesVector[i];
NcvSize32u srcRoi, scaledIIRoi, searchRoi;
NppStSize32u srcIIRoi;
NcvSize32u srcIIRoi;
srcRoi.width = d_srcImg.width();
srcRoi.height = d_srcImg.height();
srcIIRoi.width = srcRoi.width + 1;
@@ -1875,15 +1873,15 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
d_integralImage.ptr(), d_integralImage.pitch(),
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
srcIIRoi, scale, true);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
ncvAssertReturnNcvStat(nppStat);
nppStat = nppiStDownsampleNearest_64u_C1R(
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
srcIIRoi, scale, true);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
ncvAssertReturnNcvStat(nppStat);
const NppStRect32u rect(
const NcvRect32u rect(
HAAR_STDDEV_BORDER,
HAAR_STDDEV_BORDER,
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
@@ -1892,9 +1890,9 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),
d_rectStdDev.ptr(), d_rectStdDev.pitch(),
NppStSize32u(searchRoi.width, searchRoi.height), rect,
NcvSize32u(searchRoi.width, searchRoi.height), rect,
(Ncv32f)scale*scale, true);
ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);
ncvAssertReturnNcvStat(nppStat);
NCV_SKIP_COND_END
@@ -1904,8 +1902,8 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
detectionsOnThisScale,
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
searchRoi, pixelStep, (Ncv32f)scale*scale,
gpuAllocator, cpuAllocator, devPropMajor, devPropMinor, cuStream);
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
gpuAllocator, cpuAllocator, devProp, cuStream);
ncvAssertReturnNcvStat(nppStat);
NCV_SKIP_COND_BEGIN
@@ -2250,6 +2248,10 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
return ncvStat;
}
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
Ncv32u &numHypotheses,
Ncv32u minNeighbors,
@@ -2539,7 +2541,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
}
NCVStatus ncvHaarStoreNVBIN_host(std::string &filename,
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
HaarClassifierCascadeDescriptor haar,
NCVVector<HaarStage64> &h_HaarStages,
NCVVector<HaarClassifierNode128> &h_HaarNodes,
@@ -75,13 +75,13 @@ struct HaarFeature64
#define HaarFeature64_CreateCheck_MaxRectField 0xFF
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)
__host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)
{
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);
((NcvRect8u*)&(this->_ui2.x))->x = rectX;
((NcvRect8u*)&(this->_ui2.x))->y = rectY;
((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;
((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;
((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;
((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;
((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;
((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;
return NCV_SUCCESS;
}
@@ -306,11 +306,11 @@ struct HaarStage64
};
NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);
NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);
NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);
NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);
NCV_CT_ASSERT(sizeof(HaarStage64) == 8);
//==============================================================================
@@ -347,7 +347,7 @@ enum
NCVPipeObjDet_VisualizeInPlace = 0x004,
};
NCV_EXPORTS
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
NcvSize32u srcRoi,
NCVVector<NcvRect32u> &d_dstRects,
@@ -367,15 +367,14 @@ NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream);
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF
#define HAAR_STDDEV_BORDER 1
NCV_EXPORTS
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,
NCVMatrix<Ncv32f> &d_weights,
NCVMatrixAlloc<Ncv32u> &d_pixelMask,
@@ -391,11 +390,10 @@ NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImag
Ncv32f scaleArea,
INCVMemAllocator &gpuAllocator,
INCVMemAllocator &cpuAllocator,
Ncv32u devPropMajor,
Ncv32u devPropMinor,
cudaDeviceProp &devProp,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
NCVMatrix<Ncv32f> &h_weights,
NCVMatrixAlloc<Ncv32u> &h_pixelMask,
@@ -409,7 +407,7 @@ NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
Ncv32u pixelStep,
Ncv32f scaleArea);
NCV_EXPORTS
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
@@ -419,7 +417,7 @@ NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,
Ncv8u color,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
@@ -429,7 +427,7 @@ NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,
Ncv32u color,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
@@ -438,7 +436,7 @@ NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,
Ncv32u numRects,
Ncv8u color);
NCV_EXPORTS
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
Ncv32u dstStride,
Ncv32u dstWidth,
@@ -450,7 +448,7 @@ NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,
#define RECT_SIMILARITY_PROPORTION 0.2f
NCV_EXPORTS
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
@@ -461,7 +459,7 @@ NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
Ncv32f curScale,
cudaStream_t cuStream);
NCV_EXPORTS
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
Ncv32u numPixelMaskDetections,
NCVVector<NcvRect32u> &hypotheses,
@@ -471,18 +469,18 @@ NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
Ncv32u rectHeight,
Ncv32f curScale);
NCV_EXPORTS
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,
Ncv32u &numHypotheses,
Ncv32u minNeighbors,
Ncv32f intersectEps,
NCVVector<Ncv32u> *hypothesesWeights);
NCV_EXPORTS
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,
Ncv32u &numNodes, Ncv32u &numFeatures);
NCV_EXPORTS
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
HaarClassifierCascadeDescriptor &haar,
NCVVector<HaarStage64> &h_HaarStages,
@@ -490,6 +488,7 @@ NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,
NCVVector<HaarFeature64> &h_HaarFeatures);
NCV_EXPORTS
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,
HaarClassifierCascadeDescriptor haar,
NCVVector<HaarStage64> &h_HaarStages,
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,637 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _npp_staging_hpp_
#define _npp_staging_hpp_
#include "NCV.hpp"
/**
* \file NPP_staging.hpp
* NPP Staging Library
*/
/** \defgroup core_npp NPPST Core
* Basic functions for CUDA streams management.
* @{
*/
/**
* Gets an active CUDA stream used by NPPST
* NOT THREAD SAFE
* \return Current CUDA stream
*/
cudaStream_t nppStGetActiveCUDAstream();
/**
* Sets an active CUDA stream used by NPPST
* NOT THREAD SAFE
* \param cudaStream [IN] cudaStream CUDA stream to become current
* \return CUDA stream used before
*/
cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);
/*@}*/
/** \defgroup nppi NPPST Image Processing
* @{
*/
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,
Ncv32u *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,
Ncv32s *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
Ncv32f *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,
Ncv64u *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,
Ncv64s *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.
* \see nppiStDownsampleNearest_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,
Ncv64f *d_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale,
NcvBool readThruTexture);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param srcRoi [IN] Region of interest in the source image
* \param scale [IN] Downsampling scale factor (positive integer)
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,
Ncv32u *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,
Ncv32s *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
Ncv32f *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,
Ncv64u *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,
Ncv64s *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.
* \see nppiStDownsampleNearest_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStDownsampleNearest_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,
Ncv64f *h_dst, Ncv32u dstStep,
NcvSize32u srcRoi, Ncv32u scale);
/**
* Computes standard deviation for each rectangular region of the input image using integral images.
*
* \param d_sum [IN] Integral image pointer (CUDA device memory)
* \param sumStep [IN] Integral image line step
* \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)
* \param sqsumStep [IN] Squared integral image line step
* \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account decimated scale
* \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,
Ncv64u *d_sqsum, Ncv32u sqsumStep,
Ncv32f *d_norm, Ncv32u normStep,
NcvSize32u roi, NcvRect32u rect,
Ncv32f scaleArea, NcvBool readThruTexture);
/**
* Computes standard deviation for each rectangular region of the input image using integral images. Host implementation
*
* \param h_sum [IN] Integral image pointer (Host or pinned memory)
* \param sumStep [IN] Integral image line step
* \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)
* \param sqsumStep [IN] Squared integral image line step
* \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image
* \param normStep [IN] Stddev image line step
* \param roi [IN] Region of interest in the source image
* \param rect [IN] Rectangular region to calculate stddev over
* \param scaleArea [IN] Multiplication factor to account decimated scale
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,
Ncv64u *h_sqsum, Ncv32u sqsumStep,
Ncv32f *h_norm, Ncv32u normStep,
NcvSize32u roi, NcvRect32u rect,
Ncv32f scaleArea);
/**
* Transposes an image. 32-bit unsigned pixels, single channel
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStride [IN] Source image line step
* \param d_dst [OUT] Destination image pointer (CUDA device memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,
Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 32-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,
Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,
Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,
Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,
Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel
* \see nppiStTranspose_32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,
Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 32-bit unsigned pixels, single channel. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStride [IN] Source image line step
* \param h_dst [OUT] Destination image pointer (Host or pinned memory)
* \param dstStride [IN] Destination image line step
* \param srcRoi [IN] Region of interest of the source image
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,
Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 32-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,
Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 32-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,
Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit unsigned pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,
Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit signed pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,
Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Transposes an image. 64-bit float pixels, single channel. Host implementation
* \see nppiStTranspose_32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,
Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);
/**
* Calculates the size of the temporary buffer for integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Calculates the size of the temporary buffer for integral image creation
* \see nppiStIntegralGetSize_8u32u
*/
NCV_EXPORTS
NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Creates an integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,
Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Creates an integral image representation for the input image
* \see nppiStIntegral_8u32u_C1R
*/
NCV_EXPORTS
NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,
Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Creates an integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
/**
* Creates an integral image representation for the input image. Host implementation
* \see nppiStIntegral_8u32u_C1R_host
*/
NCV_EXPORTS
NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,
Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
/**
* Calculates the size of the temporary buffer for squared integral image creation
*
* \param roiSize [IN] Size of the input image
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Creates a squared integral image representation for the input image
*
* \param d_src [IN] Source image pointer (CUDA device memory)
* \param srcStep [IN] Source image line step
* \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,
Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,
Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Creates a squared integral image representation for the input image. Host implementation
*
* \param h_src [IN] Source image pointer (Host or pinned memory)
* \param srcStep [IN] Source image line step
* \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)
* \param dstStep [IN] Destination image line step
* \param roiSize [IN] Region of interest of the source image
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,
Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);
/*@}*/
/** \defgroup npps NPPST Signal Processing
* @{
*/
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values
*
* \param srcLen [IN] Length of the input vector in elements
* \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit signed values
* \see nppsStCompactGetSize_32u
*/
NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Calculates the size of the temporary buffer for vector compaction. 32-bit float values
* \see nppsStCompactGetSize_32u
*/
NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values
*
* \param d_src [IN] Source vector pointer (CUDA device memory)
* \param srcLen [IN] Source vector length
* \param d_dst [OUT] Destination vector pointer (CUDA device memory)
* \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)
* \param elemRemove [IN] The value to be removed
* \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)
* \param bufSize [IN] Size of the pBuffer in bytes
* \param devProp [IN] CUDA device properties structure, containing texture alignment information
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,
Ncv32u *d_dst, Ncv32u *p_dstLen,
Ncv32u elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values
* \see nppsStCompact_32u
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,
Ncv32s *d_dst, Ncv32u *p_dstLen,
Ncv32s elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values
* \see nppsStCompact_32u
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,
Ncv32f *d_dst, Ncv32u *p_dstLen,
Ncv32f elemRemove, Ncv8u *pBuffer,
Ncv32u bufSize, cudaDeviceProp &devProp);
/**
* Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation
*
* \param h_src [IN] Source vector pointer (CUDA device memory)
* \param srcLen [IN] Source vector length
* \param h_dst [OUT] Destination vector pointer (CUDA device memory)
* \param dstLen [OUT] Pointer to the destination vector length (can be NULL)
* \param elemRemove [IN] The value to be removed
*
* \return NCV status code
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,
Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation
* \see nppsStCompact_32u_host
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,
Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);
/**
* Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation
* \see nppsStCompact_32u_host
*/
NCV_EXPORTS
NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,
Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);
/*@}*/
#endif // _npp_staging_hpp_
@@ -40,15 +40,13 @@
//M*/
#include <precomp.hpp>
#if !defined (HAVE_CUDA)
#else /* !defined (HAVE_CUDA) */
#include <ios>
#include <stdarg.h>
#include "NCV.hpp"
@@ -94,17 +92,6 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)
//==============================================================================
NCVStatus GPUAlignmentValue(Ncv32u &alignment)
{
int curDev;
cudaDeviceProp curProp;
ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);
ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);
alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);
return NCV_SUCCESS;
}
Ncv32u alignUp(Ncv32u what, Ncv32u alignment)
{
Ncv32u alignMask = alignment-1;
@@ -216,7 +203,7 @@ NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)
}
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)
NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)
:
currentSize(0),
_maxSize(0),
@@ -229,17 +216,26 @@ NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity,
allocBegin = NULL;
switch (memT)
if (reusePtr == NULL)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPageable:
allocBegin = (Ncv8u *)malloc(capacity);
break;
bReusesMemory = false;
switch (memT)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );
break;
case NCVMemoryTypeHostPageable:
allocBegin = (Ncv8u *)malloc(capacity);
break;
}
}
else
{
bReusesMemory = true;
allocBegin = (Ncv8u *)reusePtr;
}
if (capacity == 0)
@@ -260,18 +256,23 @@ NCVMemStackAllocator::~NCVMemStackAllocator()
if (allocBegin != NULL)
{
ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");
switch (_memType)
if (!bReusesMemory)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaFree(allocBegin), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
break;
case NCVMemoryTypeHostPageable:
free(allocBegin);
break;
switch (_memType)
{
case NCVMemoryTypeDevice:
ncvAssertCUDAReturn(cudaFree(allocBegin), );
break;
case NCVMemoryTypeHostPinned:
ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );
break;
case NCVMemoryTypeHostPageable:
free(allocBegin);
break;
}
}
allocBegin = NULL;
}
}
@@ -356,14 +357,14 @@ size_t NCVMemStackAllocator::maxSize(void) const
//===================================================================
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)
NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)
:
currentSize(0),
_maxSize(0),
_memType(memT)
_memType(memT),
_alignment(alignment)
{
ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );
ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );
}
@@ -42,8 +42,49 @@
#ifndef _ncv_hpp_
#define _ncv_hpp_
#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__)
#define NCV_EXPORTS __declspec(dllexport)
#else
#define NCV_EXPORTS
#endif
#include <cuda_runtime.h>
#include "npp_staging.h"
//==============================================================================
//
// Compile-time assert functionality
//
//==============================================================================
/**
* Compile-time assert namespace
*/
namespace NcvCTprep
{
template <bool x>
struct CT_ASSERT_FAILURE;
template <>
struct CT_ASSERT_FAILURE<true> {};
template <int x>
struct assertTest{};
}
#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro
#define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro
/**
* Performs compile-time assertion of a condition on the file scope
*/
#define NCV_CT_ASSERT(X) \
typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
//==============================================================================
@@ -82,62 +123,72 @@ typedef float Ncv32f;
typedef double Ncv64f;
typedef struct
struct NcvRect8u
{
Ncv8u x;
Ncv8u y;
Ncv8u width;
Ncv8u height;
} NcvRect8u;
NcvRect8u() : x(0), y(0), width(0), height(0) {};
NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}
};
typedef struct
struct NcvRect32s
{
Ncv32s x; ///< x-coordinate of upper left corner.
Ncv32s y; ///< y-coordinate of upper left corner.
Ncv32s width; ///< Rectangle width.
Ncv32s height; ///< Rectangle height.
} NcvRect32s;
NcvRect32s() : x(0), y(0), width(0), height(0) {};
NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}
};
typedef struct
struct NcvRect32u
{
Ncv32u x; ///< x-coordinate of upper left corner.
Ncv32u y; ///< y-coordinate of upper left corner.
Ncv32u width; ///< Rectangle width.
Ncv32u height; ///< Rectangle height.
} NcvRect32u;
NcvRect32u() : x(0), y(0), width(0), height(0) {};
NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}
};
typedef struct
struct NcvSize32s
{
Ncv32s width; ///< Rectangle width.
Ncv32s height; ///< Rectangle height.
} NcvSize32s;
NcvSize32s() : width(0), height(0) {};
NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}
};
typedef struct
struct NcvSize32u
{
Ncv32u width; ///< Rectangle width.
Ncv32u height; ///< Rectangle height.
} NcvSize32u;
NcvSize32u() : width(0), height(0) {};
NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}
};
NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);
NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);
NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);
NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);
NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);
NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);
NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);
NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);
NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);
NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);
NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);
NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
//==============================================================================
@@ -162,13 +213,13 @@ const Ncv32u K_LOG2_WARP_SIZE = 5;
#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)
void ncvDebugOutput(const char *msg, ...);
NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);
typedef void NCVDebugOutputHandler(const char* msg);
void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
#define ncvAssertPrintCheck(pred, msg) \
@@ -222,6 +273,7 @@ void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
*/
enum NCVStatus
{
//NCV statuses
NCV_SUCCESS,
NCV_CUDA_ERROR,
@@ -257,6 +309,24 @@ enum NCVStatus
NCV_NOIMPL_HAAR_TILTED_FEATURES,
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
//NPP statuses
NPPST_SUCCESS = NCV_SUCCESS, ///< Successful operation (same as NPP_NO_ERROR)
NPPST_ERROR, ///< Unknown error
NPPST_CUDA_KERNEL_EXECUTION_ERROR, ///< CUDA kernel execution error
NPPST_NULL_POINTER_ERROR, ///< NULL pointer argument error
NPPST_TEXTURE_BIND_ERROR, ///< CUDA texture binding error or non-zero offset returned
NPPST_MEMCPY_ERROR, ///< CUDA memory copy error
NPPST_MEM_ALLOC_ERR, ///< CUDA memory allocation error
NPPST_MEMFREE_ERR, ///< CUDA memory deallocation error
//NPPST statuses
NPPST_INVALID_ROI, ///< Invalid region of interest argument
NPPST_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)
NPPST_INVALID_SCALE, ///< Invalid scale parameter passed
NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer
NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)
NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error
};
@@ -285,11 +355,11 @@ enum NCVStatus
typedef struct _NcvTimer *NcvTimer;
NcvTimer ncvStartTimer(void);
NCV_EXPORTS NcvTimer ncvStartTimer(void);
double ncvEndQueryTimerUs(NcvTimer t);
NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
double ncvEndQueryTimerMs(NcvTimer t);
NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
//==============================================================================
@@ -299,16 +369,10 @@ double ncvEndQueryTimerMs(NcvTimer t);
//==============================================================================
/**
* Alignment of GPU memory chunks in bytes
*/
NCVStatus GPUAlignmentValue(Ncv32u &alignment);
/**
* Calculates the aligned top bound value
*/
Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
/**
@@ -326,7 +390,7 @@ enum NCVMemoryType
/**
* NCVMemPtr
*/
struct NCVMemPtr
struct NCV_EXPORTS NCVMemPtr
{
void *ptr;
NCVMemoryType memtype;
@@ -337,7 +401,7 @@ struct NCVMemPtr
/**
* NCVMemSegment
*/
struct NCVMemSegment
struct NCV_EXPORTS NCVMemSegment
{
NCVMemPtr begin;
size_t size;
@@ -348,7 +412,7 @@ struct NCVMemSegment
/**
* INCVMemAllocator (Interface)
*/
class INCVMemAllocator
class NCV_EXPORTS INCVMemAllocator
{
public:
virtual ~INCVMemAllocator() = 0;
@@ -370,7 +434,7 @@ inline INCVMemAllocator::~INCVMemAllocator() {}
/**
* NCVMemStackAllocator
*/
class NCVMemStackAllocator : public INCVMemAllocator
class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
{
NCVMemStackAllocator();
NCVMemStackAllocator(const NCVMemStackAllocator &);
@@ -378,7 +442,7 @@ class NCVMemStackAllocator : public INCVMemAllocator
public:
explicit NCVMemStackAllocator(Ncv32u alignment);
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);
NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
virtual ~NCVMemStackAllocator();
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
@@ -400,17 +464,18 @@ private:
Ncv8u *end;
size_t currentSize;
size_t _maxSize;
NcvBool bReusesMemory;
};
/**
* NCVMemNativeAllocator
*/
class NCVMemNativeAllocator : public INCVMemAllocator
class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
{
public:
NCVMemNativeAllocator(NCVMemoryType memT);
NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
virtual ~NCVMemNativeAllocator();
virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
@@ -438,9 +503,9 @@ private:
/**
* Copy dispatcher
*/
NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
const void *src, NCVMemoryType srcType,
size_t sz, cudaStream_t cuStream);
NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
const void *src, NCVMemoryType srcType,
size_t sz, cudaStream_t cuStream);
/**
@@ -514,6 +579,7 @@ class NCVVectorAlloc : public NCVVector<T>
{
NCVVectorAlloc();
NCVVectorAlloc(const NCVVectorAlloc &);
NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
public:
@@ -563,8 +629,7 @@ public:
return allocatedMem;
}
private:
private:
INCVMemAllocator &allocator;
NCVMemSegment allocatedMem;
};
@@ -707,7 +772,7 @@ class NCVMatrixAlloc : public NCVMatrix<T>
{
NCVMatrixAlloc();
NCVMatrixAlloc(const NCVMatrixAlloc &);
NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
public:
NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)
@@ -1,3 +1,51 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#include <stdarg.h>
#include <vector>
////////////////////////////////////////////////////////////////////////////////
// The Loki Library
// Copyright (c) 2001 by Andrei Alexandrescu
@@ -14,13 +62,6 @@
// http://loki-lib.sourceforge.net/index.php?n=Main.License
////////////////////////////////////////////////////////////////////////////////
#ifndef _ncvruntimetemplates_hpp_
#define _ncvruntimetemplates_hpp_
#include <stdarg.h>
#include <vector>
namespace Loki
{
//==============================================================================
+135 -135
View File
@@ -68,51 +68,51 @@ namespace cv { namespace gpu { namespace device
//! Read Write Traits
template <size_t src_elem_size, size_t dst_elem_size>
struct UnReadWriteTraits_
{
enum {shift=1};
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 1>
{
enum {shift=4};
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 2>
{
enum {shift=2};
template <size_t src_elem_size, size_t dst_elem_size>
struct UnReadWriteTraits_
{
enum {shift=1};
};
template <typename T, typename D> struct UnReadWriteTraits
{
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
typedef typename TypeVec<T, shift>::vec_t read_type;
typedef typename TypeVec<D, shift>::vec_t write_type;
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 1>
{
enum {shift=4};
};
template <size_t src_elem_size>
struct UnReadWriteTraits_<src_elem_size, 2>
{
enum {shift=2};
};
template <typename T, typename D> struct UnReadWriteTraits
{
enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
typedef typename TypeVec<T, shift>::vec_t read_type;
typedef typename TypeVec<D, shift>::vec_t write_type;
};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinReadWriteTraits_
{
enum {shift=1};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
struct BinReadWriteTraits_
{
enum {shift=1};
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
{
enum {shift=4};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
{
enum {shift=4};
};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
{
enum {shift=2};
template <size_t src_elem_size1, size_t src_elem_size2>
struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
{
enum {shift=2};
};
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
{
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
typedef typename TypeVec<T1, shift>::vec_t read_type1;
typedef typename TypeVec<T2, shift>::vec_t read_type2;
typedef typename TypeVec<D , shift>::vec_t write_type;
template <typename T1, typename T2, typename D> struct BinReadWriteTraits
{
enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
typedef typename TypeVec<T1, shift>::vec_t read_type1;
typedef typename TypeVec<T2, shift>::vec_t read_type2;
typedef typename TypeVec<D , shift>::vec_t write_type;
};
//! Transform kernels
@@ -122,14 +122,14 @@ namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
@@ -138,18 +138,18 @@ namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src.x);
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
@@ -158,22 +158,22 @@ namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src.x);
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
@@ -182,65 +182,65 @@ namespace cv { namespace gpu { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src.x);
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
{
typedef typename UnReadWriteTraits<T, D>::read_type read_type;
typedef typename UnReadWriteTraits<T, D>::write_type write_type;
const int shift = UnReadWriteTraits<T, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src_.cols)
{
read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
{
typedef typename UnReadWriteTraits<T, D>::read_type read_type;
typedef typename UnReadWriteTraits<T, D>::write_type write_type;
const int shift = UnReadWriteTraits<T, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src_.cols)
{
read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
@@ -255,44 +255,44 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
const Mask mask, BinOp op)
{
typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
const int shift = BinReadWriteTraits<T1, T2, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src1_.cols)
{
read_type1 src1_n_el = ((const read_type1*)src1)[x];
read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
const Mask mask, BinOp op)
{
typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
const int shift = BinReadWriteTraits<T1, T2, D>::shift;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + shift - 1 < src1_.cols)
{
read_type1 src1_n_el = ((const read_type1*)src1)[x];
read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el;
OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -355,11 +355,11 @@ namespace cv
template <typename T, typename D, typename UnOp, typename Mask>
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0)
{
{
const int shift = device::UnReadWriteTraits<T, D>::shift;
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x * shift);
grid.y = divUp(src.rows, threads.y);
@@ -373,7 +373,7 @@ namespace cv
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
BinOp op, const Mask& mask, cudaStream_t stream = 0)
{
{
const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;
dim3 threads(16, 16, 1);
@@ -392,7 +392,7 @@ namespace cv
template <typename T, typename D, typename UnOp, typename Mask>
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
cudaStream_t stream = 0)
{
{
TransformChooser<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);
}
+3 -3
View File
@@ -69,9 +69,9 @@
#include "cufft.h"
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "npp_staging.h"
#include "nvidia/NCV.hpp"
#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/npp_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 3020
+1 -1
View File
@@ -1378,7 +1378,7 @@ cmpEpsFlt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff, size_t
{
_Tp a = src1[i], b = src2[i];
if( a < 0 ) a ^= C; if( b < 0 ) b ^= C;
_Tp d = std::abs(a - b);
_Tp d = std::abs(double(a - b));
if( d > imaxdiff )
{
idx = i + startidx;
+2 -2
View File
@@ -3,8 +3,8 @@
#
# ----------------------------------------------------------------------------
add_subdirectory(c)
add_subdirectory(cpp)
#add_subdirectory(c)
#add_subdirectory(cpp)
add_subdirectory(gpu)
if(0)
+3 -1
View File
@@ -14,10 +14,12 @@ if (BUILD_EXAMPLES)
"${CMAKE_SOURCE_DIR}/modules/legacy/include"
"${CMAKE_SOURCE_DIR}/modules/contrib/include"
"${CMAKE_SOURCE_DIR}/modules/gpu/include"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
)
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core)
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
@@ -1,60 +1,25 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#pragma warning( disable : 4201 4408 4127 4100)
#include <cstdio>
#include "cvconfig.h"
#if !defined(HAVE_CUDA)
int main( int argc, const char** argv ) { return printf("Please compile the librarary with CUDA support."), -1; }
#else
#include <cuda_runtime.h>
#define CV_NO_BACKWARD_COMPATIBILITY
#include "opencv2/opencv.hpp"
#include "NCVHaarObjectDetection.hpp"
using namespace cv;
using namespace std;
const Size preferredVideoFrameSize(640, 480);
string preferredClassifier = "haarcascade_frontalface_alt.xml";
string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
const Size2i preferredVideoFrameSize(640, 480);
std::string preferredClassifier = "haarcascade_frontalface_alt.xml";
std::string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";
void printSyntax(void)
@@ -62,7 +27,6 @@ void printSyntax(void)
printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n");
}
void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
{
int fontFace = CV_FONT_HERSHEY_PLAIN;
@@ -83,7 +47,6 @@ void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)
va_end(arg_ptr);
}
NCVStatus process(Mat *srcdst,
Ncv32u width, Ncv32u height,
NcvBool bShowAllHypotheses, NcvBool bLargestFace,
@@ -104,15 +67,16 @@ NCVStatus process(Mat *srcdst,
ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride());
NCV_SKIP_COND_BEGIN
for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
{
memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols);
}
NCV_SKIP_COND_BEGIN
(*srcdst).copyTo(h_src_hdr);
ncvStat = h_src.copySolid(d_src, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
@@ -130,8 +94,9 @@ NCVStatus process(Mat *srcdst,
haar.ClassifierSize,
bShowAllHypotheses ? 0 : 4,
1.2f, 1,
(bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace,
gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);
(bLargestFace ? NCVPipeObjDet_FindLargestObject : 0)
| NCVPipeObjDet_VisualizeInPlace,
gpuAllocator, cpuAllocator, devProp, 0);
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
@@ -141,14 +106,16 @@ NCVStatus process(Mat *srcdst,
ncvAssertReturnNcvStat(ncvStat);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
h_src_hdr.copyTo(*srcdst);
for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
{
memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols);
}
NCV_SKIP_COND_END
return NCV_SUCCESS;
}
int main( int argc, const char** argv )
{
NCVStatus ncvStat;
@@ -160,14 +127,19 @@ int main( int argc, const char** argv )
printf(" Space - Switch between NCV and OpenCV\n");
printf(" L - Switch between FullSearch and LargestFace modes\n");
printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");
if (argc != 4 && argc != 1)
return printSyntax(), -1;
VideoCapture capture;
Size frameSize;
bool bQuit = false;
if (argc == 1 || strcmp(argv[1], "-c") == 0)
Size2i frameSize;
if (argc != 4 && argc != 1)
{
printSyntax();
return -1;
}
if (argc == 1 || strcmp(argv[1], "-c") == 0)
{
// Camera input is specified
int camIdx = (argc == 3) ? atoi(argv[2]) : 0;
@@ -192,14 +164,26 @@ int main( int argc, const char** argv )
return printSyntax(), -1;
NcvBool bUseOpenCV = true;
NcvBool bLargestFace = true;
NcvBool bShowAllHypotheses = false;
NcvBool bLargestFace = false; //LargestFace=true is used usually during training
NcvBool bShowAllHypotheses = false;
string classifierFile = (argc == 1) ? preferredClassifier : argv[3];
CascadeClassifier classifierOpenCV;
std::string classifierFile;
if (argc == 1)
{
classifierFile = preferredClassifier;
}
else
{
classifierFile.assign(argv[3]);
}
if (!classifierOpenCV.load(classifierFile))
return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;
{
printf("Error (in OpenCV) opening classifier\n");
printSyntax();
return -1;
}
int devId;
ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
@@ -214,9 +198,9 @@ int main( int argc, const char** argv )
//
//==============================================================================
NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);
NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);
ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);
NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);
ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);
Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
@@ -278,32 +262,36 @@ int main( int argc, const char** argv )
// Main processing loop
//
//==============================================================================
namedWindow(wndTitle, 1);
namedWindow(wndTitle, 1);
Mat frame, gray, frameDisp;
for(;;)
do
{
// For camera and video file, capture the next image
// For camera and video file, capture the next image
capture >> frame;
if (frame.empty())
break;
Mat gray;
cvtColor(frame, gray, CV_BGR2GRAY);
//
// process
//
NcvSize32u minSize = haar.ClassifierSize;
if (bLargestFace)
{
Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
Ncv32u ratioSmallest = std::min(ratioX, ratioY);
ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);
ratioSmallest = std::max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1);
minSize.width *= ratioSmallest;
minSize.height *= ratioSmallest;
}
Ncv32f avgTime;
NcvTimer timer = ncvStartTimer();
if (!bUseOpenCV)
@@ -324,15 +312,16 @@ int main( int argc, const char** argv )
rectsOpenCV,
1.2f,
bShowAllHypotheses && !bLargestFace ? 0 : 4,
(bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,
(bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0)
| CV_HAAR_SCALE_IMAGE,
Size(minSize.width, minSize.height));
for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
rectangle(gray, rectsOpenCV[rt], Scalar(255));
}
Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
cvtColor(gray, frameDisp, CV_GRAY2BGR);
imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");
@@ -347,16 +336,25 @@ int main( int argc, const char** argv )
case ' ':
bUseOpenCV = !bUseOpenCV;
break;
case 'L':case 'l':
case 'L':
case 'l':
bLargestFace = !bLargestFace;
break;
case 'U':case 'u':
case 'U':
case 'u':
bShowAllHypotheses = !bShowAllHypotheses;
break;
case 27:
return 0;
bQuit = true;
break;
}
}
} while (!bQuit);
cvDestroyWindow(wndTitle.c_str());
return 0;
}
#endif
+16 -3
View File
@@ -3,12 +3,15 @@
# ----------------------------------------------------------------------------
project(opencv_test_gpu)
set(the_target "opencv_test_gpu")
file(GLOB test_srcs "src/*.cpp")
source_group("Src" FILES ${test_srcs})
file(GLOB test_hdrs "src/*.h*")
source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})
set(the_target "opencv_test_gpu")
include_directories (
"${CMAKE_SOURCE_DIR}/include/opencv"
@@ -26,11 +29,21 @@ include_directories (
"${CMAKE_SOURCE_DIR}/modules/ml/include"
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CMAKE_CURRENT_BINARY_DIR}"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
"${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
)
include_directories(../cxts)
add_executable(${the_target} ${test_srcs} ${test_hdrs})
if(HAVE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
file(GLOB nvidia "src/nvidia/*.*")
SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
source_group("Src\\nvidia" FILES ${nvidia})
endif()
add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})
# Additional target properties
set_target_properties(${the_target} PROPERTIES
+127
View File
@@ -0,0 +1,127 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvautotestlister_hpp_
#define _ncvautotestlister_hpp_
#include <vector>
#include "NCVTest.hpp"
class NCVAutoTestLister
{
public:
NCVAutoTestLister(std::string testSuiteName, NcvBool bStopOnFirstFail=false, NcvBool bCompactOutput=true)
:
testSuiteName(testSuiteName),
bStopOnFirstFail(bStopOnFirstFail),
bCompactOutput(bCompactOutput)
{
}
void add(INCVTest *test)
{
this->tests.push_back(test);
}
void invoke()
{
Ncv32u nPassed = 0;
Ncv32u nFailed = 0;
Ncv32u nFailedMem = 0;
if (bCompactOutput)
{
printf("Test suite '%s' with %d tests\n",
testSuiteName.c_str(),
(int)(this->tests.size()));
}
for (Ncv32u i=0; i<this->tests.size(); i++)
{
INCVTest &curTest = *tests[i];
NCVTestReport curReport;
bool res = curTest.executeTest(curReport);
if (!bCompactOutput)
{
printf("Test %3i %16s; Consumed mem GPU = %8d, CPU = %8d; %s\n",
i,
curTest.getName().c_str(),
curReport.statsNums["MemGPU"],
curReport.statsNums["MemCPU"],
curReport.statsText["rcode"].c_str());
}
if (res)
{
nPassed++;
if (bCompactOutput)
{
printf(".");
}
}
else
{
if (!curReport.statsText["rcode"].compare("FAILED"))
{
nFailed++;
if (bCompactOutput)
{
printf("x");
}
if (bStopOnFirstFail)
{
break;
}
}
else
{
nFailedMem++;
if (bCompactOutput)
{
printf("m");
}
}
}
fflush(stdout);
}
if (bCompactOutput)
{
printf("\n");
}
printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n",
testSuiteName.c_str(),
(int)(this->tests.size()),
nPassed,
nFailedMem,
nFailed);
}
~NCVAutoTestLister()
{
for (Ncv32u i=0; i<this->tests.size(); i++)
{
delete tests[i];
}
}
private:
NcvBool bStopOnFirstFail;
NcvBool bCompactOutput;
std::string testSuiteName;
std::vector<INCVTest *> tests;
};
#endif // _ncvautotestlister_hpp_
+211
View File
@@ -0,0 +1,211 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvtest_hpp_
#define _ncvtest_hpp_
#pragma warning( disable : 4201 4408 4127 4100)
#include <string>
#include <vector>
#include <map>
#include <memory>
#include <algorithm>
#include <fstream>
#include <cuda_runtime.h>
#include "NPP_staging.hpp"
struct NCVTestReport
{
std::map<std::string, Ncv32u> statsNums;
std::map<std::string, std::string> statsText;
};
class INCVTest
{
public:
virtual bool executeTest(NCVTestReport &report) = 0;
virtual std::string getName() const = 0;
};
class NCVTestProvider : public INCVTest
{
public:
NCVTestProvider(std::string testName)
:
testName(testName)
{
int devId;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&this->devProp, devId), "Error returned from cudaGetDeviceProperties", );
}
virtual bool init() = 0;
virtual bool process() = 0;
virtual bool deinit() = 0;
virtual bool toString(std::ofstream &strOut) = 0;
virtual std::string getName() const
{
return this->testName;
}
virtual ~NCVTestProvider()
{
deinitMemory();
}
virtual bool executeTest(NCVTestReport &report)
{
bool res;
report.statsText["rcode"] = "FAILED";
res = initMemory(report);
if (!res)
{
dumpToFile(report);
deinitMemory();
return false;
}
res = init();
if (!res)
{
dumpToFile(report);
deinit();
deinitMemory();
return false;
}
res = process();
if (!res)
{
dumpToFile(report);
deinit();
deinitMemory();
return false;
}
res = deinit();
if (!res)
{
dumpToFile(report);
deinitMemory();
return false;
}
deinitMemory();
report.statsText["rcode"] = "Passed";
return true;
}
protected:
cudaDeviceProp devProp;
std::auto_ptr<INCVMemAllocator> allocatorGPU;
std::auto_ptr<INCVMemAllocator> allocatorCPU;
private:
std::string testName;
bool initMemory(NCVTestReport &report)
{
this->allocatorGPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
this->allocatorCPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));
if (!this->allocatorGPU.get()->isInitialized() ||
!this->allocatorCPU.get()->isInitialized())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
if (!this->process())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
Ncv32u maxGPUsize = (Ncv32u)this->allocatorGPU.get()->maxSize();
Ncv32u maxCPUsize = (Ncv32u)this->allocatorCPU.get()->maxSize();
report.statsNums["MemGPU"] = maxGPUsize;
report.statsNums["MemCPU"] = maxCPUsize;
this->allocatorGPU.reset(new NCVMemStackAllocator(NCVMemoryTypeDevice, maxGPUsize, devProp.textureAlignment));
this->allocatorCPU.reset(new NCVMemStackAllocator(NCVMemoryTypeHostPinned, maxCPUsize, devProp.textureAlignment));
if (!this->allocatorGPU.get()->isInitialized() ||
!this->allocatorCPU.get()->isInitialized())
{
report.statsText["rcode"] = "Memory FAILED";
return false;
}
return true;
}
void deinitMemory()
{
this->allocatorGPU.reset();
this->allocatorCPU.reset();
}
void dumpToFile(NCVTestReport &report)
{
bool bReasonMem = (0 == report.statsText["rcode"].compare("Memory FAILED"));
std::string fname = "TestDump_";
fname += (bReasonMem ? "m_" : "") + this->testName + ".log";
std::ofstream stream(fname.c_str(), std::ios::trunc | std::ios::out);
if (!stream.is_open()) return;
stream << "NCV Test Failure Log: " << this->testName << std::endl;
stream << "====================================================" << std::endl << std::endl;
stream << "Test initialization report: " << std::endl;
for (std::map<std::string,std::string>::iterator it=report.statsText.begin();
it != report.statsText.end(); it++)
{
stream << it->first << "=" << it->second << std::endl;
}
for (std::map<std::string,Ncv32u>::iterator it=report.statsNums.begin();
it != report.statsNums.end(); it++)
{
stream << it->first << "=" << it->second << std::endl;
}
stream << std::endl;
stream << "Test initialization parameters: " << std::endl;
bool bSerializeRes = false;
try
{
bSerializeRes = this->toString(stream);
}
catch (...)
{
}
if (!bSerializeRes)
{
stream << "Couldn't retrieve object dump" << std::endl;
}
stream.flush();
}
};
#endif // _ncvtest_hpp_
@@ -0,0 +1,161 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _ncvtestsourceprovider_hpp_
#define _ncvtestsourceprovider_hpp_
#include <memory>
#include "NCV.hpp"
#include <opencv2/highgui/highgui.hpp>
template <class T>
class NCVTestSourceProvider
{
public:
NCVTestSourceProvider(Ncv32u seed, T rangeLow, T rangeHigh, Ncv32u maxWidth, Ncv32u maxHeight)
:
bInit(false)
{
ncvAssertPrintReturn(rangeLow < rangeHigh, "NCVTestSourceProvider ctor:: Invalid range", );
int devId;
cudaDeviceProp devProp;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
//Ncv32u maxWpitch = alignUp(maxWidth * sizeof(T), devProp.textureAlignment);
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), maxWidth, maxHeight));
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
this->dataWidth = maxWidth;
this->dataHeight = maxHeight;
srand(seed);
for (Ncv32u i=0; i<maxHeight; i++)
{
for (Ncv32u j=0; j<data.get()->stride(); j++)
{
data.get()->ptr()[i * data.get()->stride() + j] =
(T)(((1.0 * rand()) / RAND_MAX) * (rangeHigh - rangeLow) + rangeLow);
}
}
this->bInit = true;
}
NCVTestSourceProvider(std::string pgmFilename)
:
bInit(false)
{
ncvAssertPrintReturn(sizeof(T) == 1, "NCVTestSourceProvider ctor:: PGM constructor complies only with 8bit types", );
cv::Mat image = cv::imread(pgmFilename);
ncvAssertPrintReturn(!image.empty(), "NCVTestSourceProvider ctor:: PGM file error", );
int devId;
cudaDeviceProp devProp;
ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );
ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );
allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));
data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), image.cols, image.rows));
ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );
this->dataWidth = image.cols;
this->dataHeight = image.rows;
cv::Mat hdr(image.size(), CV_8UC1, data.get()->ptr(), data.get()->pitch());
image.copyTo(hdr);
this->bInit = true;
}
NcvBool fill(NCVMatrix<T> &dst)
{
ncvAssertReturn(this->isInit() &&
dst.memType() == allocatorCPU.get()->memType(), false);
if (dst.width() == 0 || dst.height() == 0)
{
return true;
}
for (Ncv32u i=0; i<dst.height(); i++)
{
Ncv32u srcLine = i % this->dataHeight;
Ncv32u srcFullChunks = dst.width() / this->dataWidth;
for (Ncv32u j=0; j<srcFullChunks; j++)
{
memcpy(dst.ptr() + i * dst.stride() + j * this->dataWidth,
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
this->dataWidth * sizeof(T));
}
Ncv32u srcLastChunk = dst.width() % this->dataWidth;
memcpy(dst.ptr() + i * dst.stride() + srcFullChunks * this->dataWidth,
this->data.get()->ptr() + this->data.get()->stride() * srcLine,
srcLastChunk * sizeof(T));
}
return true;
}
NcvBool fill(NCVVector<T> &dst)
{
ncvAssertReturn(this->isInit() &&
dst.memType() == allocatorCPU.get()->memType(), false);
if (dst.length() == 0)
{
return true;
}
Ncv32u srcLen = this->dataWidth * this->dataHeight;
Ncv32u srcFullChunks = (Ncv32u)dst.length() / srcLen;
for (Ncv32u j=0; j<srcFullChunks; j++)
{
memcpy(dst.ptr() + j * srcLen, this->data.get()->ptr(), srcLen * sizeof(T));
}
Ncv32u srcLastChunk = dst.length() % srcLen;
memcpy(dst.ptr() + srcFullChunks * srcLen, this->data.get()->ptr(), srcLastChunk * sizeof(T));
return true;
}
~NCVTestSourceProvider()
{
data.reset();
allocatorCPU.reset();
}
private:
NcvBool isInit(void)
{
return this->bInit;
}
NcvBool bInit;
std::auto_ptr< INCVMemAllocator > allocatorCPU;
std::auto_ptr< NCVMatrixAlloc<T> > data;
Ncv32u dataWidth;
Ncv32u dataHeight;
};
#endif // _ncvtestsourceprovider_hpp_
+129
View File
@@ -0,0 +1,129 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestCompact.h"
TestCompact::TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage)
:
NCVTestProvider(testName),
src(src),
length(length),
badElem(badElem),
badElemPercentage(badElemPercentage > 100 ? 100 : badElemPercentage)
{
}
bool TestCompact::toString(std::ofstream &strOut)
{
strOut << "length=" << length << std::endl;
strOut << "badElem=" << badElem << std::endl;
strOut << "badElemPercentage=" << badElemPercentage << std::endl;
return true;
}
bool TestCompact::init()
{
return true;
}
bool TestCompact::process()
{
NCVStatus ncvStat;
bool rcode = false;
NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->length);
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->length);
ncvAssertReturn(d_vecSrc.isMemAllocated(), false);
NCVVectorAlloc<Ncv32u> h_vecDst(*this->allocatorCPU.get(), this->length);
ncvAssertReturn(h_vecDst.isMemAllocated(), false);
NCVVectorAlloc<Ncv32u> d_vecDst(*this->allocatorGPU.get(), this->length);
ncvAssertReturn(d_vecDst.isMemAllocated(), false);
NCVVectorAlloc<Ncv32u> h_vecDst_d(*this->allocatorCPU.get(), this->length);
ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_vecSrc), false);
for (Ncv32u i=0; i<this->length; i++)
{
Ncv32u tmp = (h_vecSrc.ptr()[i]) & 0xFF;
tmp = tmp * 99 / 255;
if (tmp < this->badElemPercentage)
{
h_vecSrc.ptr()[i] = this->badElem;
}
}
NCV_SKIP_COND_END
NCVVectorAlloc<Ncv32u> h_dstLen(*this->allocatorCPU.get(), 1);
ncvAssertReturn(h_dstLen.isMemAllocated(), false);
Ncv32u bufSize;
ncvStat = nppsStCompactGetSize_32u(this->length, &bufSize, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
Ncv32u h_outElemNum_h = 0;
NCV_SKIP_COND_BEGIN
ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppsStCompact_32u(d_vecSrc.ptr(), this->length,
d_vecDst.ptr(), h_dstLen.ptr(), this->badElem,
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppsStCompact_32u_host(h_vecSrc.ptr(), this->length, h_vecDst.ptr(), &h_outElemNum_h, this->badElem);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
if (h_dstLen.ptr()[0] != h_outElemNum_h)
{
bLoopVirgin = false;
}
else
{
for (Ncv32u i=0; bLoopVirgin && i < h_outElemNum_h; i++)
{
if (h_vecDst.ptr()[i] != h_vecDst_d.ptr()[i])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestCompact::deinit()
{
return true;
}
+41
View File
@@ -0,0 +1,41 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhypothesescompact_h_
#define _testhypothesescompact_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestCompact : public NCVTestProvider
{
public:
TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestCompact(const TestCompact&);
TestCompact& operator=(const TestCompact&);
NCVTestSourceProvider<Ncv32u> &src;
Ncv32u length;
Ncv32u badElem;
Ncv32u badElemPercentage;
};
#endif // _testhypothesescompact_h_
+163
View File
@@ -0,0 +1,163 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestDrawRects.h"
#include "NCVHaarObjectDetection.hpp"
template <class T>
TestDrawRects<T>::TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
Ncv32u width, Ncv32u height, Ncv32u numRects, T color)
:
NCVTestProvider(testName),
src(src),
src32u(src32u),
width(width),
height(height),
numRects(numRects),
color(color)
{
}
template <class T>
bool TestDrawRects<T>::toString(std::ofstream &strOut)
{
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
strOut << "width=" << width << std::endl;
strOut << "height=" << height << std::endl;
strOut << "numRects=" << numRects << std::endl;
strOut << "color=" << color << std::endl;
return true;
}
template <class T>
bool TestDrawRects<T>::init()
{
return true;
}
template <class T>
bool TestDrawRects<T>::process()
{
NCVStatus ncvStat;
bool rcode = false;
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img_d.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);
ncvAssertReturn(d_rects.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);
ncvAssertReturn(h_rects.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
//fill vector of rectangles with random rects covering the input
NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());
ncvAssertReturn(h_rects_as32u.isMemReused(), false);
ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);
for (Ncv32u i=0; i<this->numRects; i++)
{
h_rects.ptr()[i].x = (Ncv32u)(((1.0 * h_rects.ptr()[i].x) / RAND_MAX) * (this->width-2));
h_rects.ptr()[i].y = (Ncv32u)(((1.0 * h_rects.ptr()[i].y) / RAND_MAX) * (this->height-2));
h_rects.ptr()[i].width = (Ncv32u)(((1.0 * h_rects.ptr()[i].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[i].x));
h_rects.ptr()[i].height = (Ncv32u)(((1.0 * h_rects.ptr()[i].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[i].y));
}
ncvStat = h_rects.copySolid(d_rects, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(), this->width, this->height,
(NcvRect32u *)d_rects.ptr(), this->numRects, this->color, 0);
}
else if (sizeof(T) == sizeof(Ncv8u))
{
ncvStat = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(), this->width, this->height,
(NcvRect32u *)d_rects.ptr(), this->numRects, (Ncv8u)this->color, 0);
}
else
{
ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
}
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCV_SKIP_COND_END
ncvStat = d_img.copySolid(h_img_d, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
NCV_SKIP_COND_BEGIN
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(), this->width, this->height,
(NcvRect32u *)h_rects.ptr(), this->numRects, this->color);
}
else if (sizeof(T) == sizeof(Ncv8u))
{
ncvStat = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(), this->width, this->height,
(NcvRect32u *)h_rects.ptr(), this->numRects, (Ncv8u)this->color);
}
else
{
ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
}
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
//const Ncv64f relEPS = 0.005;
for (Ncv32u i=0; bLoopVirgin && i < h_img.height(); i++)
{
for (Ncv32u j=0; bLoopVirgin && j < h_img.width(); j++)
{
if (h_img.ptr()[h_img.stride()*i+j] != h_img_d.ptr()[h_img_d.stride()*i+j])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
template <class T>
bool TestDrawRects<T>::deinit()
{
return true;
}
template class TestDrawRects<Ncv8u>;
template class TestDrawRects<Ncv32u>;
+44
View File
@@ -0,0 +1,44 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testdrawrects_h_
#define _testdrawrects_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
template <class T>
class TestDrawRects : public NCVTestProvider
{
public:
TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
Ncv32u width, Ncv32u height, Ncv32u numRects, T color);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestDrawRects(const TestDrawRects&);
TestDrawRects& operator=(const TestDrawRects&);
NCVTestSourceProvider<T> &src;
NCVTestSourceProvider<Ncv32u> &src32u;
Ncv32u width;
Ncv32u height;
Ncv32u numRects;
T color;
};
#endif // _testdrawrects_h_
@@ -0,0 +1,267 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include <float.h>
#include "TestHaarCascadeApplication.h"
#include "NCVHaarObjectDetection.hpp"
TestHaarCascadeApplication::TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
std::string cascadeName, Ncv32u width, Ncv32u height)
:
NCVTestProvider(testName),
src(src),
cascadeName(cascadeName),
width(width),
height(height)
{
}
bool TestHaarCascadeApplication::toString(std::ofstream &strOut)
{
strOut << "cascadeName=" << cascadeName << std::endl;
strOut << "width=" << width << std::endl;
strOut << "height=" << height << std::endl;
return true;
}
bool TestHaarCascadeApplication::init()
{
return true;
}
bool TestHaarCascadeApplication::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32u numStages, numNodes, numFeatures;
ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);
NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages);
ncvAssertReturn(d_HaarStages.isMemAllocated(), false);
NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes);
ncvAssertReturn(d_HaarNodes.isMemAllocated(), false);
NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures);
ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false);
HaarClassifierCascadeDescriptor haar;
haar.ClassifierSize.width = haar.ClassifierSize.height = 1;
haar.bNeedsTiltedII = false;
haar.NumClassifierRootNodes = numNodes;
haar.NumClassifierTotalNodes = numNodes;
haar.NumFeatures = numFeatures;
haar.NumStages = numStages;
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false);
ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false);
ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
NCV_SKIP_COND_END
NcvSize32s srcRoi, srcIIRoi, searchRoi;
srcRoi.width = this->width;
srcRoi.height = this->height;
srcIIRoi.width = srcRoi.width + 1;
srcIIRoi.height = srcRoi.height + 1;
searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width;
searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height;
if (searchRoi.width <= 0 || searchRoi.height <= 0)
{
return false;
}
NcvSize32u searchRoiU(searchRoi.width, searchRoi.height);
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
Ncv32u integralWidth = this->width + 1;
Ncv32u integralHeight = this->height + 1;
NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
ncvAssertReturn(d_integralImage.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
ncvAssertReturn(h_integralImage.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_rectStdDev.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_pixelMask.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_rectStdDev.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_pixelMask.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> d_hypotheses(*this->allocatorGPU.get(), this->width * this->height);
ncvAssertReturn(d_hypotheses.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height);
ncvAssertReturn(h_hypotheses.isMemAllocated(), false);
NCVStatus nppStat;
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp);
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp);
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral));
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false);
Ncv32u detectionsOnThisScale_d = 0;
Ncv32u detectionsOnThisScale_h = 0;
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
d_integralImage.ptr(), d_integralImage.pitch(),
NcvSize32u(d_img.width(), d_img.height()),
d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp);
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
NcvSize32u(d_img.width(), d_img.height()),
d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp);
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
const NcvRect32u rect(
HAAR_STDDEV_BORDER,
HAAR_STDDEV_BORDER,
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER);
nppStat = nppiStRectStdDev_32f_C1R(
d_integralImage.ptr(), d_integralImage.pitch(),
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
d_rectStdDev.ptr(), d_rectStdDev.pitch(),
NcvSize32u(searchRoi.width, searchRoi.height), rect,
1.0f, true);
ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
ncvStat = d_integralImage.copySolid(h_integralImage, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
for (Ncv32u i=0; i<searchRoiU.height; i++)
{
for (Ncv32u j=0; j<h_pixelMask.stride(); j++)
{
if (j<searchRoiU.width)
{
h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j;
}
else
{
h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U;
}
}
}
ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false);
Ncv32u fpu_oldcw, fpu_cw;
_controlfp_s(&fpu_cw, 0, 0);
fpu_oldcw = fpu_cw;
_controlfp_s(&fpu_cw, _PC_24, _MCW_PC);
ncvStat = ncvApplyHaarClassifierCascade_host(
h_integralImage, h_rectStdDev, h_pixelMask,
detectionsOnThisScale_h,
haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
searchRoiU, 1, 1.0f);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
_controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC);
NCV_SKIP_COND_END
int devId;
ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
cudaDeviceProp devProp;
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), false);
ncvStat = ncvApplyHaarClassifierCascade_device(
d_integralImage, d_rectStdDev, d_pixelMask,
detectionsOnThisScale_d,
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
searchRoiU, 1, 1.0f,
*this->allocatorGPU.get(), *this->allocatorCPU.get(),
devProp, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false);
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
if (detectionsOnThisScale_d != detectionsOnThisScale_h)
{
bLoopVirgin = false;
}
else
{
std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d);
for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++)
{
if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestHaarCascadeApplication::deinit()
{
return true;
}
@@ -0,0 +1,41 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhaarcascadeapplication_h_
#define _testhaarcascadeapplication_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestHaarCascadeApplication : public NCVTestProvider
{
public:
TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
std::string cascadeName, Ncv32u width, Ncv32u height);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestHaarCascadeApplication(const TestHaarCascadeApplication&);
TestHaarCascadeApplication& operator=(const TestHaarCascadeApplication&);
NCVTestSourceProvider<Ncv8u> &src;
std::string cascadeName;
Ncv32u width;
Ncv32u height;
};
#endif // _testhaarcascadeapplication_h_
@@ -0,0 +1,123 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestHaarCascadeLoader.h"
#include "NCVHaarObjectDetection.hpp"
TestHaarCascadeLoader::TestHaarCascadeLoader(std::string testName, std::string cascadeName)
:
NCVTestProvider(testName),
cascadeName(cascadeName)
{
}
bool TestHaarCascadeLoader::toString(std::ofstream &strOut)
{
strOut << "cascadeName=" << cascadeName << std::endl;
return true;
}
bool TestHaarCascadeLoader::init()
{
return true;
}
bool TestHaarCascadeLoader::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32u numStages, numNodes, numFeatures;
Ncv32u numStages_2 = 0, numNodes_2 = 0, numFeatures_2 = 0;
ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);
NCVVectorAlloc<HaarStage64> h_HaarStages_2(*this->allocatorCPU.get(), numStages);
ncvAssertReturn(h_HaarStages_2.isMemAllocated(), false);
NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes_2(*this->allocatorCPU.get(), numNodes);
ncvAssertReturn(h_HaarNodes_2.isMemAllocated(), false);
NCVVectorAlloc<HaarFeature64> h_HaarFeatures_2(*this->allocatorCPU.get(), numFeatures);
ncvAssertReturn(h_HaarFeatures_2.isMemAllocated(), false);
HaarClassifierCascadeDescriptor haar;
HaarClassifierCascadeDescriptor haar_2;
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
const std::string testNvbinName = "test.nvbin";
ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = ncvHaarStoreNVBIN_host(testNvbinName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = ncvHaarGetClassifierSize(testNvbinName, numStages_2, numNodes_2, numFeatures_2);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = ncvHaarLoadFromFile_host(testNvbinName, haar_2, h_HaarStages_2, h_HaarNodes_2, h_HaarFeatures_2);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
if (
numStages_2 != numStages ||
numNodes_2 != numNodes ||
numFeatures_2 != numFeatures ||
haar.NumStages != haar_2.NumStages ||
haar.NumClassifierRootNodes != haar_2.NumClassifierRootNodes ||
haar.NumClassifierTotalNodes != haar_2.NumClassifierTotalNodes ||
haar.NumFeatures != haar_2.NumFeatures ||
haar.ClassifierSize.width != haar_2.ClassifierSize.width ||
haar.ClassifierSize.height != haar_2.ClassifierSize.height ||
haar.bNeedsTiltedII != haar_2.bNeedsTiltedII ||
haar.bHasStumpsOnly != haar_2.bHasStumpsOnly )
{
bLoopVirgin = false;
}
if (memcmp(h_HaarStages.ptr(), h_HaarStages_2.ptr(), haar.NumStages * sizeof(HaarStage64)) ||
memcmp(h_HaarNodes.ptr(), h_HaarNodes_2.ptr(), haar.NumClassifierTotalNodes * sizeof(HaarClassifierNode128)) ||
memcmp(h_HaarFeatures.ptr(), h_HaarFeatures_2.ptr(), haar.NumFeatures * sizeof(HaarFeature64)) )
{
bLoopVirgin = false;
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestHaarCascadeLoader::deinit()
{
return true;
}
@@ -0,0 +1,34 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhaarcascadeloader_h_
#define _testhaarcascadeloader_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestHaarCascadeLoader : public NCVTestProvider
{
public:
TestHaarCascadeLoader(std::string testName, std::string cascadeName);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
std::string cascadeName;
};
#endif // _testhaarcascadeloader_h_
@@ -0,0 +1,176 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestHypothesesFilter.h"
#include "NCVHaarObjectDetection.hpp"
TestHypothesesFilter::TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps)
:
NCVTestProvider(testName),
src(src),
numDstRects(numDstRects),
minNeighbors(minNeighbors),
eps(eps)
{
}
bool TestHypothesesFilter::toString(std::ofstream &strOut)
{
strOut << "numDstRects=" << numDstRects << std::endl;
strOut << "minNeighbors=" << minNeighbors << std::endl;
strOut << "eps=" << eps << std::endl;
return true;
}
bool TestHypothesesFilter::init()
{
this->canvasWidth = 4096;
this->canvasHeight = 4096;
return true;
}
bool compareRects(const NcvRect32u &r1, const NcvRect32u &r2, Ncv32f eps)
{
double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
return std::abs((Ncv32s)r1.x - (Ncv32s)r2.x) <= delta &&
std::abs((Ncv32s)r1.y - (Ncv32s)r2.y) <= delta &&
std::abs((Ncv32s)r1.x + (Ncv32s)r1.width - (Ncv32s)r2.x - (Ncv32s)r2.width) <= delta &&
std::abs((Ncv32s)r1.y + (Ncv32s)r1.height - (Ncv32s)r2.y - (Ncv32s)r2.height) <= delta;
}
inline bool operator < (const NcvRect32u &a, const NcvRect32u &b)
{
return a.x < b.x;
}
bool TestHypothesesFilter::process()
{
NCVStatus ncvStat;
bool rcode = false;
NCVVectorAlloc<Ncv32u> h_random32u(*this->allocatorCPU.get(), this->numDstRects * sizeof(NcvRect32u) / sizeof(Ncv32u));
ncvAssertReturn(h_random32u.isMemAllocated(), false);
Ncv32u srcSlotSize = 2 * this->minNeighbors + 1;
NCVVectorAlloc<NcvRect32u> h_vecSrc(*this->allocatorCPU.get(), this->numDstRects*srcSlotSize);
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> h_vecDst_groundTruth(*this->allocatorCPU.get(), this->numDstRects);
ncvAssertReturn(h_vecDst_groundTruth.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorCPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_random32u), false);
Ncv32u randCnt = 0;
Ncv64f randVal;
for (Ncv32u i=0; i<this->numDstRects; i++)
{
h_vecDst_groundTruth.ptr()[i].x = i * this->canvasWidth / this->numDstRects + this->canvasWidth / (this->numDstRects * 4);
h_vecDst_groundTruth.ptr()[i].y = i * this->canvasHeight / this->numDstRects + this->canvasHeight / (this->numDstRects * 4);
h_vecDst_groundTruth.ptr()[i].width = this->canvasWidth / (this->numDstRects * 2);
h_vecDst_groundTruth.ptr()[i].height = this->canvasHeight / (this->numDstRects * 2);
Ncv32u numNeighbors = this->minNeighbors + 1 + (Ncv32u)(((1.0 * h_random32u.ptr()[i]) * (this->minNeighbors + 1)) / 0xFFFFFFFF);
numNeighbors = (numNeighbors > srcSlotSize) ? srcSlotSize : numNeighbors;
//fill in strong hypotheses (2 * ((1.0 * randVal) / 0xFFFFFFFF) - 1)
for (Ncv32u j=0; j<numNeighbors; j++)
{
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
h_vecSrc.ptr()[srcSlotSize * i + j].x =
h_vecDst_groundTruth.ptr()[i].x +
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
h_vecSrc.ptr()[srcSlotSize * i + j].y =
h_vecDst_groundTruth.ptr()[i].y +
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));
h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;
h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;
}
//generate weak hypotheses (to be removed in processing)
for (Ncv32u j=numNeighbors; j<srcSlotSize; j++)
{
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
h_vecSrc.ptr()[srcSlotSize * i + j].x =
this->canvasWidth + h_vecDst_groundTruth.ptr()[i].x +
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));
randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();
h_vecSrc.ptr()[srcSlotSize * i + j].y =
this->canvasHeight + h_vecDst_groundTruth.ptr()[i].y +
(Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));
h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;
h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;
}
}
//shuffle
for (Ncv32u i=0; i<this->numDstRects*srcSlotSize-1; i++)
{
Ncv32u randVal = h_random32u.ptr()[randCnt++]; randCnt = randCnt % h_random32u.length();
Ncv32u secondSwap = randVal % (this->numDstRects*srcSlotSize-1 - i);
NcvRect32u tmp = h_vecSrc.ptr()[i + secondSwap];
h_vecSrc.ptr()[i + secondSwap] = h_vecSrc.ptr()[i];
h_vecSrc.ptr()[i] = tmp;
}
NCV_SKIP_COND_END
Ncv32u numHypothesesSrc = h_vecSrc.length();
NCV_SKIP_COND_BEGIN
ncvStat = ncvFilterHypotheses_host(h_vecSrc, numHypothesesSrc, this->minNeighbors, this->eps, NULL);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCV_SKIP_COND_END
//verification
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
if (numHypothesesSrc != this->numDstRects)
{
bLoopVirgin = false;
}
else
{
std::vector<NcvRect32u> tmpRects(numHypothesesSrc);
memcpy(&tmpRects[0], h_vecSrc.ptr(), numHypothesesSrc * sizeof(NcvRect32u));
std::sort(tmpRects.begin(), tmpRects.end());
for (Ncv32u i=0; i<numHypothesesSrc && bLoopVirgin; i++)
{
if (!compareRects(tmpRects[i], h_vecDst_groundTruth.ptr()[i], this->eps))
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestHypothesesFilter::deinit()
{
return true;
}
@@ -0,0 +1,44 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhypothesesfilter_h_
#define _testhypothesesfilter_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestHypothesesFilter : public NCVTestProvider
{
public:
TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestHypothesesFilter(const TestHypothesesFilter&);
TestHypothesesFilter& operator=(const TestHypothesesFilter&);
NCVTestSourceProvider<Ncv32u> &src;
Ncv32u numDstRects;
Ncv32u minNeighbors;
Ncv32f eps;
Ncv32u canvasWidth;
Ncv32u canvasHeight;
};
#endif // _testhypothesesfilter_h_
+134
View File
@@ -0,0 +1,134 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestHypothesesGrow.h"
#include "NCVHaarObjectDetection.hpp"
TestHypothesesGrow::TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale,
Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst)
:
NCVTestProvider(testName),
src(src),
rectWidth(rectWidth),
rectHeight(rectHeight),
rectScale(rectScale),
maxLenSrc(maxLenSrc),
lenSrc(lenSrc),
maxLenDst(maxLenDst),
lenDst(lenDst)
{
}
bool TestHypothesesGrow::toString(std::ofstream &strOut)
{
strOut << "rectWidth=" << rectWidth << std::endl;
strOut << "rectHeight=" << rectHeight << std::endl;
strOut << "rectScale=" << rectScale << std::endl;
strOut << "maxLenSrc=" << maxLenSrc << std::endl;
strOut << "lenSrc=" << lenSrc << std::endl;
strOut << "maxLenDst=" << maxLenDst << std::endl;
strOut << "lenDst=" << lenDst << std::endl;
return true;
}
bool TestHypothesesGrow::init()
{
return true;
}
bool TestHypothesesGrow::process()
{
NCVStatus ncvStat;
bool rcode = false;
NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->maxLenSrc);
ncvAssertReturn(h_vecSrc.isMemAllocated(), false);
NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->maxLenSrc);
ncvAssertReturn(d_vecSrc.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> h_vecDst(*this->allocatorCPU.get(), this->maxLenDst);
ncvAssertReturn(h_vecDst.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> d_vecDst(*this->allocatorGPU.get(), this->maxLenDst);
ncvAssertReturn(d_vecDst.isMemAllocated(), false);
NCVVectorAlloc<NcvRect32u> h_vecDst_d(*this->allocatorCPU.get(), this->maxLenDst);
ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_vecSrc), false);
memset(h_vecDst.ptr(), 0, h_vecDst.length() * sizeof(NcvRect32u));
NCVVectorReuse<Ncv32u> h_vecDst_as32u(h_vecDst.getSegment(), lenDst * sizeof(NcvRect32u) / sizeof(Ncv32u));
ncvAssertReturn(h_vecDst_as32u.isMemReused(), false);
ncvAssertReturn(this->src.fill(h_vecDst_as32u), false);
memcpy(h_vecDst_d.ptr(), h_vecDst.ptr(), h_vecDst.length() * sizeof(NcvRect32u));
NCV_SKIP_COND_END
ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = h_vecDst.copySolid(d_vecDst, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
Ncv32u h_outElemNum_d = 0;
Ncv32u h_outElemNum_h = 0;
NCV_SKIP_COND_BEGIN
h_outElemNum_d = this->lenDst;
ncvStat = ncvGrowDetectionsVector_device(d_vecSrc, this->lenSrc,
d_vecDst, h_outElemNum_d, this->maxLenDst,
this->rectWidth, this->rectHeight, this->rectScale, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
h_outElemNum_h = this->lenDst;
ncvStat = ncvGrowDetectionsVector_host(h_vecSrc, this->lenSrc,
h_vecDst, h_outElemNum_h, this->maxLenDst,
this->rectWidth, this->rectHeight, this->rectScale);
ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
if (h_outElemNum_d != h_outElemNum_h)
{
bLoopVirgin = false;
}
else
{
if (memcmp(h_vecDst.ptr(), h_vecDst_d.ptr(), this->maxLenDst * sizeof(NcvRect32u)))
{
bLoopVirgin = false;
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestHypothesesGrow::deinit()
{
return true;
}
+46
View File
@@ -0,0 +1,46 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testhypothesesgrow_h_
#define _testhypothesesgrow_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestHypothesesGrow : public NCVTestProvider
{
public:
TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,
Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale,
Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestHypothesesGrow(const TestHypothesesGrow&);
TestHypothesesGrow& operator=(const TestHypothesesGrow&);
NCVTestSourceProvider<Ncv32u> &src;
Ncv32u rectWidth;
Ncv32u rectHeight;
Ncv32f rectScale;
Ncv32u maxLenSrc;
Ncv32u lenSrc;
Ncv32u maxLenDst;
Ncv32u lenDst;
};
#endif // _testhypothesesgrow_h_
+185
View File
@@ -0,0 +1,185 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include <math.h>
#include "TestIntegralImage.h"
template <class T_in, class T_out>
TestIntegralImage<T_in, T_out>::TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,
Ncv32u width, Ncv32u height)
:
NCVTestProvider(testName),
src(src),
width(width),
height(height)
{
}
template <class T_in, class T_out>
bool TestIntegralImage<T_in, T_out>::toString(std::ofstream &strOut)
{
strOut << "sizeof(T_in)=" << sizeof(T_in) << std::endl;
strOut << "sizeof(T_out)=" << sizeof(T_out) << std::endl;
strOut << "width=" << width << std::endl;
strOut << "height=" << height << std::endl;
return true;
}
template <class T_in, class T_out>
bool TestIntegralImage<T_in, T_out>::init()
{
return true;
}
template <class T_in, class T_out>
bool TestIntegralImage<T_in, T_out>::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32u widthII = this->width + 1;
Ncv32u heightII = this->height + 1;
NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
ncvAssertReturn(d_imgII.isMemAllocated(), false);
NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
ncvAssertReturn(h_imgII.isMemAllocated(), false);
NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), widthII, heightII);
ncvAssertReturn(h_imgII_d.isMemAllocated(), false);
Ncv32u bufSize;
if (sizeof(T_in) == sizeof(Ncv8u))
{
ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
}
else if (sizeof(T_in) == sizeof(Ncv32f))
{
ncvStat = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
}
else
{
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
}
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
if (sizeof(T_in) == sizeof(Ncv8u))
{
ncvStat = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),
(Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),
NcvSize32u(this->width, this->height),
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
}
else if (sizeof(T_in) == sizeof(Ncv32f))
{
ncvStat = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),
(Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),
NcvSize32u(this->width, this->height),
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
}
else
{
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
}
ncvStat = d_imgII.copySolid(h_imgII_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
if (sizeof(T_in) == sizeof(Ncv8u))
{
ncvStat = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),
(Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),
NcvSize32u(this->width, this->height));
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
}
else if (sizeof(T_in) == sizeof(Ncv32f))
{
ncvStat = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),
(Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),
NcvSize32u(this->width, this->height));
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
}
else
{
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
}
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
{
for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
{
if (sizeof(T_in) == sizeof(Ncv8u))
{
if (h_imgII.ptr()[h_imgII.stride()*i+j] != h_imgII_d.ptr()[h_imgII_d.stride()*i+j])
{
bLoopVirgin = false;
}
}
else if (sizeof(T_in) == sizeof(Ncv32f))
{
if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*i+j] - (float)h_imgII_d.ptr()[h_imgII_d.stride()*i+j]) > 0.01f)
{
bLoopVirgin = false;
}
}
else
{
ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
template <class T_in, class T_out>
bool TestIntegralImage<T_in, T_out>::deinit()
{
return true;
}
template class TestIntegralImage<Ncv8u, Ncv32u>;
template class TestIntegralImage<Ncv32f, Ncv32f>;
+40
View File
@@ -0,0 +1,40 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testintegralimage_h_
#define _testintegralimage_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
template <class T_in, class T_out>
class TestIntegralImage : public NCVTestProvider
{
public:
TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,
Ncv32u width, Ncv32u height);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestIntegralImage(const TestIntegralImage&);
TestIntegralImage& operator=(const TestIntegralImage&);
NCVTestSourceProvider<T_in> &src;
Ncv32u width;
Ncv32u height;
};
#endif // _testintegralimage_h_
@@ -0,0 +1,117 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include "TestIntegralImageSquared.h"
TestIntegralImageSquared::TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u width, Ncv32u height)
:
NCVTestProvider(testName),
src(src),
width(width),
height(height)
{
}
bool TestIntegralImageSquared::toString(std::ofstream &strOut)
{
strOut << "width=" << width << std::endl;
strOut << "height=" << height << std::endl;
return true;
}
bool TestIntegralImageSquared::init()
{
return true;
}
bool TestIntegralImageSquared::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32u widthSII = this->width + 1;
Ncv32u heightSII = this->height + 1;
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
ncvAssertReturn(d_imgSII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
ncvAssertReturn(h_imgSII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> h_imgSII_d(*this->allocatorCPU.get(), widthSII, heightSII);
ncvAssertReturn(h_imgSII_d.isMemAllocated(), false);
Ncv32u bufSize;
ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
d_imgSII.ptr(), d_imgSII.pitch(),
NcvSize32u(this->width, this->height),
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = d_imgSII.copySolid(h_imgSII_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
h_imgSII.ptr(), h_imgSII.pitch(),
NcvSize32u(this->width, this->height));
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
{
for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
{
if (h_imgSII.ptr()[h_imgSII.stride()*i+j] != h_imgSII_d.ptr()[h_imgSII_d.stride()*i+j])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestIntegralImageSquared::deinit()
{
return true;
}
@@ -0,0 +1,39 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testintegralimagesquared_h_
#define _testintegralimagesquared_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestIntegralImageSquared : public NCVTestProvider
{
public:
TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u width, Ncv32u height);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestIntegralImageSquared(const TestIntegralImageSquared&);
TestIntegralImageSquared& operator=(const TestIntegralImageSquared&);
NCVTestSourceProvider<Ncv8u> &src;
Ncv32u width;
Ncv32u height;
};
#endif // _testintegralimagesquared_h_
+180
View File
@@ -0,0 +1,180 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include <math.h>
#include "TestRectStdDev.h"
TestRectStdDev::TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,
NcvBool bTextureCache)
:
NCVTestProvider(testName),
src(src),
width(width),
height(height),
rect(rect),
scaleFactor(scaleFactor),
bTextureCache(bTextureCache)
{
}
bool TestRectStdDev::toString(std::ofstream &strOut)
{
strOut << "width=" << width << std::endl;
strOut << "height=" << height << std::endl;
strOut << "rect=[" << rect.x << ", " << rect.y << ", " << rect.width << ", " << rect.height << "]\n";
strOut << "scaleFactor=" << scaleFactor << std::endl;
strOut << "bTextureCache=" << bTextureCache << std::endl;
return true;
}
bool TestRectStdDev::init()
{
return true;
}
bool TestRectStdDev::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32s _normWidth = (Ncv32s)this->width - this->rect.x - this->rect.width + 1;
Ncv32s _normHeight = (Ncv32s)this->height - this->rect.y - this->rect.height + 1;
if (_normWidth <= 0 || _normHeight <= 0)
{
return true;
}
Ncv32u normWidth = (Ncv32u)_normWidth;
Ncv32u normHeight = (Ncv32u)_normHeight;
NcvSize32u szNormRoi(normWidth, normHeight);
Ncv32u widthII = this->width + 1;
Ncv32u heightII = this->height + 1;
Ncv32u widthSII = this->width + 1;
Ncv32u heightSII = this->height + 1;
NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32u> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
ncvAssertReturn(d_imgII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32u> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
ncvAssertReturn(h_imgII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
ncvAssertReturn(d_imgSII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
ncvAssertReturn(h_imgSII.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32f> d_norm(*this->allocatorGPU.get(), normWidth, normHeight);
ncvAssertReturn(d_norm.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32f> h_norm(*this->allocatorCPU.get(), normWidth, normHeight);
ncvAssertReturn(h_norm.isMemAllocated(), false);
NCVMatrixAlloc<Ncv32f> h_norm_d(*this->allocatorCPU.get(), normWidth, normHeight);
ncvAssertReturn(h_norm_d.isMemAllocated(), false);
Ncv32u bufSizeII, bufSizeSII;
ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSizeII, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSizeSII, this->devProp);
ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
Ncv32u bufSize = bufSizeII > bufSizeSII ? bufSizeII : bufSizeSII;
NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
d_imgII.ptr(), d_imgII.pitch(),
NcvSize32u(this->width, this->height),
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
d_imgSII.ptr(), d_imgSII.pitch(),
NcvSize32u(this->width, this->height),
d_tmpBuf.ptr(), bufSize, this->devProp);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStRectStdDev_32f_C1R(d_imgII.ptr(), d_imgII.pitch(),
d_imgSII.ptr(), d_imgSII.pitch(),
d_norm.ptr(), d_norm.pitch(),
szNormRoi, this->rect,
this->scaleFactor,
this->bTextureCache);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = d_norm.copySolid(h_norm_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStIntegral_8u32u_C1R_host(h_img.ptr(), h_img.pitch(),
h_imgII.ptr(), h_imgII.pitch(),
NcvSize32u(this->width, this->height));
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
h_imgSII.ptr(), h_imgSII.pitch(),
NcvSize32u(this->width, this->height));
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
ncvStat = nppiStRectStdDev_32f_C1R_host(h_imgII.ptr(), h_imgII.pitch(),
h_imgSII.ptr(), h_imgSII.pitch(),
h_norm.ptr(), h_norm.pitch(),
szNormRoi, this->rect,
this->scaleFactor);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
const Ncv64f relEPS = 0.005;
for (Ncv32u i=0; bLoopVirgin && i < h_norm.height(); i++)
{
for (Ncv32u j=0; bLoopVirgin && j < h_norm.width(); j++)
{
Ncv64f absErr = fabs(h_norm.ptr()[h_norm.stride()*i+j] - h_norm_d.ptr()[h_norm_d.stride()*i+j]);
Ncv64f relErr = absErr / h_norm.ptr()[h_norm.stride()*i+j];
if (relErr > relEPS)
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
bool TestRectStdDev::deinit()
{
return true;
}
+44
View File
@@ -0,0 +1,44 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testrectstddev_h_
#define _testrectstddev_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
class TestRectStdDev : public NCVTestProvider
{
public:
TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,
NcvBool bTextureCache);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestRectStdDev(const TestRectStdDev&);
TestRectStdDev& operator=(const TestRectStdDev&);
NCVTestSourceProvider<Ncv8u> &src;
NcvRect32u rect;
Ncv32u width;
Ncv32u height;
Ncv32f scaleFactor;
NcvBool bTextureCache;
};
#endif // _testrectstddev_h_
+161
View File
@@ -0,0 +1,161 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include <math.h>
#include "TestResize.h"
template <class T>
TestResize<T>::TestResize(std::string testName, NCVTestSourceProvider<T> &src,
Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache)
:
NCVTestProvider(testName),
src(src),
width(width),
height(height),
scaleFactor(scaleFactor),
bTextureCache(bTextureCache)
{
}
template <class T>
bool TestResize<T>::toString(std::ofstream &strOut)
{
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
strOut << "width=" << width << std::endl;
strOut << "scaleFactor=" << scaleFactor << std::endl;
strOut << "bTextureCache=" << bTextureCache << std::endl;
return true;
}
template <class T>
bool TestResize<T>::init()
{
return true;
}
template <class T>
bool TestResize<T>::process()
{
NCVStatus ncvStat;
bool rcode = false;
Ncv32s smallWidth = this->width / this->scaleFactor;
Ncv32s smallHeight = this->height / this->scaleFactor;
if (smallWidth == 0 || smallHeight == 0)
{
return true;
}
NcvSize32u srcSize(this->width, this->height);
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), smallWidth, smallHeight);
ncvAssertReturn(d_small.isMemAllocated(), false);
NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), smallWidth, smallHeight);
ncvAssertReturn(h_small.isMemAllocated(), false);
NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), smallWidth, smallHeight);
ncvAssertReturn(h_small_d.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
NCV_SKIP_COND_END
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_BEGIN
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = nppiStDownsampleNearest_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
(Ncv32u *)d_small.ptr(), d_small.pitch(),
srcSize, this->scaleFactor,
this->bTextureCache);
}
else if (sizeof(T) == sizeof(Ncv64u))
{
ncvStat = nppiStDownsampleNearest_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
(Ncv64u *)d_small.ptr(), d_small.pitch(),
srcSize, this->scaleFactor,
this->bTextureCache);
}
else
{
ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
}
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
ncvStat = d_small.copySolid(h_small_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_BEGIN
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = nppiStDownsampleNearest_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
(Ncv32u *)h_small.ptr(), h_small.pitch(),
srcSize, this->scaleFactor);
}
else if (sizeof(T) == sizeof(Ncv64u))
{
ncvStat = nppiStDownsampleNearest_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
(Ncv64u *)h_small.ptr(), h_small.pitch(),
srcSize, this->scaleFactor);
}
else
{
ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
}
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
//const Ncv64f relEPS = 0.005;
for (Ncv32u i=0; bLoopVirgin && i < h_small.height(); i++)
{
for (Ncv32u j=0; bLoopVirgin && j < h_small.width(); j++)
{
if (h_small.ptr()[h_small.stride()*i+j] != h_small_d.ptr()[h_small_d.stride()*i+j])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
template <class T>
bool TestResize<T>::deinit()
{
return true;
}
template class TestResize<Ncv32u>;
template class TestResize<Ncv64u>;
+42
View File
@@ -0,0 +1,42 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testresize_h_
#define _testresize_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
template <class T>
class TestResize : public NCVTestProvider
{
public:
TestResize(std::string testName, NCVTestSourceProvider<T> &src,
Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestResize(const TestResize&);
TestResize& operator=(const TestResize&);
NCVTestSourceProvider<T> &src;
Ncv32u width;
Ncv32u height;
Ncv32u scaleFactor;
NcvBool bTextureCache;
};
#endif // _testresize_h_
+148
View File
@@ -0,0 +1,148 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#include <math.h>
#include "TestTranspose.h"
template <class T>
TestTranspose<T>::TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,
Ncv32u width, Ncv32u height)
:
NCVTestProvider(testName),
src(src),
width(width),
height(height)
{
}
template <class T>
bool TestTranspose<T>::toString(std::ofstream &strOut)
{
strOut << "sizeof(T)=" << sizeof(T) << std::endl;
strOut << "width=" << width << std::endl;
return true;
}
template <class T>
bool TestTranspose<T>::init()
{
return true;
}
template <class T>
bool TestTranspose<T>::process()
{
NCVStatus ncvStat;
bool rcode = false;
NcvSize32u srcSize(this->width, this->height);
NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
ncvAssertReturn(d_img.isMemAllocated(), false);
NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
ncvAssertReturn(h_img.isMemAllocated(), false);
NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);
ncvAssertReturn(d_dst.isMemAllocated(), false);
NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);
ncvAssertReturn(h_dst.isMemAllocated(), false);
NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);
ncvAssertReturn(h_dst_d.isMemAllocated(), false);
NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
NCV_SKIP_COND_BEGIN
ncvAssertReturn(this->src.fill(h_img), false);
NCV_SKIP_COND_END
ncvStat = h_img.copySolid(d_img, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_BEGIN
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
(Ncv32u *)d_dst.ptr(), d_dst.pitch(),
NcvSize32u(this->width, this->height));
}
else if (sizeof(T) == sizeof(Ncv64u))
{
ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
(Ncv64u *)d_dst.ptr(), d_dst.pitch(),
NcvSize32u(this->width, this->height));
}
else
{
ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
}
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
ncvStat = d_dst.copySolid(h_dst_d, 0);
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_BEGIN
if (sizeof(T) == sizeof(Ncv32u))
{
ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
(Ncv32u *)h_dst.ptr(), h_dst.pitch(),
NcvSize32u(this->width, this->height));
}
else if (sizeof(T) == sizeof(Ncv64u))
{
ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
(Ncv64u *)h_dst.ptr(), h_dst.pitch(),
NcvSize32u(this->width, this->height));
}
else
{
ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
}
ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
NCV_SKIP_COND_END
//bit-to-bit check
bool bLoopVirgin = true;
NCV_SKIP_COND_BEGIN
//const Ncv64f relEPS = 0.005;
for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)
{
for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)
{
if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])
{
bLoopVirgin = false;
}
}
}
NCV_SKIP_COND_END
if (bLoopVirgin)
{
rcode = true;
}
return rcode;
}
template <class T>
bool TestTranspose<T>::deinit()
{
return true;
}
template class TestTranspose<Ncv32u>;
template class TestTranspose<Ncv64u>;
+41
View File
@@ -0,0 +1,41 @@
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual
* property and proprietary rights in and to this software and
* related documentation and any modifications thereto.
* Any use, reproduction, disclosure, or distribution of this
* software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef _testtranspose_h_
#define _testtranspose_h_
#include "NCVTest.hpp"
#include "NCVTestSourceProvider.hpp"
template <class T>
class TestTranspose : public NCVTestProvider
{
public:
TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,
Ncv32u width, Ncv32u height);
virtual bool init();
virtual bool process();
virtual bool deinit();
virtual bool toString(std::ofstream &strOut);
private:
TestTranspose(const TestTranspose&);
TestTranspose& operator=(const TestTranspose&);
NCVTestSourceProvider<T> &src;
Ncv32u width;
Ncv32u height;
};
#endif // _testtranspose_h_
+346
View File
@@ -0,0 +1,346 @@
#pragma warning (disable : 4408 4201 4100)
#include <cstdio>
#include "NCV.hpp"
#include "NCVHaarObjectDetection.hpp"
#include "TestIntegralImage.h"
#include "TestIntegralImageSquared.h"
#include "TestRectStdDev.h"
#include "TestResize.h"
#include "TestCompact.h"
#include "TestTranspose.h"
#include "TestDrawRects.h"
#include "TestHypothesesGrow.h"
#include "TestHypothesesFilter.h"
#include "TestHaarCascadeLoader.h"
#include "TestHaarCascadeApplication.h"
#include "NCVAutoTestLister.hpp"
#include "NCVTestSourceProvider.hpp"
template <class T_in, class T_out>
void generateIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T_in> &src,
Ncv32u maxWidth, Ncv32u maxHeight)
{
for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", i, 2);
testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, i, 2));
}
for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", 2, i);
testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, 2, i));
}
//test VGA
testLister.add(new TestIntegralImage<T_in, T_out>("LinIntImg_VGA", src, 640, 480));
//TODO: add tests of various resolutions up to 4096x4096
}
void generateSquaredIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u maxWidth, Ncv32u maxHeight)
{
for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", i, 32);
testLister.add(new TestIntegralImageSquared(testName, src, i, 32));
}
for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", 32, i);
testLister.add(new TestIntegralImageSquared(testName, src, 32, i));
}
//test VGA
testLister.add(new TestIntegralImageSquared("SqLinIntImg_VGA", src, 640, 480));
//TODO: add tests of various resolutions up to 4096x4096
}
void generateRectStdDevTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u maxWidth, Ncv32u maxHeight)
{
NcvRect32u rect(1,1,18,18);
for (Ncv32f _i=32; _i<maxHeight/2 && _i < maxWidth/2; _i*=1.2f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "RectStdDevW%dH%d", i*2, i);
testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1, true));
testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1.5, false));
testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 1, false));
testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 2.5, true));
}
//test VGA
testLister.add(new TestRectStdDev("RectStdDev_VGA", src, 640, 480, rect, 1, true));
//TODO: add tests of various resolutions up to 4096x4096
}
template <class T>
void generateResizeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)
{
//test VGA
for (Ncv32u i=1; i<480; i+=3)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "TestResize_VGA_s%d", i);
testLister.add(new TestResize<T>(testName, src, 640, 480, i, true));
testLister.add(new TestResize<T>(testName, src, 640, 480, i, false));
}
//test HD
for (Ncv32u i=1; i<1080; i+=5)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "TestResize_1080_s%d", i);
testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, true));
testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, false));
}
//TODO: add tests of various resolutions up to 4096x4096
}
void generateNPPSTVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
//compaction
for (Ncv32f _i=256.0; _i<maxLength; _i*=1.1f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "Compaction%d", i);
testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 30));
}
for (Ncv32u i=1; i<260; i++)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "Compaction%d", i);
testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 70));
testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 0));
testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 100));
}
for (Ncv32u i=256*256-256; i<256*256+257; i++)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "Compaction%d", i);
testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 40));
}
for (Ncv32u i=256*256*256-10; i<256*256*256+10; i++)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "Compaction%d", i);
testLister.add(new TestCompact(testName, src, i, 0x00000000, 2));
}
}
template <class T>
void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)
{
for (int i=2; i<64; i+=4)
{
for (int j=2; j<64; j+=4)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);
testLister.add(new TestTranspose<T>(testName, src, i, j));
}
}
for (int i=1; i<128; i+=1)
{
for (int j=1; j<2; j+=1)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);
testLister.add(new TestTranspose<T>(testName, src, i, j));
}
}
testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));
testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));
}
template <class T>
void generateDrawRectsTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,
Ncv32u maxWidth, Ncv32u maxHeight)
{
for (Ncv32f _i=16.0; _i<maxWidth; _i*=1.1f)
{
Ncv32u i = (Ncv32u)_i;
Ncv32u j = maxHeight * i / maxWidth;
if (!j) continue;
char testName[80];
sprintf_s(testName, sizeof(testName), "DrawRectsW%dH%d", i, j);
if (sizeof(T) == sizeof(Ncv32u))
{
testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFFFFFFFF));
}
else if (sizeof(T) == sizeof(Ncv8u))
{
testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFF));
}
else
{
ncvAssertPrintCheck(false, "Attempted to instantiate non-existing DrawRects test suite");
}
}
//test VGA
testLister.add(new TestDrawRects<T>("DrawRects_VGA", src, src32u, 640, 480, 640*480/1000, (T)0xFF));
//TODO: add tests of various resolutions up to 4096x4096
}
void generateVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
//growth
for (Ncv32f _i=10.0; _i<maxLength; _i*=1.1f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "VectorGrow%d", i);
testLister.add(new TestHypothesesGrow(testName, src, 20, 20, 2.2f, i, i/2, i, i/4));
testLister.add(new TestHypothesesGrow(testName, src, 10, 42, 1.2f, i, i, i, 0));
}
testLister.add(new TestHypothesesGrow("VectorGrow01b", src, 10, 42, 1.2f, 10, 0, 10, 1));
testLister.add(new TestHypothesesGrow("VectorGrow11b", src, 10, 42, 1.2f, 10, 1, 10, 1));
testLister.add(new TestHypothesesGrow("VectorGrow10b", src, 10, 42, 1.2f, 10, 1, 10, 0));
testLister.add(new TestHypothesesGrow("VectorGrow00b", src, 10, 42, 1.2f, 10, 0, 10, 0));
}
void generateHypothesesFiltrationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)
{
for (Ncv32f _i=1.0; _i<maxLength; _i*=1.1f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "HypFilter%d", i);
testLister.add(new TestHypothesesFilter(testName, src, i, 3, 0.2f));
testLister.add(new TestHypothesesFilter(testName, src, i, 0, 0.2f));
testLister.add(new TestHypothesesFilter(testName, src, i, 1, 0.1f));
}
}
void generateHaarLoaderTests(NCVAutoTestLister &testLister)
{
testLister.add(new TestHaarCascadeLoader("haarcascade_eye.xml", "haarcascade_eye.xml"));
testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt.xml", "haarcascade_frontalface_alt.xml"));
testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt2.xml", "haarcascade_frontalface_alt2.xml"));
testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt_tree.xml", "haarcascade_frontalface_alt_tree.xml"));
testLister.add(new TestHaarCascadeLoader("haarcascade_eye_tree_eyeglasses.xml", "haarcascade_eye_tree_eyeglasses.xml"));
}
void generateHaarApplicationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,
Ncv32u maxWidth, Ncv32u maxHeight)
{
for (Ncv32u i=20; i<512; i+=11)
{
for (Ncv32u j=20; j<128; j+=5)
{
char testName[80];
sprintf_s(testName, sizeof(testName), "HaarAppl%d_%d", i, j);
testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", j, i));
}
}
for (Ncv32f _i=20.0; _i<maxWidth; _i*=1.1f)
{
Ncv32u i = (Ncv32u)_i;
char testName[80];
sprintf_s(testName, sizeof(testName), "HaarAppl%d", i);
testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", i, i));
}
}
static void devNullOutput(const char *msg)
{
}
int main_nvidia()
{
printf("Testing NVIDIA Computer Vision SDK\n");
printf("==================================\n");
ncvSetDebugOutputHandler(devNullOutput);
NCVAutoTestLister testListerII("NPPST Integral Image" );//,,true, false);
NCVAutoTestLister testListerSII("NPPST Squared Integral Image" );//,,true, false);
NCVAutoTestLister testListerRStdDev("NPPST RectStdDev" );//,,true, false);
NCVAutoTestLister testListerResize("NPPST Resize" );//,,true, false);
NCVAutoTestLister testListerNPPSTVectorOperations("NPPST Vector Operations" );//,,true, false);
NCVAutoTestLister testListerTranspose("NPPST Transpose" );//,,true, false);
NCVAutoTestLister testListerVectorOperations("Vector Operations" );//,,true, false);
NCVAutoTestLister testListerHaarLoader("Haar Cascade Loader" );//,,true, false);
NCVAutoTestLister testListerHaarAppl("Haar Cascade Application" );//,,true, false);
NCVAutoTestLister testListerHypFiltration("Hypotheses Filtration" );//,,true, false);
NCVAutoTestLister testListerVisualize("Visualization" );//,,true, false);
printf("Initializing data source providers\n");
NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, 0xFFFFFFFFFFFFFFFF, 4096, 4096);
NCVTestSourceProvider<Ncv8u> testSrcFacesVGA_8u("../../data/group_1_640x480_VGA.pgm");
NCVTestSourceProvider<Ncv32f> testSrcRandom_32f(2010, -1.0f, 1.0f, 4096, 4096);
printf("Generating NPPST test suites\n");
generateIntegralTests<Ncv8u, Ncv32u>(testListerII, testSrcRandom_8u, 4096, 4096);
generateIntegralTests<Ncv32f, Ncv32f>(testListerII, testSrcRandom_32f, 4096, 4096);
generateSquaredIntegralTests(testListerSII, testSrcRandom_8u, 4096, 4096);
generateRectStdDevTests(testListerRStdDev, testSrcRandom_8u, 4096, 4096);
generateResizeTests(testListerResize, testSrcRandom_32u);
generateResizeTests(testListerResize, testSrcRandom_64u);
generateNPPSTVectorTests(testListerNPPSTVectorOperations, testSrcRandom_32u, 4096*4096);
generateTransposeTests(testListerTranspose, testSrcRandom_32u);
generateTransposeTests(testListerTranspose, testSrcRandom_64u);
printf("Generating NCV test suites\n");
generateDrawRectsTests(testListerVisualize, testSrcRandom_8u, testSrcRandom_32u, 4096, 4096);
generateDrawRectsTests(testListerVisualize, testSrcRandom_32u, testSrcRandom_32u, 4096, 4096);
generateVectorTests(testListerVectorOperations, testSrcRandom_32u, 4096*4096);
generateHypothesesFiltrationTests(testListerHypFiltration, testSrcRandom_32u, 1024);
generateHaarLoaderTests(testListerHaarLoader);
generateHaarApplicationTests(testListerHaarAppl, testSrcFacesVGA_8u, 1280, 720);
testListerII.invoke();
testListerSII.invoke();
testListerRStdDev.invoke();
testListerResize.invoke();
testListerNPPSTVectorOperations.invoke();
testListerTranspose.invoke();
testListerVisualize.invoke();
testListerVectorOperations.invoke();
testListerHypFiltration.invoke();
testListerHaarLoader.invoke();
testListerHaarAppl.invoke();
return 0;
}
+63
View File
@@ -0,0 +1,63 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "gputest.hpp"
#include "cvconfig.h"
class CV_NVidiaTestsCaller : public CvTest
{
public:
CV_NVidiaTestsCaller() : CvTest("GPU-NVidia", "NVidia") {}
virtual ~CV_NVidiaTestsCaller() {}
protected:
void run( int )
{
#if defined(HAVE_CUDA)
int main_nvidia();
main_nvidia();
ts->set_failed_test_info(CvTS::OK);
#else
ts->set_failed_test_info(CvTS::SKIPPED);
#endif
}
} CV_NVidiaTestsCaller_test;