Merge pull request #8465 from claudiofantacci:enh/cudastreamhog
This commit is contained in:
commit
36e8017580
@ -98,28 +98,31 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y, int ncells_block_x, int ncells_block_y)
|
||||
void set_up_constants(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y,
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cncells_block_x, &ncells_block_x, sizeof(ncells_block_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cncells_block_y, &ncells_block_y, sizeof(ncells_block_y)) );
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cnbins, &nbins, sizeof(nbins), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_stride_x, &block_stride_x, sizeof(block_stride_x), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_stride_y, &block_stride_y, sizeof(block_stride_y), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cncells_block_x, &ncells_block_x, sizeof(ncells_block_x), 0, cudaMemcpyHostToDevice, stream));
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cncells_block_y, &ncells_block_y, sizeof(ncells_block_y), 0, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
int block_hist_size = nbins * ncells_block_x * ncells_block_y;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_hist_size, &block_hist_size, sizeof(block_hist_size), 0, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
int block_hist_size_2up = power_2up(block_hist_size);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up), 0, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
int descr_width = nblocks_win_x * block_hist_size;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cdescr_width, &descr_width, sizeof(descr_width), 0, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
int descr_size = descr_width * nblocks_win_y;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
|
||||
cudaSafeCall(cudaMemcpyToSymbolAsync(cdescr_size, &descr_size, sizeof(descr_size), 0, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
|
||||
@ -230,10 +233,15 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
//declaration of variables and invoke the kernel with the calculated number of blocks
|
||||
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, const PtrStepSzf& grad,
|
||||
const PtrStepSzb& qangle, float sigma, float* block_hists,
|
||||
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y)
|
||||
void compute_hists(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int height, int width,
|
||||
const PtrStepSzf& grad, const PtrStepSzb& qangle,
|
||||
float sigma,
|
||||
float* block_hists,
|
||||
int cell_size_x, int cell_size_y,
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
const int ncells_block = ncells_block_x * ncells_block_y;
|
||||
const int patch_side = cell_size_x / 4;
|
||||
@ -259,20 +267,15 @@ namespace cv { namespace cuda { namespace device
|
||||
int final_hists_size = (nbins * ncells_block * nblocks) * sizeof(float);
|
||||
int smem = hists_size + final_hists_size;
|
||||
if (nblocks == 4)
|
||||
compute_hists_kernel_many_blocks<4><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
compute_hists_kernel_many_blocks<4><<<grid, threads, smem, stream>>>(img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else if (nblocks == 3)
|
||||
compute_hists_kernel_many_blocks<3><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
compute_hists_kernel_many_blocks<3><<<grid, threads, smem, stream>>>(img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else if (nblocks == 2)
|
||||
compute_hists_kernel_many_blocks<2><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
compute_hists_kernel_many_blocks<2><<<grid, threads, smem, stream>>>(img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else
|
||||
compute_hists_kernel_many_blocks<1><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
compute_hists_kernel_many_blocks<1><<<grid, threads, smem, stream>>>(img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
|
||||
|
||||
@ -347,8 +350,14 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y)
|
||||
void normalize_hists(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int height, int width,
|
||||
float* block_hists,
|
||||
float threshold,
|
||||
int cell_size_x, int cell_size_y,
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
const int nblocks = 1;
|
||||
|
||||
@ -361,21 +370,19 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
|
||||
|
||||
if (nthreads == 32)
|
||||
normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads, 0, stream>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 64)
|
||||
normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads, 0, stream>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 128)
|
||||
normalize_hists_kernel_many_blocks<128, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<128, nblocks><<<grid, threads, 0, stream>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 256)
|
||||
normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads, 0, stream>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 512)
|
||||
normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads, 0, stream>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else
|
||||
CV_Error(cv::Error::StsBadArg, "normalize_hists: histogram's size is too big, try to decrease number of bins");
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
@ -511,8 +518,10 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
|
||||
template <int nthreads>
|
||||
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
|
||||
const float* block_hists, PtrStepf descriptors)
|
||||
__global__ void extract_descrs_by_rows_kernel(const int img_block_width,
|
||||
const int win_block_stride_x, const int win_block_stride_y,
|
||||
const float* block_hists,
|
||||
PtrStepf descriptors)
|
||||
{
|
||||
// Get left top corner of the window in src
|
||||
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
|
||||
@ -531,8 +540,14 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
|
||||
int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors)
|
||||
void extract_descrs_by_rows(int win_height, int win_width,
|
||||
int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x,
|
||||
int height, int width,
|
||||
float* block_hists, int cell_size_x,
|
||||
int ncells_block_x,
|
||||
PtrStepSzf descriptors,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
|
||||
@ -544,17 +559,16 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 grid(img_win_width, img_win_height);
|
||||
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
|
||||
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
extract_descrs_by_rows_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
|
||||
|
||||
template <int nthreads>
|
||||
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
|
||||
const int win_block_stride_y, const float* block_hists,
|
||||
__global__ void extract_descrs_by_cols_kernel(const int img_block_width,
|
||||
const int win_block_stride_x, const int win_block_stride_y,
|
||||
const float* block_hists,
|
||||
PtrStepf descriptors)
|
||||
{
|
||||
// Get left top corner of the window in src
|
||||
@ -579,9 +593,14 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
PtrStepSzf descriptors)
|
||||
void extract_descrs_by_cols(int win_height, int win_width,
|
||||
int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x,
|
||||
int height, int width,
|
||||
float* block_hists,
|
||||
int cell_size_x, int ncells_block_x,
|
||||
PtrStepSzf descriptors,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
|
||||
@ -593,11 +612,9 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 grid(img_win_width, img_win_height);
|
||||
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
|
||||
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
extract_descrs_by_cols_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -707,8 +724,12 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void compute_gradients_8UC4(int nbins, int height, int width, const PtrStepSzb& img,
|
||||
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
|
||||
void compute_gradients_8UC4(int nbins,
|
||||
int height, int width, const PtrStepSzb& img,
|
||||
float angle_scale,
|
||||
PtrStepSzf grad, PtrStepSzb qangle,
|
||||
bool correct_gamma,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
(void)nbins;
|
||||
const int nthreads = 256;
|
||||
@ -717,13 +738,11 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
|
||||
|
||||
if (correct_gamma)
|
||||
compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
|
||||
compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
|
||||
else
|
||||
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
|
||||
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <int nthreads, int correct_gamma>
|
||||
@ -780,8 +799,12 @@ namespace cv { namespace cuda { namespace device
|
||||
}
|
||||
|
||||
|
||||
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
|
||||
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
|
||||
void compute_gradients_8UC1(int nbins,
|
||||
int height, int width, const PtrStepSzb& img,
|
||||
float angle_scale,
|
||||
PtrStepSzf grad, PtrStepSzb qangle,
|
||||
bool correct_gamma,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
(void)nbins;
|
||||
const int nthreads = 256;
|
||||
@ -790,13 +813,11 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
|
||||
|
||||
if (correct_gamma)
|
||||
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
|
||||
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
|
||||
else
|
||||
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
|
||||
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -64,17 +64,30 @@ namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace hog
|
||||
{
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
void set_up_constants(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y,
|
||||
int ncells_block_x, int ncells_block_y);
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream);
|
||||
|
||||
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, const PtrStepSzf& grad,
|
||||
const PtrStepSzb& qangle, float sigma, float* block_hists,
|
||||
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y);
|
||||
void compute_hists(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int height, int width,
|
||||
const PtrStepSzf& grad, const PtrStepSzb& qangle,
|
||||
float sigma,
|
||||
float* block_hists,
|
||||
int cell_size_x, int cell_size_y,
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream);
|
||||
|
||||
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y);
|
||||
void normalize_hists(int nbins,
|
||||
int block_stride_x, int block_stride_y,
|
||||
int height, int width,
|
||||
float* block_hists,
|
||||
float threshold,
|
||||
int cell_size_x, int cell_size_y,
|
||||
int ncells_block_x, int ncells_block_y,
|
||||
const cudaStream_t& stream);
|
||||
|
||||
void classify_hists(int win_height, int win_width, int block_stride_y,
|
||||
int block_stride_x, int win_stride_y, int win_stride_x, int height,
|
||||
@ -82,20 +95,38 @@ namespace cv { namespace cuda { namespace device
|
||||
float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels);
|
||||
|
||||
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
|
||||
|
||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors);
|
||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors);
|
||||
void extract_descrs_by_rows(int win_height, int win_width,
|
||||
int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x,
|
||||
int height, int width,
|
||||
float* block_hists,
|
||||
int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors,
|
||||
const cudaStream_t& stream);
|
||||
void extract_descrs_by_cols(int win_height, int win_width,
|
||||
int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x,
|
||||
int height, int width,
|
||||
float* block_hists,
|
||||
int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors,
|
||||
const cudaStream_t& stream);
|
||||
|
||||
void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
|
||||
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
|
||||
void compute_gradients_8UC4(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
|
||||
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
|
||||
void compute_gradients_8UC1(int nbins,
|
||||
int height, int width, const cv::cuda::PtrStepSzb& img,
|
||||
float angle_scale,
|
||||
cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
|
||||
bool correct_gamma,
|
||||
const cudaStream_t& stream);
|
||||
void compute_gradients_8UC4(int nbins,
|
||||
int height, int width, const cv::cuda::PtrStepSzb& img,
|
||||
float angle_scale,
|
||||
cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
|
||||
bool correct_gamma,
|
||||
const cudaStream_t& stream);
|
||||
|
||||
void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
|
||||
void resize_8UC4(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
|
||||
@ -182,8 +213,8 @@ namespace
|
||||
|
||||
private:
|
||||
int getTotalHistSize(Size img_size) const;
|
||||
void computeBlockHistograms(const GpuMat& img, GpuMat& block_hists);
|
||||
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
|
||||
void computeBlockHistograms(const GpuMat& img, GpuMat& block_hists, Stream& stream);
|
||||
// void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle, Stream& stream);
|
||||
|
||||
// Coefficients of the separating plane
|
||||
float free_coef_;
|
||||
@ -310,7 +341,7 @@ namespace
|
||||
BufferPool pool(Stream::Null());
|
||||
|
||||
GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
|
||||
computeBlockHistograms(img, block_hists);
|
||||
computeBlockHistograms(img, block_hists, Stream::Null());
|
||||
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
|
||||
|
||||
@ -458,19 +489,16 @@ namespace
|
||||
|
||||
CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
|
||||
CV_Assert( win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0 );
|
||||
CV_Assert( !stream );
|
||||
|
||||
BufferPool pool(stream);
|
||||
|
||||
GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
|
||||
computeBlockHistograms(img, block_hists);
|
||||
|
||||
BufferPool pool(stream);
|
||||
GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
|
||||
Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
|
||||
const size_t block_hist_size = getBlockHistogramSize();
|
||||
Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
|
||||
|
||||
_descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32FC1);
|
||||
GpuMat descriptors = _descriptors.getGpuMat();
|
||||
GpuMat descriptors = _descriptors.getGpuMat();
|
||||
|
||||
computeBlockHistograms(img, block_hists, stream);
|
||||
|
||||
switch (descr_format_)
|
||||
{
|
||||
@ -481,7 +509,8 @@ namespace
|
||||
img.rows, img.cols,
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
descriptors);
|
||||
descriptors,
|
||||
StreamAccessor::getStream(stream));
|
||||
break;
|
||||
case DESCR_FORMAT_COL_BY_COL:
|
||||
hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
|
||||
@ -490,7 +519,8 @@ namespace
|
||||
img.rows, img.cols,
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
descriptors);
|
||||
descriptors,
|
||||
StreamAccessor::getStream(stream));
|
||||
break;
|
||||
default:
|
||||
CV_Error(cv::Error::StsBadArg, "Unknown descriptor format");
|
||||
@ -504,18 +534,39 @@ namespace
|
||||
return static_cast<int>(block_hist_size * blocks_per_img.area());
|
||||
}
|
||||
|
||||
void HOG_Impl::computeBlockHistograms(const GpuMat& img, GpuMat& block_hists)
|
||||
void HOG_Impl::computeBlockHistograms(const GpuMat& img, GpuMat& block_hists, Stream& stream)
|
||||
{
|
||||
BufferPool pool(stream);
|
||||
cv::Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
|
||||
hog::set_up_constants(nbins_, block_stride_.width, block_stride_.height, blocks_per_win.width, blocks_per_win.height, cells_per_block_.width, cells_per_block_.height);
|
||||
float angleScale = static_cast<float>(nbins_ / CV_PI);
|
||||
GpuMat grad = pool.getBuffer(img.size(), CV_32FC2);
|
||||
GpuMat qangle = pool.getBuffer(img.size(), CV_8UC2);
|
||||
|
||||
BufferPool pool(Stream::Null());
|
||||
hog::set_up_constants(nbins_,
|
||||
block_stride_.width, block_stride_.height,
|
||||
blocks_per_win.width, blocks_per_win.height,
|
||||
cells_per_block_.width, cells_per_block_.height,
|
||||
StreamAccessor::getStream(stream));
|
||||
|
||||
GpuMat grad = pool.getBuffer(img.size(), CV_32FC2);
|
||||
GpuMat qangle = pool.getBuffer(img.size(), CV_8UC2);
|
||||
computeGradient(img, grad, qangle);
|
||||
|
||||
block_hists.create(1, getTotalHistSize(img.size()), CV_32FC1);
|
||||
switch (img.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
hog::compute_gradients_8UC1(nbins_,
|
||||
img.rows, img.cols, img,
|
||||
angleScale,
|
||||
grad, qangle,
|
||||
gamma_correction_,
|
||||
StreamAccessor::getStream(stream));
|
||||
break;
|
||||
case CV_8UC4:
|
||||
hog::compute_gradients_8UC4(nbins_,
|
||||
img.rows, img.cols, img,
|
||||
angleScale,
|
||||
grad, qangle,
|
||||
gamma_correction_,
|
||||
StreamAccessor::getStream(stream));
|
||||
break;
|
||||
}
|
||||
|
||||
hog::compute_hists(nbins_,
|
||||
block_stride_.width, block_stride_.height,
|
||||
@ -524,7 +575,8 @@ namespace
|
||||
(float)getWinSigma(),
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cell_size_.height,
|
||||
cells_per_block_.width, cells_per_block_.height);
|
||||
cells_per_block_.width, cells_per_block_.height,
|
||||
StreamAccessor::getStream(stream));
|
||||
|
||||
hog::normalize_hists(nbins_,
|
||||
block_stride_.width, block_stride_.height,
|
||||
@ -532,24 +584,8 @@ namespace
|
||||
block_hists.ptr<float>(),
|
||||
(float)threshold_L2hys_,
|
||||
cell_size_.width, cell_size_.height,
|
||||
cells_per_block_.width, cells_per_block_.height);
|
||||
}
|
||||
|
||||
void HOG_Impl::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
|
||||
{
|
||||
grad.create(img.size(), CV_32FC2);
|
||||
qangle.create(img.size(), CV_8UC2);
|
||||
|
||||
float angleScale = (float)(nbins_ / CV_PI);
|
||||
switch (img.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
|
||||
break;
|
||||
case CV_8UC4:
|
||||
hog::compute_gradients_8UC4(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
|
||||
break;
|
||||
}
|
||||
cells_per_block_.width, cells_per_block_.height,
|
||||
StreamAccessor::getStream(stream));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user