Merge pull request #12735 from tomoaki0705:fixCudaStereoTestFailure

2018-10-04 11:33:34 +00:00 · 2018-10-04 11:33:34 +00:00 · 0101fa789c
commit 0101fa789c
parent 12b3d157a5 7294ae0f17
2 changed files with 17 additions and 12 deletions
--- a/modules/cudastereo/src/cuda/stereobp.cu
+++ b/modules/cudastereo/src/cuda/stereobp.cu
@ -255,7 +255,7 @@ namespace cv { namespace cuda { namespace device
        ///////////////////////////////////////////////////////////////

        template <typename T>
-        __global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
+        __global__ void data_step_down(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -264,10 +264,15 @@ namespace cv { namespace cuda { namespace device
            {
                for (int d = 0; d < cndisp; ++d)
                {
-                    float dst_reg  = src.ptr(d * src_rows + (2*y+0))[(2*x+0)];
-                          dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)];
-                          dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)];
-                          dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)];
+                    // check the index of src
+                    const int x0 = 2 * x;
+                    const int x1 = ::min(x0 + 1, src_cols - 1);
+                    const int y0 = 2 * y;
+                    const int y1 = ::min(y0 + 1, src_rows - 1);
+                    float dst_reg  = src.ptr(d * src_rows + y0)[x0];
+                          dst_reg += src.ptr(d * src_rows + y1)[x0];
+                          dst_reg += src.ptr(d * src_rows + y0)[x1];
+                          dst_reg += src.ptr(d * src_rows + y1)[x1];

                    dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg);
                }
@ -275,7 +280,7 @@ namespace cv { namespace cuda { namespace device
        }

        template<typename T>
-        void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
+        void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
        {
            dim3 threads(32, 8, 1);
            dim3 grid(1, 1, 1);
@ -283,15 +288,15 @@ namespace cv { namespace cuda { namespace device
            grid.x = divUp(dst_cols, threads.x);
            grid.y = divUp(dst_rows, threads.y);

-            data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
+            data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_cols, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

-        template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
-        template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
+        template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
+        template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);

        ///////////////////////////////////////////////////////////////
        /////////////////// level up messages  ////////////////////////
--- a/modules/cudastereo/src/stereobp.cpp
+++ b/modules/cudastereo/src/stereobp.cpp
@ -61,7 +61,7 @@ namespace cv { namespace cuda { namespace device
        template<typename T, typename D>
        void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
        template<typename T>
-        void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
+        void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
        template <typename T>
        void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
        template <typename T>
@ -283,7 +283,7 @@ namespace
    {
        using namespace cv::cuda::device::stereobp;

-        typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
+        typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
        static const data_step_down_t data_step_down_callers[2] =
        {
            data_step_down_gpu<short>, data_step_down_gpu<float>
@ -318,7 +318,7 @@ namespace

            datas_[i].create(rows_all_[i] * ndisp_, cols_all_[i], msg_type_);

-            data_step_down_callers[funcIdx](cols_all_[i], rows_all_[i], rows_all_[i-1], datas_[i-1], datas_[i], stream);
+            data_step_down_callers[funcIdx](cols_all_[i], rows_all_[i], cols_all_[i-1], rows_all_[i-1], datas_[i-1], datas_[i], stream);
        }

        PtrStepSzb mus[] = {u_, u2_};