cudev: add conversion between FP32 (float) and FP16 (half) on GPU

* add FP16 conversion support on GPU (cudev)
  * add test
  * leave the generic template function unimplemented so that using it raises an error
This commit is contained in:
Tomoaki Teshima
2016-07-26 08:58:34 +09:00
parent f2e9588c93
commit 2974b049e7
5 changed files with 144 additions and 0 deletions
@@ -855,6 +855,8 @@ private:
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
/** @brief Converts a GPU array between CV_32F and CV_16S depth (FP32 <-> FP16 conversion).

A CV_32F source produces a CV_16S destination and a CV_16S source produces a CV_32F
destination; any other source depth raises Error::StsUnsupportedFormat. The destination
is created with the same size and channel count as the source.

@param _src Source array; read as a GpuMat.
@param _dst Destination array; (re)created by this call.
@param stream Stream handed to the conversion routine (defaults to Stream::Null()).
 */
CV_EXPORTS void convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
//! @} cudacore_init
}} // namespace cv { namespace cuda {
+43
View File
@@ -510,6 +510,17 @@ namespace
gridTransformUnary_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), op, stream);
}
/**
 * Element-wise conversion of `src` (elements of type T) into `dst` (elements of
 * type D) using `saturate_cast_fp16_func<T, D>` as the per-element operation.
 *
 * @tparam T      element type used to view `src`
 * @tparam D      element type used to view `dst`
 * @param src     source matrix, wrapped with globPtr<T>
 * @param dst     destination matrix, wrapped with globPtr<D>
 * @param stream  stream forwarded to gridTransformUnary_
 */
template <typename T, typename D>
void convertScaleHalf(const GpuMat& src, const GpuMat& dst, Stream& stream)
{
    // Only the destination side is needed to pick the policy type; the
    // previously declared source-side typedefs were never used and only
    // produced unused-typedef warnings.
    typedef typename VecTraits<D>::elem_type dst_elem_type;
    typedef typename LargerType<float, dst_elem_type>::type scalar_type;

    gridTransformUnary_< ConvertToPolicy<scalar_type> >(globPtr<T>(src), globPtr<D>(dst), saturate_cast_fp16_func<T,D>(), stream);
}
}
void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const
@@ -583,4 +594,36 @@ void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, doub
funcs[sdepth][ddepth](reshape(1), dst.reshape(1), alpha, beta, stream);
}
/**
 * Converts a GPU matrix between CV_32F and CV_16S depth.
 *
 * A CV_32F input yields a CV_16S output and a CV_16S input yields a CV_32F
 * output; every other input depth is rejected with Error::StsUnsupportedFormat.
 * The output is created with the input's size and channel count.
 *
 * @param _src    input array, fetched as a GpuMat
 * @param _dst    output array, (re)created here
 * @param stream  stream forwarded to the conversion routine
 */
void cv::cuda::convertFp16Cuda(InputArray _src, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, const GpuMat& dst, Stream& stream);

    // Conversion routines indexed by destination depth. Only two slots are
    // populated; they are expected to line up with CV_16S and CV_32F
    // (NOTE(review): confirm against the depth constant values).
    static const func_t funcs[] =
    {
        0, 0, 0,
        convertScaleHalf<float, short>, 0, convertScaleHalf<short, float>,
        0, 0,
    };

    GpuMat src = _src.getGpuMat();
    const int sdepth = src.depth();

    // The destination depth is fully determined by the source depth.
    int ddepth = 0;
    if (sdepth == CV_32F)
    {
        ddepth = CV_16S;
    }
    else if (sdepth == CV_16S)
    {
        ddepth = CV_32F;
    }
    else
    {
        CV_Error(Error::StsUnsupportedFormat, "Unsupported input depth");
        return;
    }

    _dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(ddepth), src.channels()));
    GpuMat dst = _dst.getGpuMat();

    // Both matrices are viewed as single-channel before the element-wise pass.
    const func_t convert = funcs[ddepth];
    convert(src.reshape(1), dst.reshape(1), stream);
}
#endif