cudev: add conversion between FP32 (float) and FP16 (half) on GPU

* add feature of Fp16 on GPU (cudev)
  * add test
  * leave the generic template unimplemented so that unsupported type combinations raise an error
This commit is contained in:
Tomoaki Teshima
2016-07-26 08:58:34 +09:00
parent f2e9588c93
commit 2974b049e7
5 changed files with 144 additions and 0 deletions
@@ -668,6 +668,27 @@ template <typename T, typename D> struct saturate_cast_func : unary_function<T,
}
};
// FP16 conversion functor, primary template.
// Only declared here (no body at this point), so it acts as a placeholder for
// arbitrary <T, D>; the usable functors are the explicit specializations for
// <float, short> and <short, float>.
template <typename T, typename D> struct saturate_cast_fp16_func;
// FP32 -> FP16 functor.
// Accepts a float and forwards it to cast_fp16<float, short>, returning that
// helper's short result unchanged (presumably the packed half-precision bits;
// see cast_fp16 for the exact representation).
template <> struct saturate_cast_fp16_func<float, short> : unary_function<float, short>
{
    __device__ __forceinline__ short operator ()(float value) const
    {
        const short packed = cast_fp16<float, short>(value);
        return packed;
    }
};
// FP16 -> FP32 functor.
// Accepts a short (presumably holding packed half-precision bits; see
// cast_fp16 for the exact representation) and forwards it to
// cast_fp16<short, float>, returning that helper's float result unchanged.
template <> struct saturate_cast_fp16_func<short, float> : unary_function<short, float>
{
    __device__ __forceinline__ float operator ()(short value) const
    {
        const float widened = cast_fp16<short, float>(value);
        return widened;
    }
};
// Threshold Functors
template <typename T> struct ThreshBinaryFunc : unary_function<T, T>
@@ -270,6 +270,17 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
#endif
}
// Device-side FP16 conversion helper, primary template.
// Declared without a body here; the explicit specializations that follow
// provide the <short, float> and <float, short> conversions.
template <typename T, typename D> __device__ __forceinline__ D cast_fp16(T v);
// Converts a half-precision value, passed as its raw 16-bit pattern in a
// short, to single precision.
//
//   v       - bit pattern of the FP16 value (not an integer quantity).
//   returns - the FP16 value widened to float.
//
// Up to CUDA 8 __half2float() takes the raw bits as an unsigned short, so the
// short can be passed straight through. From CUDA 9 on it takes a __half
// object; handing it a short would either fail to compile or go through
// __half's integer constructor and convert the *numeric* value of the short.
// On those toolkits the bits are therefore reinterpreted explicitly with
// __short_as_half() (declared in <cuda_fp16.h>, the same header that provides
// __half2float() there).
template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
{
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    return __half2float(__short_as_half(v));
#else
    return __half2float(v);
#endif
}
// Converts a single-precision value to half precision (round-to-nearest-even)
// and returns the raw 16-bit pattern of the result in a short.
//
//   v       - value to convert.
//   returns - bit pattern of the FP16 result (not an integer quantity).
//
// Up to CUDA 8 __float2half_rn() returns the raw bits as an unsigned short,
// so a plain cast to short keeps the pattern. From CUDA 9 on it returns a
// __half object, and a C-style cast to short would either fail to compile or
// perform a numeric half -> integer conversion. On those toolkits the bits
// are therefore extracted explicitly with __half_as_short() (declared in
// <cuda_fp16.h>, the same header that provides __float2half_rn() there).
template <> __device__ __forceinline__ short cast_fp16<float, short>(float v)
{
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    return __half_as_short(__float2half_rn(v));
#else
    return (short)__float2half_rn(v);
#endif
}
//! @}
}}