cudev: add feature to convert FP32(float) from/to FP16(half) on GPU
* add FP16 conversion support on the GPU (cudev)
* add a test
* leave the generic template function unimplemented so that unsupported types raise an error
This commit is contained in:
@@ -668,6 +668,27 @@ template <typename T, typename D> struct saturate_cast_func : unary_function<T,
|
||||
}
|
||||
};
|
||||
|
||||
// FP16 conversion functor, primary template.
// Only declared here, never defined: the usable conversions are the explicit
// specializations for <float, short> and <short, float>.
template <typename SrcT, typename DstT> struct saturate_cast_fp16_func;
|
||||
|
||||
// FP32 -> FP16 functor.
// Unary functor whose call operator forwards its float argument to
// cast_fp16<float, short> and returns the short that call produces.
template <> struct saturate_cast_fp16_func<float, short> : unary_function<float, short>
{
    __device__ __forceinline__ short operator ()(float v) const
    {
        const short halfBits = cast_fp16<float, short>(v);
        return halfBits;
    }
};
|
||||
|
||||
// FP16 -> FP32 functor.
// Unary functor whose call operator forwards its short argument to
// cast_fp16<short, float> and returns the float that call produces.
template <> struct saturate_cast_fp16_func<short, float> : unary_function<short, float>
{
    __device__ __forceinline__ float operator ()(short v) const
    {
        const float widened = cast_fp16<short, float>(v);
        return widened;
    }
};
|
||||
|
||||
// Threshold Functors
|
||||
|
||||
template <typename T> struct ThreshBinaryFunc : unary_function<T, T>
|
||||
|
||||
@@ -270,6 +270,17 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
||||
#endif
|
||||
}
|
||||
|
||||
// FP16 <-> FP32 conversion, primary template.
// Declared without a definition; only the <short, float> and <float, short>
// specializations are implemented, so other type pairs have no body to call.
template <typename SrcT, typename DstT> __device__ __forceinline__ DstT cast_fp16(SrcT v);
|
||||
|
||||
// Converts a half-precision value, passed as its raw 16-bit pattern in `v`,
// to single precision.
//
// v       - storage bits of an FP16 number (not an integer quantity).
// returns - the same number as a float.
//
// Toolkit note: from CUDA 9 on, __half2float() takes the __half class type.
// Handing it a `short` there goes through a value conversion (0x3C00 would be
// read as the integer 15360 rather than the bit pattern of 1.0), so the
// conversion is issued directly as a PTX `cvt`, which works on the raw bits and
// does not depend on <cuda_fp16.h>. Older toolkits keep the original intrinsic.
template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
{
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    // "h" binds a 16-bit register; the unsigned copy keeps the bit pattern as is.
    const unsigned short bits = static_cast<unsigned short>(v);
    float result;
    asm("cvt.f32.f16 %0, %1;" : "=f"(result) : "h"(bits));
    return result;
#else
    return __half2float(v);
#endif
}
|
||||
|
||||
// Converts a single-precision value to half precision (round-to-nearest-even)
// and returns the raw 16-bit pattern of the result.
//
// v       - value to convert.
// returns - storage bits of the FP16 number, carried in a short.
//
// Toolkit note: from CUDA 9 on, __float2half_rn() returns the __half class
// type, and a `(short)` cast on it is a numeric half-to-integer conversion, not
// a read of the storage bits. The conversion is therefore issued directly as a
// PTX `cvt.rn`, which yields the bits and does not depend on <cuda_fp16.h>.
// Older toolkits keep the original intrinsic.
template <> __device__ __forceinline__ short cast_fp16<float, short>(float v)
{
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    // "h" binds a 16-bit register that receives the FP16 bit pattern.
    unsigned short bits;
    asm("cvt.rn.f16.f32 %0, %1;" : "=h"(bits) : "f"(v));
    return static_cast<short>(bits);
#else
    return (short)__float2half_rn(v);
#endif
}
|
||||
//! @}
|
||||
|
||||
}}
|
||||
|
||||
Reference in New Issue
Block a user