cudev: add feature to convert FP32(float) from/to FP16(half) on GPU

* add FP16 conversion support on GPU (cudev) * add test * leave the generic template function unimplemented so that unsupported type combinations raise an error
2016-07-26 08:58:34 +09:00
parent f2e9588c93
commit 2974b049e7
5 changed files with 144 additions and 0 deletions
@@ -668,6 +668,27 @@ template <typename T, typename D> struct saturate_cast_func : unary_function<T,
    }
 };

+// FP16 conversion functor, primary template: declared but deliberately left undefined; only the explicit specializations are usable
+template <typename T, typename D> struct saturate_cast_fp16_func;
+
+// Functor wrapper for the FP32 -> FP16 direction: takes a float and returns
+// the short produced by cast_fp16<float, short>. The functor itself adds no
+// extra processing around that call.
+template <> struct saturate_cast_fp16_func<float, short> : unary_function<float, short>
+{
+    __device__ __forceinline__ short operator ()(float v) const
+    {
+        const short packed = cast_fp16<float, short>(v);
+        return packed;
+    }
+};
+
+// Functor wrapper for the FP16 -> FP32 direction: takes the half value held
+// in a short and returns the float produced by cast_fp16<short, float>. The
+// functor itself adds no extra processing around that call.
+template <> struct saturate_cast_fp16_func<short, float> : unary_function<short, float>
+{
+    __device__ __forceinline__ float operator ()(short v) const
+    {
+        const float unpacked = cast_fp16<short, float>(v);
+        return unpacked;
+    }
+};
+
 // Threshold Functors

 template <typename T> struct ThreshBinaryFunc : unary_function<T, T>
@@ -270,6 +270,17 @@ template <> __device__ __forceinline__ uint saturate_cast<uint>(double v)
 #endif
 }

+template <typename T, typename D> __device__ __forceinline__ D cast_fp16(T v); // primary template: declared only, no generic definition is given here
+
+// FP16 -> FP32: hands the half value carried in a short to the
+// __half2float intrinsic and returns the resulting float.
+template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
+{
+    const float widened = __half2float(v);
+    return widened;
+}
+
+// FP32 -> FP16: converts the float with the __float2half_rn intrinsic
+// (the round-to-nearest variant) and returns the result cast to short.
+template <> __device__ __forceinline__ short cast_fp16<float, short>(float v)
+{
+    const short narrowed = static_cast<short>(__float2half_rn(v));
+    return narrowed;
+}
 //! @}

 }}