Eliminate unnecessary double-precision arithmetic in CUDA

This commit is contained in:
Vladislav Vinogradov
2013-05-30 13:10:11 +04:00
parent 0b270e2b08
commit cd2b7448f4
9 changed files with 26 additions and 17 deletions
+13 -4
View File
@@ -72,7 +72,7 @@ namespace reduce
}
// Device-only, force-inlined pass-through: returns `r` exactly as received.
// The second parameter is unnamed and never read by the body.
template <typename T>
__device__ __forceinline__ T result(T r, double) const
__device__ __forceinline__ T result(T r, int) const
{
return r;
}
@@ -81,6 +81,15 @@ namespace reduce
// Copy constructor callable from both host and device code; the body is empty,
// so nothing is copied from the source object.
__host__ __device__ __forceinline__ Sum(const Sum&) {}
};
// Type trait mapping an element type T to a result element type.
// Primary template: `type` is float for every T that has no explicit
// specialization.
template <typename T> struct OutputType
{
typedef float type;
};
// Explicit specialization for double: `type` stays double, so double input is
// not mapped to the float used by the primary template.
template <> struct OutputType<double>
{
typedef double type;
};
struct Avg
{
template <typename T>
@@ -96,7 +105,7 @@ namespace reduce
}
// Device-only, force-inlined: returns `r / sz`.
// The return type is the TypeVec vector type with VecTraits<T>::cn channels;
// its element type is given by the first TypeVec argument in the signature.
// presumably `sz` is the number of reduced elements — confirm against the caller.
// NOTE(review): a float `sz` cannot represent every integer above 2^24 exactly;
// confirm the sizes passed here stay within that range or that the rounding is acceptable.
template <typename T>
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
__device__ __forceinline__ typename TypeVec<typename OutputType<typename VecTraits<T>::elem_type>::type, VecTraits<T>::cn>::vec_type result(T r, float sz) const
{
return r / sz;
}
@@ -121,7 +130,7 @@ namespace reduce
}
// Device-only, force-inlined pass-through: returns `r` exactly as received.
// The second parameter is unnamed and never read by the body.
template <typename T>
__device__ __forceinline__ T result(T r, double) const
__device__ __forceinline__ T result(T r, int) const
{
return r;
}
@@ -146,7 +155,7 @@ namespace reduce
}
// Device-only, force-inlined pass-through: returns `r` exactly as received.
// The second parameter is unnamed and never read by the body.
template <typename T>
__device__ __forceinline__ T result(T r, double) const
__device__ __forceinline__ T result(T r, int) const
{
return r;
}