加载中…
个人资料
  • 博客等级:
  • 博客积分:
  • 博客访问:
  • 关注人气:
  • 获赠金笔:0支
  • 赠出金笔:0支
  • 荣誉徽章:
正文 字体大小:

GPU中双精度(double)浮点运算的实现

(2012-09-14 14:44:22)
标签:

杂谈

分类: Cplusplus

GPU中双精度(double)浮点运算的实现!

2008-05-27 10:20

当前NVIDA的GPU芯片仅支持单精度(float)浮点运算,对一些应用来说精度可能不够用,一些关键的步骤可能需要双精度运算,才能保证程序的正常执行。对此,本人尝试用两个单精度浮点数数来代表一个双精度浮点数:

//类型定义, 对于GPU编程,我们可以直接用内置类型float2, 这里的定义只是为了清晰起见。

  1. typedef struct { float x, y;} float2
  2. 双精度和float2之间的转换

    //双精度到float2的转换(有溢出的风险)

     float2 double_to_float2(double val)

     {

       float2 ret;

       ret.x = (float)val;

       ret.y = (float)(val - (double)ret.x);

       return ret;

    }

    //float2到双精度的转换

     double float2_to_double(float2 val)

     {

         double ret;

         ret = val.x;

         ret += val.y;

         return ret;

     }

 

下一步就是关于 float2 类型的加减乘除的运算了(在GPU上实现)。说明,浮点运算中,括号是至关重要的;浮点数运算不满足结合律。

//加法: float2 + float2

static inline __device__ float2 add(float2 dsa, float 2 dsb)

{

    float2 dsc;

    float t1, t2, e;

    t1 = dsa.x + dsb.x;

    e = t1 - dsa.x;

    t2 = ((dsb.x -e) + ( dsa.x - (t1 -e))) + dsa.y + dsb.y;

    dsc.x = t1 + t2;

    dsc.y = t2 -(dsc.x -t1);

    return dsc;

}

//减法: float2 - float2

static inline __device__ float2 sub(float2 dsa, float2 dsb)

{

     float2 dsc;

     float t1, t2, e;

     t1 = dsa.x - dsb.x;

     e = t1 - dsa.x;

     t2 = ((-dsb.x -e) + ( dsa.x - (t1 -e))) + dsa.y - dsb.y;

     dsc.x = t1 + t2;

     dsc.y = t2 -(dsc.x -t1);

     return dsc;

}

#define split 8139.0

//乘法: float2 * float2

static inline __device__ float2 mul(float2 dsa, float2 dsb)

{

     float2 dsc;

     float a1, a2, b1, b2, cona, conb, c11, c21, c2, e, t1, t2;

     cona = dsa.x * split;

     conb = dsb.x * split;

     a1 = cona - ( cona - dsa.x);

     b1 = conb - ( conb - dsb.x);

     a2 = dsa.x - a1;

     b2 = dsb.x - b1;

     c11 = dsa.x * dsb.x ;

     c21 = (((a1*b1 - c11)+a1*b2)+a2*b1)+a2*b2;

     c2 = dsa.x * dsb.y + dsa.y * dsb.x;

     t1 = c11 + c2;

     e = t1 - c11;

     t2 = ((c2-e) + (c11 - (t1 -e))) + c21 + dsa.y * dsb.y;

     dsc.x = t1 + t2;

     dsc.y = t2 - ( dsc.x -t1);

     return dsc;

}

 

//除法:  float2 / float2

static inline __device__ float2 div(float2 dsa, float2 dsb)

{

     float2 dsc;

     float a1, a2, b1, b2, cona, conb, c11, c2, c21, e, s1, s2;

     float t1, t2, t11, t12, t21, t22;

     s2 = 1.0f/dsb.x;

     s1 = dsa.x * s2;

     cona = s1 * split;

     conb = dsb.x * split;

     a1 = cona - (cona - s1);

     b1 = conb - (conb - dsb.x);

     a2 = s1 -a1;

     b2 = dsb.x - b1;

     c11 = s1 *dsb.x;

     c21 = (((a1*b1 -c11)+a1*b2)+a2*b1)+a2*b2;

     c2 = s1*dsb.y;

     t1 = c11 + c2;

     e = t1 - c11;

     t2 = ((c2-e)+(c11-(t1-e))) + c21;

     t12 = t1 + t2;

     t22 = t2 - (t12 - t1);

 

     t11 = dsa.x  - t12;

     e = t11 -dsa.x;

     t21 = ((-t12-e)+(dsa.x - (t11-e))) + dsa.y - t22;

     s2 *= (t11+t21);

     dsc.x = s1 + s2;

     dsc.y = s2 - ( dsc.x - s1);

     return dsc;

}

0

阅读 收藏 喜欢 打印举报/Report
  

新浪BLOG意见反馈留言板 欢迎批评指正

新浪简介 | About Sina | 广告服务 | 联系我们 | 招聘信息 | 网站律师 | SINA English | 产品答疑

新浪公司 版权所有