当前位置:网站首页>Float float simulates double precision computation on CPU and GPU

Float float simulates double precision computation on CPU and GPU

2022-06-09 10:57:00 weixin_ forty-two million eight hundred and forty-nine thousand

float-float

be based on Lewis The code in the book machining . You can use two float Single precision floating point number simulation CPU and GPU Upper double precision double Type operation .

It can be used in computing logic that requires mixed precision , Ensure the accuracy requirements of specific calculations .


FLOAT2 Type source code (float2.h)

#pragma once 
#ifdef __CUDACC__
#define __HOST__DEVICE__ __host__ __device__
#else 
#define __HOST__DEVICE__ 
#endif

#define __INLINE__ __HOST__DEVICE__ inline


struct FLOAT2{
    

float x32,y32;

__INLINE__ 
FLOAT2():x32(0.0f),y32(0.0f){
    }

__INLINE__ explicit 
FLOAT2(float x):x32(x),y32(0.0f){
    }

__INLINE__  explicit 
FLOAT2(float x,float y):x32(x),y32(y) {
    }

/************************************************************** *  Type conversion  * ************************************************************/
// Type conversion : FLOAT2->float
__INLINE__  
operator float(){
    return x32;}

// Type conversion : FLOAT2->double
__INLINE__ 
operator double(){
    return double(x32)+double(y32);}

//double->FLOAT2,  Overflow is not considered 
__INLINE__  explicit 
FLOAT2(double a)
{
    
    x32=a;
    y32=a-x32;
}

__INLINE__ static bool isnan(const FLOAT2 a64)
{
    
    return std::isnan(a64.x32) || std::isnan(a64.y32);
}
__INLINE__ static bool isinf(const FLOAT2 a64)
{
    
    return !isnan(a64) &&(std::isinf(a64.x32) || std::isinf(a64.y32));
}
__INLINE__ static bool isnormal(const FLOAT2 a64)
{
    
    return std::isnormal(a64.x32) && std::isnormal(a64.y32);
}
__INLINE__ static bool isfinite(const FLOAT2 a64)
{
    
    std::isfinite(a64.x32) && std::isfinite(a64.y32);
}
__INLINE__ static bool iszero(const FLOAT2 a64)
{
    
    return a64.x32==0.0f && a64.y32==0.0f;
}

/******************************************************** *  Calculation function  * *******************************************************/
//==
__INLINE__ static bool EQ64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    return a64.x32==b64.x32 && a64.y32==b64.y32;
}
//> 
__INLINE__ static bool GT64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    return a64.x32>b64.x32 || (a64.x32==b64.x32 && a64.y32>b64.y32);
}
//>=
__INLINE__ static bool GE64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    return a64.x32>=b64.x32 || (a64.x32==b64.x32 && a64.y32>=b64.y32);
}
//<
__INLINE__ static bool LT64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    return !GE64(a64,b64);
}
//<=
__INLINE__ static bool LE64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    return !GT64(a64,b64);
}

//-
__INLINE__ static FLOAT2 NEG64(const FLOAT2 a64)
{
    
    return FLOAT2{
    -a64.x32,-a64.y32};
}

/************************************************************ * Addition * ***********************************************************/

// take a32+b32 Normalized formation FLOAT2
__INLINE__ static FLOAT2 QuickTwoSum(const float a32, const float b32)
{
    
    // assumes a32 > b32
    float s32, e32 ;

    s32 = a32 + b32 ;
    e32 = b32 - (s32 - a32) ;                   // This is NOT always zero!
    return  FLOAT2{
    s32, e32} ;
}

__INLINE__ static FLOAT2 TwoSum(const float a32, const float b32)
{
    
    float s32, v32, e32 ;

    s32 = a32 + b32 ;
    v32 = s32 - a32 ;                           // This is NOT the same as b32!
    e32 = (a32 - (s32 - v32)) + (b32 - v32) ;   // This is NOT always zero!
    return FLOAT2{
    s32, e32} ;
}

__INLINE__ static FLOAT2 Add64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    FLOAT2 s64, t64 ;

    s64 = TwoSum(a64.x32, b64.x32) ;
    t64 = TwoSum(a64.y32, b64.y32) ;
    s64.y32 += t64.x32 ;
    s64 = QuickTwoSum(s64.x32, s64.y32) ;
    s64.y32 += t64.y32 ;
    return QuickTwoSum(s64.x32, s64.y32) ;
}

__INLINE__ static FLOAT2 Add64(const FLOAT2 a64, const float b32)
{
    
    FLOAT2 s64;
    s64 = TwoSum(a64.x32, b32) ;
    s64.y32 += a64.y32 ;
    return QuickTwoSum(s64.x32, s64.y32) ;
}

__INLINE__ static FLOAT2 Add64(const float a32, const FLOAT2 b64)
{
    
    return Add64(b64,a32);
}

/********************************************************* * Subtraction * *******************************************************/
__INLINE__ static FLOAT2 Sub64(const FLOAT2 a64,const FLOAT2 b64)
{
    
    return Add64(a64,NEG64(b64));
}

__INLINE__ static FLOAT2 Sub64(const FLOAT2 a64,const float b32)
{
    
    return Add64(a64, -b32);
}

__INLINE__ static FLOAT2 Sub64(const float a32, const FLOAT2 b64)
{
    
    return NEG64(Sub64(b64,a32));
}

/***************************************************************** * Multiplication * ****************************************************************/

__INLINE__ static FLOAT2 Split(const float a32)
{
    
    float p32,ahi32,alo32;
    p32=4097.0f*a32;
    ahi32=p32-(p32-a32);
    alo32=a32-ahi32;

    return FLOAT2(ahi32,alo32);
}

__INLINE__ static FLOAT2 TwoProd(const float a32, const float b32)
{
    
    FLOAT2 a64, b64 ;
    float p32, e32 ;

    p32 = a32 * b32 ;
    a64 = Split(a32) ;
    b64 = Split(b32) ;
    e32 = ((a64.x32*b64.x32 - p32) + a64.x32*b64.y32 + a64.y32*b64.x32) + a64.y32*b64.y32 ;
    return FLOAT2{
    p32, e32} ;
}

__INLINE__ static FLOAT2 Mul64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    FLOAT2 p64 ;

    p64 = TwoProd(a64.x32, b64.x32) ;
    p64.y32 += a64.x32 * b64.y32 ;
    p64.y32 += a64.y32 * b64.x32 ;
    return QuickTwoSum(p64.x32, p64.y32) ;
}

__INLINE__ static FLOAT2 Mul64(const FLOAT2 a64, const float b32)
{
    
    FLOAT2 p64 ;

    p64 = TwoProd(a64.x32, b32) ;
    p64.y32 += a64.y32 * b32 ;
    return QuickTwoSum(p64.x32, p64.y32) ;
}

__INLINE__ static FLOAT2 Mul64(const float a32, const FLOAT2 b64)
{
    
    return Mul64(b64,a32);
}

/***************************************************************** * Division * ****************************************************************/

// Newton's iteration , One iteration is OK
__INLINE__ static FLOAT2 Div64(const FLOAT2 a64, const FLOAT2 b64)
{
    
    //check divide by zero
    //ASSERT(!iszero(b64))

    FLOAT2 q64, e64 ;
    float i32, d32 ;

    i32 = 1.0f / b64.x32 ;

    float q32=a64.x32*i32;
    d32=Sub64(a64,Mul64(b64,q32)).x32;
    e64=TwoProd(i32,d32);
    return Add64(q32,e64);
}

__INLINE__ static FLOAT2 Div64(const FLOAT2 a64, const float b32)
{
    
    //check divide by zero
    //ASSERT(b32!=0.0f)

    FLOAT2 q64, e64 ;
    float i32, d32 ;

    i32 = 1.0f / b32 ;

    float q32=a64.x32*i32;
    d32=Sub64(a64,TwoProd(b32,q32)).x32;
    e64=TwoProd(i32,d32);
    return Add64(q32,e64);
}

__INLINE__ static FLOAT2 Div64(const float a32, const FLOAT2 b64)
{
    
    //check divide by zero
    //ASSERT(!iszero(b64))
    
    FLOAT2 t64(a32);
    return Div64(t64,b64);
}

/***************************************************************** * special functions * ****************************************************************/
 //TODO: 
 //ln,log10
 //sin,cos,sincos
 //exp,exp1m
 //polyval

// Count the reciprocal 
__INLINE__ static FLOAT2 Recip64(const FLOAT2 a64)
{
    
    FLOAT2 e64 ;
    float i32, d32 ;
    
    // Approximate value 
    i32 = 1.0f / a64.x32 ;
    
     // One newton iteration method :  Find zero  f(x)=x^{-1}-a
    // iteration : x=x*(2-a*x)
    return Mul64(i32,Sub64(2.0f,Mul64(a64,i32)));
}

// Calculation 1/sqrt(a)
//x=1/sqrt(a)=> f(x)=x^{-2}-a  Find zero 
// iteration : x=x*(1.5-0.5*a*x*x)
//https://math.stackexchange.com/questions/607794/newton-raphson-for-reciprocal-square-root
 __INLINE__ static FLOAT2 Rsqrt64(const FLOAT2 a64)
 {
    
     // Approximate value 
     float i32=rsqrt(a64.x32);

     // A Newton iteration 
     // iteration : x=x*(1.5-0.5*a*x*x)=0.5*x*(3-a*x*x)
     FLOAT2 t64=Mul64(i32,Mul64(i32,a64));
     return Mul64(0.5f*i32, Sub64(3.0f, t64));
 }

 __INLINE__ static FLOAT2 Sqrt64(const FLOAT2 a64)
 {
    
     return Recip64(Rsqrt64(a64));
 }

}; //struct FLOAT2


The test program (test_float2.cu)

#include "float2.h"
#include <cstdio>
__INLINE__  static void test_float2()
{
    
   const double sqrt_2=1.4142135623730950488016887242097;
   const double sqrt_3=1.7320508075688772935274463415059;
   const double sqrt_2_plus_sqrt_3=3.146264369941972560695830907207;
   const float z=1.5f; //3/2

   printf("sqrt_2=%18.16g, sqrt_3=%18.16g\n",sqrt_2,sqrt_3);

   FLOAT2 a64(sqrt_2),b64(sqrt_3);

   auto x64=FLOAT2::Add64(a64,b64);

   double y=x64;

   printf("y=%18.16g, %18.16g\n",y,sqrt_2+sqrt_3);

   printf("%18.16g\n",double(FLOAT2::Sub64(z,FLOAT2(sqrt_2))));

   x64=FLOAT2::Div64(a64,b64);
   y=x64;
   printf("y=%18.16g\n",y);

   x64=FLOAT2::Recip64(FLOAT2(sqrt_2));
   y=x64;
   printf("y=%18.16g\n",y);
   
   x64=FLOAT2::Rsqrt64(FLOAT2(2.0f));
   y=x64;
   printf("y=%18.16g\n",y);

}

__global__ void kernel_float2()
{
    
    test_float2();
}

void 
test_driver()
{
    
   //==========================================================
   printf("----------->On CPU:\n");
   test_float2();
   
   //============================================================
   printf("------------>On GPU: \n");
   kernel_float2<<<1,1>>>();
}

int main()
{
    
    test_driver();
    return(0);
}

// compile : nvcc -O3 -use_fast_math test_float2.cu

/*********************************************************** matlab: vpa(sqrt(2)) #1.4142135623730950488016887242097 vpa(sqrt(3)) #1.7320508075688772935274463415059 vpa(sqrt(2)+sqrt(3)) #3.146264369941972560695830907207 vpa(3/2-sqrt(2)) #0.085786437626904854525378141261172 vpa(sqrt(2/3)) #0.81649658092772603273242802490196 **************************************************************/
原网站

版权声明
本文为[weixin_ forty-two million eight hundred and forty-nine thousand]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/160/202206091013352632.html