Custom floating point

From Hamsterworks Wiki!

Jump to: navigation, search

This [C Coding Samples|C coding sample] was added July 2016

/**********************************************
* Testing a range limited custom floating point
* for calculating c1*pow(x,5)+c2*pow(x,4) over
* a range of x=0 to x=2^22-1.
* Assumes 32-bit 'signed' and 'unsigned' types.
********************************************/
#include <stdio.h>
#include <time.h>

double c1 = 1.40715e-09;
double c2 = 9.18189e-08;

/* 1.4071499998200009962090462067863e-9 in custom floating point*/
unsigned m_c1 = 3244666990;
signed e_c1 = -29;

/*  9.18189e-08 in custom floating point */
unsigned m_c2 = 3308124510;
signed e_c2 = -23;

/****************************************************************/
/* Conversion for printing                                      */
/****************************************************************/
static double fixed_to_float(unsigned m, int e) {

  double t;
  t = m;
  t /= 65536;
  t /= 65536;
  while(e > 0) {
    t *= 2; e--;
  }
  while(e < 0) {
    t /= 2; e++;
  }
  return t;
}

/****************************************************************/
/* Adjust so MSB of mantissa is set                             */
/****************************************************************/
static void fixed_normalize(unsigned *m, signed *e) {
  /* Adjust to have a one in the MSB of mantissa */
  if(*m == 0) {
    *e = 0;
     return;
  }
  while((*m & 0x80000000) == 0) {
    *m <<= 1;
    *e = *e -1;
  }
}


/****************************************************************/
/* Multiply two numbers together, using only 32bit math         */
/****************************************************************/
static void fixed_mult(unsigned *m1, signed *e1, unsigned *m2, signed *e2) {
  unsigned h1,l1,h2,l2;
  unsigned h1h2, h1l2, l1h2, l1l2;
  unsigned carry,acc;

  if(*m1 == 0 || *m2 == 0) {
    *m1 = 0;
    *e1 = 0;
  }

  h1 = *m1 >> 16;
  l1 = *m1 & 0xFFFF;
  h2 = *m2 >> 16;
  l2 = *m2 & 0xFFFF;

  h1h2 = h1 * h2;
  h1l2 = h1 * l2;
  l1h2 = l1 * h2;
  l1l2 = l1 * l2;

  carry = 0;
  acc = l1l2 >> 16;

  acc += h1l2;
  if(acc < h1l2) carry += 0x10000;

  acc += l1h2;
  if(acc < l1h2) carry += 0x10000;

  acc >>= 16;
  acc += carry + h1h2;
  *m1 = acc;
  *e1 += *e2;
  fixed_normalize(m1,e1);
}

/****************************************************************/
/* Add two numbers together                                     */
/****************************************************************/
void fixed_add(unsigned *m1, signed *e1, unsigned *m2, signed *e2) {
  if(*m2 == 0) {
    return; /* Nothing to add */
  }

  if(*m1 == 0) {
    *m1 = *m2;
    *e1 = *e2;
    return; /* Nothing in m1 at the moment */
  }

  /* Do we need to move the decimal on m1*2^e1 ? */
  while(*e2 > *e1) {
    *m1 >>= 1;
    *e1 = *e1 + 1;
  }

  /* Do we need to move the decimal on m2*2^e2 ? */
  while(*e1 > *e2) {
    *m2 >>= 1;
    *e2 = *e2 + 1;
  }

  /* As e2 = e1 we can now just add */
  *m1 += *m2;

  /* Test for overflow */
  if(*m1 < *m2) {
    *m1 = (*m1>>1) | 0x80000000;
    *e1 = *e1 + 1;
  }
}

/****************************************************************/
/* The main calculation in custom fixed point                   */
/****************************************************************/
double calcFixed(unsigned i, unsigned *m, signed *e) {
  unsigned m_x = i;
  signed e_x = 32;
  unsigned m_t1;
  signed e_t1;
  unsigned m_t2;
  signed e_t2;
  fixed_normalize(&m_x, &e_x);
  m_t2 = m_t1 = m_x;
  e_t2 = e_t1 = e_x;

  fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^2 */
  fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^3 */
  fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^4 */
  fixed_mult(&m_t2, &e_t2, &m_t1, &e_t1);  /* t2 = x^5 */
  fixed_mult(&m_t1, &e_t1, &m_c2, &e_c2); /* t1 = c2 * x ^ 4 */
  fixed_mult(&m_t2, &e_t2, &m_c1, &e_c1); /* t2 = c1 * x ^ 5 */
  fixed_add(&m_t1, &e_t1, &m_t2, &e_t2);
  *m = m_t1;
  *e = e_t1;
  return 0;
}

/****************************************************************/
/* The same calculation in floating point                       */
/****************************************************************/
double calcFloat(unsigned i) {
  double xd = i;
  return c1 * xd*xd*xd*xd*xd + c2* xd*xd*xd*xd;
}

/****************************************************************/
/* Test harness to verify the code                              */
/****************************************************************/
int main(int argc, char *argv[]) {
  int start_time;
  int end_time;
  int j;
  unsigned i;
  double t = 0;
  double max_error=0.0, min_error=0.0;
  int max_i = 0, min_i = 0;

  /* Time the H/W floating point routine */
  start_time = time(NULL);
  for(j = 0; j < 250; j++) {
    for(i = 0; i < 0x400000; i++) {
      t+= calcFloat(i);
    }
  }
  end_time = time(NULL);
  printf("Float took %i seconds\n", end_time-start_time);
  printf("t is %f (to prevent optimization)\n",t);

  /* Time the custom point routine */
  start_time = time(NULL);
  for(j = 0; j < 250; j++) {
    for(i = 0; i < 0x400000; i++) {
      unsigned m;
      signed e;
      calcFixed(i, &m, &e);
    }
  }
  end_time = time(NULL);
  printf("Fixed Took %i seconds\n", end_time-start_time);

  /* Check the accuracy */
  for(i = 1;i < 4*1024*1024; i++) {
    unsigned m;
    signed e;
    double error;
    calcFixed(i, &m, &e);
    error = 1.0 - fixed_to_float(m,e)/calcFloat(i);

    if(max_error < error) {
      max_error = error;
      max_i = i;
    }

    if(min_error > error) {
      min_error = error;
      min_i = i;
    }
  }
  printf("Max error at %i is %3.6f%%\n",max_i, max_error);
  printf("Min error at %i is %3.6f%%\n",min_i, min_error);
  return 0;
}

Personal tools