# Custom floating point

This [C Coding Samples|C coding sample] was added July 2016

```/**********************************************
* Testing a range limited custom floating point
* for calculating c1*pow(x,5)+c2*pow(x,4) over
* a range of x=0 to x=2^22-1.
* Assumes 32-bit 'signed' and 'unsigned' types.
********************************************/
#include <stdio.h>
#include <time.h>

double c1 = 1.40715e-09;
double c2 = 9.18189e-08;

/* 1.4071499998200009962090462067863e-9 in custom floating point*/
unsigned m_c1 = 3244666990;
signed e_c1 = -29;

/*  9.18189e-08 in custom floating point */
unsigned m_c2 = 3308124510;
signed e_c2 = -23;

/****************************************************************/
/* Conversion for printing                                      */
/****************************************************************/
static double fixed_to_float(unsigned m, int e) {

double t;
t = m;
t /= 65536;
t /= 65536;
while(e > 0) {
t *= 2; e--;
}
while(e < 0) {
t /= 2; e++;
}
return t;
}

/****************************************************************/
/* Adjust so MSB of mantissa is set                             */
/****************************************************************/
static void fixed_normalize(unsigned *m, signed *e) {
/* Adjust to have a one in the MSB of mantissa */
if(*m == 0) {
*e = 0;
return;
}
while((*m & 0x80000000) == 0) {
*m <<= 1;
*e = *e -1;
}
}

/****************************************************************/
/* Multiply two numbers together, using only 32bit math         */
/****************************************************************/
static void fixed_mult(unsigned *m1, signed *e1, unsigned *m2, signed *e2) {
unsigned h1,l1,h2,l2;
unsigned h1h2, h1l2, l1h2, l1l2;
unsigned carry,acc;

if(*m1 == 0 || *m2 == 0) {
*m1 = 0;
*e1 = 0;
}

h1 = *m1 >> 16;
l1 = *m1 & 0xFFFF;
h2 = *m2 >> 16;
l2 = *m2 & 0xFFFF;

h1h2 = h1 * h2;
h1l2 = h1 * l2;
l1h2 = l1 * h2;
l1l2 = l1 * l2;

carry = 0;
acc = l1l2 >> 16;

acc += h1l2;
if(acc < h1l2) carry += 0x10000;

acc += l1h2;
if(acc < l1h2) carry += 0x10000;

acc >>= 16;
acc += carry + h1h2;
*m1 = acc;
*e1 += *e2;
fixed_normalize(m1,e1);
}

/****************************************************************/
/* Add two numbers together                                     */
/****************************************************************/
void fixed_add(unsigned *m1, signed *e1, unsigned *m2, signed *e2) {
if(*m2 == 0) {
return; /* Nothing to add */
}

if(*m1 == 0) {
*m1 = *m2;
*e1 = *e2;
return; /* Nothing in m1 at the moment */
}

/* Do we need to move the decimal on m1*2^e1 ? */
while(*e2 > *e1) {
*m1 >>= 1;
*e1 = *e1 + 1;
}

/* Do we need to move the decimal on m2*2^e2 ? */
while(*e1 > *e2) {
*m2 >>= 1;
*e2 = *e2 + 1;
}

/* As e2 = e1 we can now just add */
*m1 += *m2;

/* Test for overflow */
if(*m1 < *m2) {
*m1 = (*m1>>1) | 0x80000000;
*e1 = *e1 + 1;
}
}

/****************************************************************/
/* The main calculation in custom fixed point                   */
/****************************************************************/
double calcFixed(unsigned i, unsigned *m, signed *e) {
unsigned m_x = i;
signed e_x = 32;
unsigned m_t1;
signed e_t1;
unsigned m_t2;
signed e_t2;
fixed_normalize(&m_x, &e_x);
m_t2 = m_t1 = m_x;
e_t2 = e_t1 = e_x;

fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^2 */
fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^3 */
fixed_mult(&m_t1, &e_t1, &m_x, &e_x);  /* t1 = x^4 */
fixed_mult(&m_t2, &e_t2, &m_t1, &e_t1);  /* t2 = x^5 */
fixed_mult(&m_t1, &e_t1, &m_c2, &e_c2); /* t1 = c2 * x ^ 4 */
fixed_mult(&m_t2, &e_t2, &m_c1, &e_c1); /* t2 = c1 * x ^ 5 */
*m = m_t1;
*e = e_t1;
return 0;
}

/****************************************************************/
/* The same calculation in floating point                       */
/****************************************************************/
double calcFloat(unsigned i) {
double xd = i;
return c1 * xd*xd*xd*xd*xd + c2* xd*xd*xd*xd;
}

/****************************************************************/
/* Test harness to verify the code                              */
/****************************************************************/
int main(int argc, char *argv[]) {
int start_time;
int end_time;
int j;
unsigned i;
double t = 0;
double max_error=0.0, min_error=0.0;
int max_i = 0, min_i = 0;

/* Time the H/W floating point routine */
start_time = time(NULL);
for(j = 0; j < 250; j++) {
for(i = 0; i < 0x400000; i++) {
t+= calcFloat(i);
}
}
end_time = time(NULL);
printf("Float took %i seconds\n", end_time-start_time);
printf("t is %f (to prevent optimization)\n",t);

/* Time the custom point routine */
start_time = time(NULL);
for(j = 0; j < 250; j++) {
for(i = 0; i < 0x400000; i++) {
unsigned m;
signed e;
calcFixed(i, &m, &e);
}
}
end_time = time(NULL);
printf("Fixed Took %i seconds\n", end_time-start_time);

/* Check the accuracy */
for(i = 1;i < 4*1024*1024; i++) {
unsigned m;
signed e;
double error;
calcFixed(i, &m, &e);
error = 1.0 - fixed_to_float(m,e)/calcFloat(i);

if(max_error < error) {
max_error = error;
max_i = i;
}

if(min_error > error) {
min_error = error;
min_i = i;
}
}
printf("Max error at %i is %3.6f%%\n",max_i, max_error);
printf("Min error at %i is %3.6f%%\n",min_i, min_error);
return 0;
}

```