I am writing a program that does some floating addition that uses bit patterns with shifts applied to the mantissa and such to obtain the sum of the two floating point numbers. Logically and on paper I can get this to compute the correct sum, but I must be missing something in my program because the output ( in base 2 scientific notation) is not correct ? Anyone see where my error is ? Thank you for the help !
Code:#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
int isNegative (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return ( ((*iptr) & 0x80000000) ? 1:0);
}
unsigned char getExponent (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return (((*iptr >> 23) & 0xff) - 127);
}
unsigned int getMantissa (float f)
{
unsigned int* iptr = (unsigned int*)&f;
if( *iptr == 0 ) return 0;
return ((*iptr & 0xFFFFFF) | 0x800000 );
}
float sum (float left, float right)
{
unsigned int littleMan;
unsigned int bigMan;
unsigned char littleExp;
unsigned char bigExp;
unsigned char lexp = getExponent(left);
unsigned char rexp = getExponent(right);
int Dexponent;
if (lexp > rexp)
{
bigExp = lexp;
bigMan = getMantissa(left);
littleExp = rexp;
littleMan = getMantissa(right);
}
else
{
bigExp = rexp;
bigMan = getMantissa(right);
littleExp = lexp;
littleMan = getMantissa(left);
}
printf("little: %x %x\n", littleExp, littleMan);
printf("big: %x %x\n", bigExp, bigMan);
void shift( unsigned int *valToShift, int bitsToShift )
{
// Masks is used to mask out bits to check for a "sticky" bit.
static unsigned masks[24] =
{
0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f,
0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff,
0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff
};
// HOmasks - masks out the H.O. bit of the value masked by the masks entry.
static unsigned HOmasks[24] =
{
0,
1, 2, 4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000
};
// shiftedOut- Holds the value that will be shifted out of a mantissa
// during the denormalization operation (used to round a denormalized value).
int shiftedOut;
assert( bitsToShift <= 23 );
// Grabs the bits we're going to shift out (so we can determine
// how to round this value after the shift).
shiftedOut = *valToShift & masks[ bitsToShift ];
// Shift the value to the right the specified number of bits:
*valToShift = *valToShift >> bitsToShift;
// If necessary, round the value:
if( shiftedOut > HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are greater than 1/2 the L.O. bit, then
// round the value up by one.
*valToShift = *valToShift + 1;
}
else if( shiftedOut == HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are exactly 1/2 of the L.O. bit's value,
// then round the value to the nearest number whose L.O. bit is zero.
*valToShift = *valToShift + ((*valToShift & 1) == 1);
}
// else we round the value down to the previous value. The current
// value is already truncated (rounded down), so we don't have to do anything.
}
// I got two actual floating point values. I want to add them together.
// 1. "denormalize" one of the operands if their exponents aren't
// the same (when adding or subtracting values, the exponents must be the same).
//
// Algorithm: choose the value with the smaller exponent. Shift its mantissa
// to the right the number of bits specified by the difference between the two
// exponents.
if( rexp > lexp )
{
shift( &littleMan, (rexp - lexp));
Dexponent = rexp;
}
else if( rexp < lexp )
{
shift( &littleMan, (lexp - rexp));
Dexponent = lexp;
}
unsigned int result = Dexponent;
float fresult = *(float*)&result;
return(fresult);
}
int main()
{
const int SIZE = 256;
char line[SIZE];
while (1)
{
float f1;
float f2;
float left = f1;
float right = f2;
printf("Please enter the first float ( \"q\" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) =='Q')
break;
f1 = atof(line);
printf("Please enter the second float ( \"q\" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) == 'Q')
break;
f2 = atof(line);
if (isNegative(f1) || isNegative(f2))
printf ("One of thse is negative, but %g + %g == %g\n", f1,f2,sum(f1,f2));
else
printf("%g + %g == %g\n", f1,f2,sum(f1,f2));
}
return(EXIT_SUCCESS);
}