Thread: FPN math

  1. #1
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651

    FPN math

    Some of you will already know this but I'm working on a function for a compiler I'm making, this function is supposed to read both integers and floating numbers based on text provided. I was struggling to identify what variables I needed where and how to handle them, I then thought to do a small project/file with fixed data to verify those details, thing is I've now hit a roadblock and am struggling to come up with ideas of how to fix it, I think my problem lies in how I calculate the mantissa (parameter man) but it could also be the exponent (parameter exp) or both, I started with the floating part as 0 to check the exponent so I don't think it's that though. Here's what I have so far:
    Code:
    #include <stdlib.h>
    #include <stdio.h>
    #include <inttypes.h>
    #include <float.h>
    typedef unsigned char uchar;
    typedef unsigned long long ullong;
    #define TYPE float
    /* Overlays one TYPE value with its raw bytes (hex) and with three
     * bit-fields (man: 23 bits, exp: 8 bits, sig: 1 bit) so every view reads
     * and writes the same storage.
     * NOTE(review): `ulong` is not typedef'd in this snippet (only uchar and
     * ullong are) - presumably it comes from a system header; confirm.
     * NOTE(review): bit-field ordering and packing are implementation-defined,
     * so the man/exp/sig split presumably assumes a little-endian target. */
    typedef union LDBL {
    	TYPE fpn;
    	uchar hex[sizeof(TYPE)];
    	struct {
    		ulong man : 23;
    		ulong exp : 8;
    		ulong sig : 1;
    	};
    } LDBL;
    
    /* Writes `text` to stdout, then every byte of val.hex in index order as
     * " XX" (space plus two upper-case hex digits). No newline is written. */
    void print( char *text, LDBL val ) {
    	size_t idx = 0;
    	printf("%s",text);
    	while ( idx < sizeof(TYPE) ) {
    		printf( " %02X", val.hex[idx] );
    		idx++;
    	}
    }
    #define NUM 2
    #define FPN 0
    /* Experiment: computes 3.0 natively into `test`, tries to assemble the
     * same value by hand in `mine` through the man/exp bit-fields, then
     * prints both as raw bytes followed by their exp fields. */
    int main() {
    	ulong num = 3, fpn = 0, exp = 0;
    	ulong bit = 1, pos = 0;
    	LDBL test = {0}, mine = {0};
    	/* Reference value: fpn / 10 + num, evaluated in TYPE arithmetic. */
    	test.fpn = fpn;
    	test.fpn /= 10;
    	test.fpn += num;
    	printf("%E\n", test.fpn);
    	/* Walk bit positions upward until pos reaches FLT_MAX_EXP (or bit
    	 * shifts out to 0). Every pass ORs the bit into mine.man, and mine.exp
    	 * ends up holding the index of the highest set bit of num visited. */
    	for ( pos = 0; bit; bit <<= 1 ) {
    		mine.man |= bit;
    		if ( num & bit ) mine.exp = pos;
    		++pos;
    		if ( pos == FLT_MAX_EXP ) break;
    	}
    	/* FLT_MAX_EXP is added as the exponent bias.
    	 * NOTE(review): the output posted with this snippet shows mine.exp one
    	 * higher than test.exp, so this bias looks off by one - confirm. */
    	mine.exp += FLT_MAX_EXP;
    	/* exp is 0 in this test, so the adjustment below is skipped. */
    	if ( exp )
    		mine.exp = (mine.exp == (FLT_MAX_EXP) * 2) ? -1 : mine.exp + exp;
    	num *= 10;
    	bit = 1;
    	pos = 0;
    	/* Advance bit/pos to the lowest set bit of the scaled num. */
    	if ( mine.exp != (FLT_MAX_EXP + exp) )
    		for ( ; !(bit & num); bit <<= 1, ++pos );
    	/* Shift fraction bits into the mantissa; stops on the first pass where
    	 * fpn is zero after its shift. */
    	for ( ; pos < 23; ++pos ) {
    		fpn <<= 1;
    		mine.man <<= 1;
    		if ( !fpn ) break;
    		mine.man |= (fpn & bit);
    	}
    	print( "test", test );
    	putchar('\n');
    	print( "mine", mine );
    	putchar('\n');
    	printf( "%08X %08X\n", test.exp, mine.exp );
    	return 0;
    }
    I'll take a look at some bugs mentioned of mitsy before taking a break until I think of something or someone else manages to think of something (and posts it)

    Edit: Oh and here are my results:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    3.000000E+00
    test 00 00 40 40
    mine FE FF FF 40
    00000080 00000081
    Compilation finished successfully.
    Last edited by awsdert; 10-29-2019 at 03:45 AM.

  2. #2
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Never mind, I managed to get it , now to test with more complex FPNs
    Edit: Just thought I should've posted the code I have now:
    Code:
    #include <stdlib.h>
    #include <stdio.h>
    #include <inttypes.h>
    #include <float.h>
    typedef unsigned char uchar;
    typedef unsigned long long ullong;
    #define TYPE float
    /* One TYPE value viewed three ways: as the number itself (fpn), as raw
     * bytes (hex) and as man/exp/sig bit-fields of 23, 8 and 1 bits.
     * NOTE(review): `ulong` has no typedef in this snippet - presumably
     * supplied by a system header; confirm.
     * NOTE(review): bit-field layout is implementation-defined; the field
     * order presumably targets a little-endian ABI. */
    typedef union LDBL {
    	TYPE fpn;
    	uchar hex[sizeof(TYPE)];
    	struct {
    		ulong man : 23;
    		ulong exp : 8;
    		ulong sig : 1;
    	};
    } LDBL;
    
    /* Emits the label `text` and then each byte of val.hex, lowest index
     * first, formatted as " XX". The caller supplies any trailing newline. */
    void print( char *text, LDBL val ) {
    	size_t idx = 0;
    	printf("%s",text);
    	while ( idx < sizeof(TYPE) ) {
    		printf( " %02X", val.hex[idx] );
    		idx++;
    	}
    }
    #define NUM 2
    #define FPN 0
    /* Second attempt: same comparison of a native 3.0 (`test`) against a
     * hand-built bit pattern (`mine`), now seeding the mantissa with num. */
    int main() {
    	ulong num = 3, fpn = 0, exp = 0;
    	ulong bit = 1, pos = 0;
    	LDBL test = {0}, mine = {0};
    	/* Reference value: fpn / 10 + num, evaluated in TYPE arithmetic. */
    	test.fpn = fpn;
    	test.fpn /= 10;
    	test.fpn += num;
    	printf("%E\n", test.fpn);
    	/* pos is 1 after the first pass, so `pos < 23` is true at once and the
    	 * loop body runs exactly one time: only bit 0 of num is examined. */
    	for ( pos = 0; bit; bit <<= 1 ) {
    		if ( num & bit ) mine.exp = pos;
    		++pos;
    		if ( pos < 23 ) break;
    	}
    	mine.man = num;
    	/* The final loop below runs from this position up to bit 23. */
    	pos = mine.exp + 1;
    	/* FLT_MAX_EXP is added as the exponent bias. */
    	mine.exp += FLT_MAX_EXP;
    	/* exp is 0 in this test, so the adjustment below is skipped. */
    	if ( exp )
    		mine.exp = (mine.exp == (FLT_MAX_EXP) * 2) ? -1 : mine.exp + exp;
    	num *= 10;
    	bit = 1;
    	/* Advance bit to the lowest set bit of the scaled num. */
    	if ( mine.exp != (FLT_MAX_EXP + exp) )
    		for ( ; !(bit & num); bit <<= 1 );
    	/* Shift the mantissa left once per remaining position; bits pushed
    	 * past the 23-bit field are discarded by the bit-field assignment. */
    	for ( ; pos < 23; ++pos ) {
    		fpn <<= 1;
    		mine.man <<= 1;
    		mine.man |= (fpn & bit);
    	}
    	print( "test", test );
    	putchar('\n');
    	print( "mine", mine );
    	putchar('\n');
    	printf( "%08X %08X\n", test.exp, mine.exp );
    	return 0;
    }

  3. #3
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Turned out to be a coincidence that it resulted in correct hex, had the wrong comparison in my first loop and it was breaking coincidentally at the right moment, fixed the comparison and it no longer breaks at the right moment:
    Code:
    #include <limits.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <inttypes.h>
    #include <float.h>
    typedef unsigned char uchar;
    typedef unsigned long long ullong;
    #define bitsof(T) (sizeof(T) * CHAR_BIT)
    #define FPNT float
    #define FPNT_MAN_BIT 23
    #define FPNT_EXP_BIT ((bitsof(FPNT) - FPNT_MAN_BIT) - 1)
    #define FPNT_EXP_MAX FLT_MAX_EXP
    /* Overlays an FPNT value with its raw bytes and with bit-fields sized by
     * FPNT_MAN_BIT (mantissa), FPNT_EXP_BIT (exponent) and one sign bit.
     * NOTE(review): `ulong` has no typedef in this snippet - presumably
     * supplied by a system header; confirm.
     * NOTE(review): bit-field layout is implementation-defined; the field
     * order presumably targets a little-endian ABI. */
    typedef union LDBL {
    	FPNT fpn;
    	uchar hex[sizeof(FPNT)];
    	struct {
    		ulong man : FPNT_MAN_BIT;
    		ulong exp : FPNT_EXP_BIT;
    		ulong sig : 1;
    	};
    } LDBL;
    
    /* Writes `text` to stdout, then every byte of val.hex in index order as
     * " XX" (space plus two upper-case hex digits). No newline is written. */
    void print( char *text, LDBL val ) {
    	size_t idx = 0;
    	printf("%s",text);
    	while ( idx < sizeof(FPNT) ) {
    		printf( " %02X", val.hex[idx] );
    		idx++;
    	}
    }
    /* Third attempt: the first loop now scans all FPNT_MAN_BIT positions;
     * the native 3.0 (`test`) is compared with the hand-built `mine`. */
    int main() {
    	ulong num = 3, fpn = 0, exp = 0;
    	ulong bit = 1, pos = 0;
    	LDBL test = {0}, mine = {0};
    	/* Reference value: fpn / 10 + num, evaluated in FPNT arithmetic. */
    	test.fpn = fpn;
    	test.fpn /= 10;
    	test.fpn += num;
    	printf("%E\n", test.fpn);
    	/* After this loop mine.exp holds the index of the highest set bit of
    	 * num among bits 0 .. FPNT_MAN_BIT-1. */
    	for ( pos = 0; pos < FPNT_MAN_BIT; ++pos ) {
    		if ( num & bit ) mine.exp = pos;
    		bit <<= 1;
    	}
    	mine.man = num;
    	pos = mine.exp - 1;
    	/* FPNT_EXP_MAX is added as the exponent bias.
    	 * NOTE(review): the output posted with this snippet shows mine.exp one
    	 * higher than test.exp - confirm the intended bias. */
    	mine.exp += FPNT_EXP_MAX;
    	/* exp is 0 in this test, so the adjustment below is skipped. */
    	if ( exp )
    		mine.exp = (mine.exp == (FPNT_EXP_MAX * 2)) ? -1 : mine.exp + exp;
    	num *= 10;
    	bit = 1;
    	/* Advance bit to the lowest set bit of the scaled num. */
    	if ( mine.exp < (FPNT_EXP_MAX + exp) )
    		for ( ; !(bit & num); bit <<= 1 );
    	/* `10 & 1` is the constant 0, so this branch is always taken. */
    	if ( !(10 & 1) )
    		bit >>= 1;
    	/* Shift the mantissa left once per remaining position; bits pushed
    	 * past the FPNT_MAN_BIT-wide field are discarded on assignment. */
    	for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    		fpn <<= 1;
    		mine.man <<= 1;
    		mine.man |= (fpn & bit);
    	}
    	print( "test", test );
    	putchar('\n');
    	print( "mine", mine );
    	putchar('\n');
    	printf( "%08X %08X\n", test.exp, mine.exp );
    	return 0;
    }
    Need to restock on milk and have something to eat so I'll check after that, my current results:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    3.000000E+00
    test 00 00 40 40
    mine 00 00 80 40
    00000080 00000081
    Compilation finished successfully.
    Last edited by awsdert; 10-29-2019 at 04:18 AM. Reason: forgot to remove directory from results

  4. #4
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Seem to have got it this time, however ran a test just before this post with fpn set to 14 and got different results so only halfway there:
    Code:
    #include <limits.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <inttypes.h>
    #include <float.h>
    typedef unsigned char uchar;
    typedef unsigned long long ullong;
    #define bitsof(T) (sizeof(T) * CHAR_BIT)
    #define FPNT float
    #define FPNT_MAN_BIT 23
    #define FPNT_EXP_BIT ((bitsof(FPNT) - FPNT_MAN_BIT) - 1)
    #define FPNT_EXP_MAX FLT_MAX_EXP
    /* One FPNT value viewed as the number (fpn), as raw bytes (hex) and as
     * man/exp/sig bit-fields sized by FPNT_MAN_BIT, FPNT_EXP_BIT and 1.
     * NOTE(review): `ulong` has no typedef in this snippet - presumably
     * supplied by a system header; confirm.
     * NOTE(review): bit-field layout is implementation-defined; the field
     * order presumably targets a little-endian ABI. */
    typedef union LDBL {
    	FPNT fpn;
    	uchar hex[sizeof(FPNT)];
    	struct {
    		ulong man : FPNT_MAN_BIT;
    		ulong exp : FPNT_EXP_BIT;
    		ulong sig : 1;
    	};
    } LDBL;
    
    /* Emits the label `text` and then each byte of val.hex, lowest index
     * first, formatted as " XX". The caller supplies any trailing newline. */
    void print( char *text, LDBL val ) {
    	size_t idx = 0;
    	printf("%s",text);
    	while ( idx < sizeof(FPNT) ) {
    		printf( " %02X", val.hex[idx] );
    		idx++;
    	}
    }
    /* Fourth attempt: derives the exponent from the decimal digit count of
     * num and builds the fraction bits by repeated doubling of fpn. */
    int main() {
    	ulong num = 3, fpn = 0, exp = 0;
    	ulong pos = 0;
    	LDBL test = {0}, mine = {0};
    	/* Reference value: fpn / 10 + num, evaluated in FPNT arithmetic. */
    	test.fpn = fpn;
    	test.fpn /= 10;
    	test.fpn += num;
    	printf("%E\n", test.fpn);
    	mine.man = num;
    	if ( num ) {
    		/* Count how many divisions by 10 it takes for num to reach 0
    		 * (capped at FPNT_MAN_BIT); that count minus one becomes exp. */
    		for ( ++pos ; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			if ( !num ) break;
    		}
    		mine.exp = pos - 1;
    	}
    	/* FPNT_EXP_MAX is added as the exponent bias. */
    	mine.exp += FPNT_EXP_MAX;
    	/* exp is 0 in this test, so the adjustment below is skipped. */
    	if ( exp )
    		mine.exp = (mine.exp == (FPNT_EXP_MAX * 2)) ? -1 : mine.exp + exp;
    	printf( "test.exp %08X\nmine.exp %08X\n", test.exp, mine.exp );
    	/* Rebuild num as 10 multiplied by ten (pos - 1) times.
    	 * NOTE(review): if pos were 0 here, --exp would wrap (ulong is
    	 * presumably unsigned) and this loop would run far too long. */
    	exp = pos;
    	num = 10;
    	while ( --exp ) num *= 10;
    	/* One mantissa bit per pass: double fpn, shift man, and when fpn
    	 * exceeds num set the low bit and strip fpn's remainder modulo num. */
    	for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    		fpn *= 2;
    		mine.man <<= 1;
    		if ( fpn > num ) {
    			mine.man |= 1u;
    			fpn -= (fpn % num);
    		}
    	}
    	printf( "test.man %08X\nmine.man %08X\n", test.man, mine.man );
    	print( "test.hex", test );
    	putchar('\n');
    	print( "mine.hex", mine );
    	putchar('\n');
    	return 0;
    }

  5. #5
    Registered User
    Join Date
    Feb 2019
    Posts
    550
    What do you want to do? Convert an unsigned int to float?
    If this is the case I suggest calculating the exponent first:

    e=log2(N);

    You can get a reliable log2() integer function from Bit Twiddling Hacks. Or something like this:
    Code:
    /* Integer base-2 logarithm: returns the index of the highest set bit of x
     * (floor(log2(x))), or -1 when x is 0, which has no logarithm.
     * Written as a plain shift loop so it needs no compiler intrinsic and is
     * well-defined for every input, including 0. */
    int log2_(unsigned int x)
    {
      int e = -1;

      /* Each shift discards one bit; the number of shifts needed to reach 0
         is one more than the index of the top set bit. */
      while (x != 0) {
        x >>= 1;
        e++;
      }
      return e;
    }
    Since zero is not a valid value for x.

    Now, since normalized float is:

    x=(1 + M/2^23)*2^e
    x=2^e + M*2^(e-23)
    M*2^(e-23) = x - 2^e
    M = (x - 2^e)*2^(23-e)
    M = (x - (1 << e)) << (23 - e)

    Of course, if e > 23 you need to change the direction of the shift...

    if e < 24 then M = (x - (1 << e)) << (23 - e); else M = (x - (1 << e)) >> (e - 23).

    And remember: E=e+127.

    Let's say you want to convert 1023:

    e=log2(1023)=9
    M=(1023 - (1 << 9)) << (23 - 9) = (1023 - 512) << 14 = 511 << 14,
    M=8372224
    E=136

    To be sure:

    x = (1 + M/2^23)*2^e = (1 + 8372224/8388608)*2^9 = 1023

    Here's the actual floating point:
    Code:
    #include <stdio.h>
    
    /* Bit-field view of a 32-bit float: m = 23 mantissa bits, e = 8 exponent
     * bits, s = 1 sign bit.
     * NOTE(review): bit-field ordering is implementation-defined; declaring
     * m first presumably assumes a little-endian ABI - confirm. */
    struct fp_s {
      unsigned int m:23;
      unsigned int e:8;
      unsigned int s:1;
    };
    
    int main( void )
    {
      float f = 1023;
      struct fp_s *p = (struct fp_s *)&f;
    
      printf( "S=%u, E=%u, M=%u\n", p->s, p->e, p->m );
    }
    Compiling and running:
    Code:
    $ cc -o test test.c
    $ ./test
    S=0, E=136, M=8372224
    There are other small problems you'll need to deal with: Rounding, zero, sub-normal values, infinities and NaNs, but to convert an integer to floating point is that easy.
    Last edited by flp1969; 10-29-2019 at 09:46 AM.

  6. #6
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    No not trying to convert a simple integer, trying to construct an fpn from 2 separate integers, one for the whole number part and another for the decimal part, I'm trying to do it that way cause the function in mitsy will be doing it that way, starts with normal integer then when hits '.' it passes it into another variable, resets the one currently working with and continues normally, after the loop ends it checks the aforementioned variable and enters fpn mode if it's not 0, which now that I think of it will exclude 0.N so I'll have to rework that, doesn't change that I need 2 variables to keep track of though, the reason I don't just use a native float is that I need to support compiling to non-native systems, the simplest way to do that is construct the binary directly, the binary can then be passed into instructions or in preprocessor mode be passed into a native float via functions and then used in the expression given to the preprocessor

  7. #7
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651

    typo

    As it turns out I seem to have managed it correctly, I didn't realise the test value had not come out correctly, I finally check that one against mine just now in %E form and learned the .14 had not been recorded as it should've so now I'm just using the hardcoded value of 3.14f to fill it, annoying to keep changing 2 values when I want to test the more advanced format but whatever. I did some tweaking before I tested that though so I'll post my current code:
    Code:
    #include <limits.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <inttypes.h>
    #include <float.h>
    typedef unsigned char uchar;
    typedef unsigned long long ullong;
    #define bitsof(T) (sizeof(T) * CHAR_BIT)
    #define FPNT float
    #define FPNT_MAN_BIT 23
    #define FPNT_EXP_BIT ((bitsof(FPNT) - FPNT_MAN_BIT) - 1)
    #define FPNT_EXP_MAX FLT_MAX_EXP
    /* Overlays an FPNT value with its raw bytes and with bit-fields sized by
     * FPNT_MAN_BIT (mantissa), FPNT_EXP_BIT (exponent) and one sign bit.
     * NOTE(review): `ulong` has no typedef in this snippet - presumably
     * supplied by a system header; confirm.
     * NOTE(review): bit-field layout is implementation-defined; the field
     * order presumably targets a little-endian ABI. */
    typedef union LDBL {
    	FPNT fpn;
    	uchar hex[sizeof(FPNT)];
    	struct {
    		ulong man : FPNT_MAN_BIT;
    		ulong exp : FPNT_EXP_BIT;
    		ulong sig : 1;
    	};
    } LDBL;
    
    /* Writes `text` to stdout, then every byte of val.hex in index order as
     * " XX" (space plus two upper-case hex digits). No newline is written. */
    void print( char *text, LDBL val ) {
    	size_t idx = 0;
    	printf("%s",text);
    	while ( idx < sizeof(FPNT) ) {
    		printf( " %02X", val.hex[idx] );
    		idx++;
    	}
    }
    /* Fifth attempt: builds 3.14 from num = 3 and fpn = 14 and compares it
     * field by field with the compiler's own 3.14f. */
    int main() {
    	ulong num = 3, fpn = 14, exp = 0;
    	ulong pos = 0;
    	LDBL test = {0}, mine = {0};
    	test.fpn = 3.14f;
    	mine.man = num;
    	if ( num ) {
    		/* Count how many divisions by 10 it takes for num to reach 0
    		 * (capped at FPNT_MAN_BIT); that count minus one becomes exp. */
    		for ( ++pos ; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			if ( !num ) break;
    		}
    		mine.exp = pos - 1;
    	}
    	/* FPNT_EXP_MAX is added as the exponent bias. */
    	mine.exp += FPNT_EXP_MAX;
    	/* exp is 0 in this test, so the adjustment below is skipped. */
    	if ( exp )
    		mine.exp = (mine.exp == (FPNT_EXP_MAX * 2)) ? -1 : mine.exp + exp;
    	printf( "test.exp %08X\nmine.exp %08X\n", test.exp, mine.exp );
    	/* exp is assigned here but not read again in this function. */
    	exp = pos;
    	/* Hard-coded scale; presumably 10^(digits of fpn) for the two-digit
    	 * fraction 14 - confirm before changing fpn. */
    	num = 100;
    	/* One mantissa bit per pass: double fpn, shift man, and when fpn
    	 * reaches num set the low bit and subtract num. Bits are truncated,
    	 * not rounded. */
    	for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    		fpn *= 2;
    		mine.man <<= 1;
    		if ( fpn >= num ) {
    			mine.man |= 1u;
    			fpn -= num;
    		}
    	}
    	printf( "test.man %08X\nmine.man %08X\n", test.man, mine.man );
    	print( "test.hex", test );
    	putchar('\n');
    	print( "mine.hex", mine );
    	putchar('\n');
    	printf("test.fpn %E mine.fpn %E\n", test.fpn, mine.fpn);
    	return 0;
    }
    With results:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    test.exp 00000080
    mine.exp 00000080
    test.man 0048F5C3
    mine.man 0048F5C2
    test.hex C3 F5 48 40
    mine.hex C2 F5 48 40
    test.fpn 3.140000E+00 mine.fpn 3.140000E+00
    Compilation finished successfully.
    Not a perfect match but I'm sure some more tweaking will get it right.
    Last edited by awsdert; 10-29-2019 at 11:13 AM. Reason: Forgot I switched back to softcode to see if that was the problem, switched back to 3.14f

  8. #8
    Registered User
    Join Date
    Feb 2019
    Posts
    550
    Quote Originally Posted by awsdert View Post
    No not trying to convert a simple integer, trying to construct an fpn from 2 separate integers, one for the whole number part and another for the decimal part
    Fixed point to floating point then? If you already have the bits isn't it just a matter of shifting them to the correct position?

    Of course, you have to recalculate the fractional part... if you are dealing the integral and fractional parts as 32 bits values, 2^32 (fractional) is the same as 1.0, so:

    n = (2^32*f)/(10^(log10(f) + 1)).

    Taking 3.14 as your example... f=14 can be encoded as (2^32*14)/(10^2) = 601295421 (0b00100011110101110000101000111101 in 32 bits binary - of course this calculation must be done with enough precision to avoid overflows). So 3.14 can be encoded as 0b11.[00100011110101110000101000111101]. Shifting the binary point 1 position to the left we get 0b1.100100011110101110000101000111101 and e=1. Now we have our "implicit" one and the fractional part that will satisfy the floating point float format if restricted to 23 bits: [0b1.100_1000_1111_0101_1100_0010]_1000111101.

    So, M=0b10010001111010111000010 (0x48f5c2 - 23 bits), E=128 (0x80) (E=e+127) and S=0. Almost exactly what is expected for a floating point (float) value... The only difference is about rounding. Notice the _1000111101 final part, if its msb is 1 we need to add 1 to M, getting exactly the correct value:

    To be sure:

    v = (1 + 0x48f5c3 / 2^23) * 2^1 = (1+4781507/2^23)*2 = 3.14000010490417480468
    Last edited by flp1969; 10-29-2019 at 02:49 PM.

  9. #9
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Oh I tried something similar when I started testing my code, didn't come out right, the main problem I'm having is detecting when 0.N becomes 1.N, it's not easy when the source data starts at same position so what I'm trying to do is get the bit that serves as +1 or +0.5 and do my comparisons around those points but I also need to be able to check for N0.N scenarios, in other words a naive implementation will have erroneous results like I do above

  10. #10
    Registered User
    Join Date
    Feb 2019
    Posts
    550
    Quote Originally Posted by awsdert View Post
    Oh I tried something similar when I started testing my code, didn't come out right, the main problem I'm having is detecting when 0.N becomes 1.N, it's not easy when the source data starts at same position so what I'm trying to do is get the bit that serves as +1 or +0.5 and do my comparisons around those points but I also need to be able to check for N0.N scenarios, in other words a naive implementation will have erroneous results like I do above
    Well... here it worked well (there is a problem though):
    Code:
    #include <assert.h>
    #include <stdio.h>
    #include <stdint.h>
    
    /* Bit-field view of a 32-bit float: m = 23 mantissa bits, e = 8 biased
     * exponent bits, s = 1 sign bit. Bit-field ordering is
     * implementation-defined; this order matches little-endian targets. */
    union fp_u {
      float f;
      struct {
        unsigned int m:23;
        unsigned int e:8;
        unsigned int s:1;
      } s;
    };

    static int log10_( unsigned int );
    static uint64_t pow10_( unsigned int );

    /* Builds a non-negative float from a decimal integer part `i` and a
     * decimal fraction `f` written as an integer (3 and 14 give 3.14),
     * using integer arithmetic only.
     *
     * Returns 0.0 when both parts are 0. Rounding is "round half up" on the
     * first discarded bit.
     *
     * Known limitation: f carries no leading zeros, so fractions below 0.1
     * (0.04, 0.001, ...) cannot be expressed; f = 4 always means 0.4.
     */
    float floatFromFixed( unsigned int i, unsigned int f )
    {
      uint64_t n;
      unsigned int tmp;
      int e;
      union fp_u fp = { .f=0.0 };

      // Special case: i and f == 0, then return 0.0.
      if ( i != 0 || f != 0 )
      {
        // Scale the decimal fraction to a 32 bit binary fraction:
        // f / 10^digits is always below 1, so the quotient fits 32 bits.
        tmp = ( unsigned int )
              ( ( ( uint64_t )f << 32 ) / pow10_(log10_( f ) + 1) );

        // 32.32 fixed point: integer part above bit 31, fraction below.
        n = (( uint64_t )i << 32) + tmp;
        e = 0;
        if ( i > 0 )
        {
          // shift right until only bit 32, as the upper bit, is 1.
          while ( n & 0xfffffffe00000000ULL )
          {
            n >>= 1;
            e++;
          }
        }
        else // we aren't dealing with negative values
             // for now, so this block is for i == 0.
        {
          // shift left until bit 32 is 1.
          while ( ! ( n & 0x100000000ULL ) )
          {
            n <<= 1;
            e--;
          }
        }

        // Isolate the upper 23 bits from fractional part.
        tmp = ( unsigned int )( ( n & 0xffffffffU ) >> 9 );

        // round to nearest.
        if ( n & 0x100 )
        {
          tmp++;

          // A carry out of the 23 bit mantissa means the value rounded up
          // to the next power of two: mantissa becomes 0, exponent + 1.
          // Without this the carry was silently dropped (1.99999999 -> 1.0).
          if ( tmp >> 23 )
          {
            tmp = 0;
            e++;
          }
        }

        fp.s.m = tmp;
        fp.s.e = e + 127;
        // fp.s.s = 0;    // ignore the signal, for now.
      }

      return fp.f;
    }

    // Number of decimal digits of n, minus one (0 for n < 10, 9 at most).
    static int log10_( unsigned int n )
    {
     return (n >= 1000000000) ? 9 : (n >= 100000000) ? 8 : (n >= 10000000) ? 7 :
            (n >= 1000000) ? 6 : (n >= 100000) ? 5 : (n >= 10000) ? 4 :
            (n >= 1000) ? 3 : (n >= 100) ? 2 : (n >= 10) ? 1 : 0;
    }

    // Calculate 10^e
    static uint64_t pow10_( unsigned int e )
    {
      // 10^19 is the largest power of ten that fits in 64 bits. The caller
      // passes log10_(f) + 1, which is 10 for ten-digit f, so the former
      // limit of e < 10 aborted on valid input.
      assert( e < 20 );

      uint64_t m = 1;

      // we could do this with SSE or FP87, but we're trying to avoid
      // using floating point...
      while ( e-- )
        m *= 10;

      return m;
    }
    
    /* Demo driver: converts 3.14 and 0.105 with floatFromFixed() and prints
     * both results using %f. */
    int main( void )
    {
      const float first  = floatFromFixed( 3,  14 );
      const float second = floatFromFixed( 0, 105 );

      printf( "3.14  -> %f\n"
              "0.105 -> %f\n", first, second );
      return 0;
    }
    As you can see... it's not possible to pass values as 3.04... But the above code shows the fixed to float approach I was talking about.

  11. #11
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Thanks for the input flp1969, unfortunately that function looks too much like algebra to me and I never liked algebra cause it made me think too hard, anyways before I looked at your post I had redone my attempt and somehow I'm off by one when doing 1.0e+0:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    test.sig 00000000 mine.sig 00000000
    test.exp 0000007F mine.exp 00000080
    test.man 00000000 mine.man 007FFFFF
    test.hex 00 00 80 3F mine.hex FF FF 7F 40
    Compilation finished successfully.
    Code:
    /* Reworked experiment for 1.0e+0: counts the decimal digits of num,
     * then generates mantissa bits by doubling fpn against the scale `one`.
     * Relies on LDBL, print and the FPNT_* macros from the earlier posts. */
    int main() {
    	ullong num = 1, fpn = 0, one = 0;
    	long pos = 1, exp = 0;
    	LDBL test = {0}, mine = {0};
    	test.fpn = 1.0e+0;
    	if ( num ) {
    		mine.man = num;
    		/* `one` starts at 0 and is only ever multiplied by 10, so it is
    		 * still 0 when this loop finishes. */
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			if ( !num ) break;
    			one *= 10;
    		}
    		mine.exp = pos - 1;
    	}
    	if ( mine.man || fpn ) {
    		/* FPNT_EXP_MAX is added as the exponent bias. */
    		mine.exp += FPNT_EXP_MAX;
    		/* With one == 0 the test `fpn >= one` holds on every pass (both
    		 * are unsigned), so every shifted-in mantissa bit is set to 1. */
    		for ( --pos; pos < FPNT_MAN_BIT; ++pos ) {
    			fpn *= 2;
    			mine.man <<= 1;
    			if ( fpn >= one ) {
    				mine.man |= 1u;
    				fpn -= one;
    				++fpn;
    			}
    		}
    	}
    	/* exp is 0 in this test, so neither scaling loop below iterates. */
    	pos = 0;
    	for ( ; pos < exp; ++pos, mine.man *= 2 );
    	for ( ; pos > exp; --pos, mine.man *= 2 );
    	// if ( mine.exp ) mine.exp--;
    	printf( "test.sig %08X mine.sig %08X\n", test.sig, mine.sig );
    	printf( "test.exp %08X mine.exp %08X\n", test.exp, mine.exp );
    	printf( "test.man %08X mine.man %08X\n", test.man, mine.man );
    	print( "test.hex", test );
    	putchar(' ');
    	print( "mine.hex", mine );
    	putchar('\n');
    	//printf("test.fpn %E mine.fpn %E\n", test.fpn, mine.fpn);
    	return 0;
    }
    Any ideas what is causing that?

  12. #12
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Also now that I've looked at it a bit more your version relies on integers bigger than a float, as you may have guessed I will be attempting long doubles later so I'd like to use a version that does not rely on larger integers because that opens up the problem of what to do when there are no such integers, while I can program array type integers I still struggle with the division part which would be essential at the top of your function, I would also need to program a log10 etc variant for those array integers, all that still excludes the possibility of not enough memory anyway.

  13. #13
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Never mind, I forgot to set the variable one among other things, new(ish) problem however:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    test.sig 00000000 mine.sig 00000000
    test.exp 0000007B mine.exp 0000007F
    test.man 004CCCCD mine.man 003FFFFF
    test.hex CD CC CC 3D mine.hex FF FF BF 3F
    Compilation finished successfully.
    Code:
    /* Experiment for 0.1e+0 (num = 0, fpn = 1): `one` now starts at 1 and is
     * scaled by 10 per integer digit. Relies on LDBL, print and the FPNT_*
     * macros from the earlier posts. */
    int main() {
    	ullong num = 0, fpn = 1, one = 1;
    	long pos = 1, exp = 0;
    	LDBL test = {0}, mine = {0};
    	test.fpn = 0.1e+0;
    	/* num is 0 in this test, so this block is skipped and `one` stays 1. */
    	if ( num ) {
    		mine.man = num;
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			one *= 10;
    			if ( !num ) break;
    		}
    		mine.exp = --pos;
    	}
    	if ( mine.man || fpn ) {
    		/* FPNT_EXP_MAX is added as the exponent bias. */
    		mine.exp += FPNT_EXP_MAX;
    		/* One mantissa bit per pass: double fpn, shift man, and when fpn
    		 * reaches `one` set the low bit and subtract `one`.
    		 * NOTE(review): with one == 1 and fpn == 1 every pass sets the
    		 * bit, which matches the 003FFFFF mantissa in the posted output. */
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			fpn *= 2;
    			mine.man <<= 1;
    			if ( fpn >= one ) {
    				mine.man |= 1u;
    				fpn -= one;
    			}
    		}
    	}
    	/* exp is 0 in this test, so neither scaling loop below iterates. */
    	pos = 0;
    	for ( ; pos < exp; ++pos, mine.man *= 2 );
    	for ( ; pos > exp; --pos, mine.man *= 2 );
    	/* Drops the biased exponent by one whenever it is non-zero. */
    	if ( mine.exp ) mine.exp--;
    	printf( "test.sig %08X mine.sig %08X\n", test.sig, mine.sig );
    	printf( "test.exp %08X mine.exp %08X\n", test.exp, mine.exp );
    	printf( "test.man %08X mine.man %08X\n", test.man, mine.man );
    	print( "test.hex", test );
    	putchar(' ');
    	print( "mine.hex", mine );
    	putchar('\n');
    	//printf("test.fpn %E mine.fpn %E\n", test.fpn, mine.fpn);
    	return 0;
    }
    I get that there's also a problem with the exponent on this test but putting that aside I'd like to focus on the mantissa since the exponent will be easier to fix when the mantissa handling is correct. Since I don't have any ideas right now while I'm waiting on a response I'll just move the code into its own function.

  14. #14
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    Finished moving to a function like you indicated with your post:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    test.sig 0 mine.sig 0 test.exp 00000000 mine.exp 00000000 test.man 00000000 mine.man 00000000 test.hex 00 00 00 00 mine.hex 00 00 00 00 test.fpn 0.000000E+00 mine.fpn 0.000000E+00
    test.sig 0 mine.sig 0 test.exp 0000007F mine.exp 0000007F test.man 00000000 mine.man 00000000 test.hex 00 00 80 3F mine.hex 00 00 80 3F test.fpn 1.000000E+00 mine.fpn 1.000000E+00
    test.sig 0 mine.sig 0 test.exp 0000007B mine.exp 0000007F test.man 004CCCCD mine.man 003FFFFF test.hex CD CC CC 3D mine.hex FF FF BF 3F test.fpn 1.000000E-01 mine.fpn 1.500000E+00
    Compilation finished successfully.
    Code:
    /* Assembles an LDBL bit pattern from a decimal integer part (num), a
     * decimal fraction written as an integer (fpn) and a scaling count (exp).
     * Returns the all-zero pattern when num and fpn are both 0.
     * NOTE(review): work in progress - the output posted with this snippet
     * shows a wrong result for num = 0, fpn = 1 (1.5 instead of 0.1). */
    LDBL makeFPN( ullong num, ullong fpn, long exp ) {
    	LDBL dst = {0};
    	ullong one = 1;
    	long pos = 1;
    	if ( num ) {
    		dst.man = num;
    		/* Multiply `one` by 10 once per decimal digit of num while
    		 * counting those digits in pos (capped at FPNT_MAN_BIT). */
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			one *= 10;
    			if ( !num ) break;
    		}
    		dst.exp = --pos;
    	}
    	if ( dst.man || fpn ) {
    		/* FPNT_EXP_MAX is added as the exponent bias. */
    		dst.exp += FPNT_EXP_MAX;
    		/* One mantissa bit per pass: double fpn, shift man, and when fpn
    		 * reaches `one` set the low bit and subtract `one`. */
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			fpn *= 2;
    			dst.man <<= 1;
    			if ( fpn >= one ) {
    				dst.man |= 1u;
    				fpn -= one;
    			}
    		}
    	}
    	/* Doubles the mantissa once per unit of |exp|; both loops multiply,
    	 * whichever sign exp has. */
    	pos = 0;
    	for ( ; pos < exp; ++pos, dst.man *= 2 );
    	for ( ; pos > exp; --pos, dst.man *= 2 );
    	/* Drops the biased exponent by one whenever it is non-zero. */
    	if ( dst.exp ) dst.exp--;
    	return dst;
    }
    /* One makeFPN() test vector: the three arguments passed to it. */
    typedef struct test_val {
    	ullong num;
    	ullong fpn;
    	long exp;
    } test_val_t;
    
    /* Inputs for makeFPN(); entry i is compared against floats[i] in main. */
    test_val_t values[] = {{0},{1,0,0},{0,1,0}};
    /* Reference values produced by the compiler for the same entries. */
    float floats[] = {0,1,0.1};
    /* Runs the three test vectors: for each one prints the sign, exponent,
     * mantissa, raw bytes and %E value of the native float next to the
     * makeFPN() result, one line per vector. */
    int main() {
    	int i;
    	/* The initial makeFPN(0,1,0) result is overwritten inside the loop
    	 * before it is read. */
    	LDBL test = {0}, mine = makeFPN(0,1,0);
    	for ( i = 0; i < 3; ++i ) {
    		test.fpn = floats[i];
    		mine = makeFPN(values[i].num, values[i].fpn,values[i].exp);
    		printf( "test.sig %u mine.sig %u ", test.sig, mine.sig );
    		printf( "test.exp %08X mine.exp %08X ", test.exp, mine.exp );
    		printf( "test.man %08X mine.man %08X ", test.man, mine.man );
    		print( "test.hex", test );
    		putchar(' ');
    		print( "mine.hex", mine );
    		putchar(' ');
    		printf("test.fpn %E mine.fpn %E\n", test.fpn, mine.fpn);
    	}
    	return 0;
    }

  15. #15
    Registered User awsdert's Avatar
    Join Date
    Jan 2015
    Posts
    651
    I got a little closer, but now I gotta go to work, here's what I have in the meantime:
    Code:
    gcc -Wall -o "test_fpn" "test_fpn.c" && "./test_fpn"
    test.sig 0 mine.sig 0 test.exp 00 mine.exp 00 test.man 000000 mine.man 000000 test.hex 00 00 00 00 mine.hex 00 00 00 00 test.fpn 0.000000E+00 mine.fpn 0.000000E+00
    test.sig 0 mine.sig 0 test.exp 7F mine.exp 7F test.man 000000 mine.man 000000 test.hex 00 00 80 3F mine.hex 00 00 80 3F test.fpn 1.000000E+00 mine.fpn 1.000000E+00
    test.sig 0 mine.sig 0 test.exp 7B mine.exp 7F test.man 4CCCCD mine.man 0CCCCC test.hex CD CC CC 3D mine.hex CC CC 8C 3F test.fpn 1.000000E-01 mine.fpn 1.100000E+00
    Compilation finished successfully.
    Code:
    /* Assembles an LDBL bit pattern from a decimal integer part (num), a
     * decimal fraction written as an integer (fpn) and a scaling count (exp).
     * Returns the all-zero pattern when num and fpn are both 0.
     * In this revision `one` starts at 10 and pos at 0.
     * NOTE(review): work in progress - the output posted with this snippet
     * still shows a wrong result for num = 0, fpn = 1 (1.1 instead of 0.1). */
    LDBL makeFPN( ullong num, ullong fpn, long exp ) {
    	LDBL dst = {0};
    	ullong one = 10;
    	long pos = 0;
    	if ( num ) {
    		dst.man = num;
    		/* Multiply `one` by 10 once per decimal digit of num while
    		 * counting those digits in pos (capped at FPNT_MAN_BIT). */
    		for ( pos = 1; pos < FPNT_MAN_BIT; ++pos ) {
    			num /= 10;
    			one *= 10;
    			if ( !num ) break;
    		}
    		dst.exp = --pos;
    	}
    	if ( dst.man || fpn ) {
    		/* FPNT_EXP_MAX is added as the exponent bias. */
    		dst.exp += FPNT_EXP_MAX;
    		/* One mantissa bit per pass: double fpn, shift man, and when fpn
    		 * reaches `one` set the low bit and subtract `one`. */
    		for ( ; pos < FPNT_MAN_BIT; ++pos ) {
    			fpn *= 2;
    			dst.man <<= 1;
    			if ( fpn >= one ) {
    				dst.man |= 1u;
    				fpn -= one;
    			}
    		}
    	}
    	/* Doubles the mantissa once per unit of |exp|; both loops multiply,
    	 * whichever sign exp has. */
    	pos = 0;
    	for ( ; pos < exp; ++pos, dst.man *= 2 );
    	for ( ; pos > exp; --pos, dst.man *= 2 );
    	/* Drops the biased exponent by one whenever it is non-zero. */
    	if ( dst.exp ) dst.exp--;
    	return dst;
    }

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. C++ and Math
    By darren78 in forum C++ Programming
    Replies: 2
    Last Post: 07-08-2010, 09:19 AM
  2. hex math
    By kroiz in forum C Programming
    Replies: 25
    Last Post: 01-20-2009, 03:46 PM
  3. Basic Math Problem. Undefined Math Functions
    By gsoft in forum C Programming
    Replies: 1
    Last Post: 12-28-2004, 03:14 AM
  4. math.h
    By sweets in forum C++ Programming
    Replies: 2
    Last Post: 05-05-2003, 01:27 PM
  5. Math Help
    By CAP in forum C Programming
    Replies: 2
    Last Post: 08-19-2002, 12:03 AM

Tags for this Thread