There's also this page: Correct Decimal To Floating-Point Using Big Integers - Exploring Binary
(That entire website has some interesting reading actually)
Printable View
There's also this page: Correct Decimal To Floating-Point Using Big Integers - Exploring Binary
(That entire website has some interesting reading actually)
Thanks, got me started on the correct rounding mode but I'm struggling to understand which data to change and when, here's what I got, if you understand what it means could you point it out for me please.
Edit: Noticed a whoopsie when checking what rounding method to use, corrected while cleaning up the switch statement:Code:typedef struct mcc_fpn {
long man_bits;
long exp_bits;
long max_exp;
long min_exp;
long max_exp_digits;
long min_exp_digits;
long rounds;
long epsilon;
ulong exp_bias;
long pos;
long exp;
mcc_uhuge_t neg;
mcc_uhuge_t num;
mcc_uhuge_t one;
mcc_uhuge_t base;
mcc_uhuge_t raw;
} mcc_fpn_t;
...
mcc_fpn_t mcc_fpn_make( mcc_fpn_t mcc ) {
mcc_fpn_t tmp = mcc;
mcc_uhuge_t fpn, cpy, inf;
long dir = 0;
if ( !(mcc.num) || mcc.one < 1 || tmp.pos > 0 ) goto mcc_fpn_sig;
inf = 0;
inf = ~inf;
inf <<= tmp.exp_bits;
inf = ~inf;
tmp.pos += tmp.exp;
if ( !(tmp.num % tmp.one) ) {
tmp.num /= tmp.one;
tmp.one = 1;
tmp.pos = 0;
}
if ( tmp.exp != 0 ) tmp.exp += tmp.pos;
if ( tmp.exp > tmp.max_exp || tmp.exp < tmp.min_exp ) {
mcc_fpn_inf:
tmp.raw = inf;
goto mcc_fpn_exp;
}
for ( ; tmp.exp > 0; tmp.exp-- ) tmp.num *= mcc.base;
for ( ; tmp.exp < 0; tmp.exp++ ) tmp.one *= mcc.base;
fpn = tmp.num % tmp.one;
cpy = tmp.one / 2;
tmp.num /= tmp.one;
tmp.base = fpn;
if ( fpn ) {
if ( fpn < cpy ) dir = -1;
else if ( fpn > cpy ) dir = 1;
else {
switch ( mcc.rounds ) {
case 1:
if ( tmp.num & 1u ) dir = 1;
else dir = -1;
case 2:
dir = (tmp.num & 1) ? -1 : 1;
}
}
}
tmp.pos = 0;
if ( tmp.num ) for ( cpy = tmp.num; cpy > 1; tmp.pos++, cpy >>= 1 );
else for ( cpy = fpn; cpy < tmp.one; tmp.pos--, cpy <<= 1 );
mcc.pos = tmp.pos;
tmp.raw = tmp.exp_bias + tmp.pos - 1;
if ( tmp.raw > inf ) goto mcc_fpn_inf;
mcc.raw = tmp.num;
if ( tmp.pos > tmp.man_bits ) {
tmp.pos -= tmp.man_bits;
mcc.raw >>= tmp.pos - 1;
fpn = mcc.raw & 1u;
mcc.raw >>= 1;
}
else {
for ( ; tmp.pos < tmp.man_bits; tmp.pos++ ) {
fpn *= 2;
mcc.raw <<= 1;
if ( fpn >= tmp.one ) {
mcc.raw |= 1;
fpn -= tmp.one;
}
}
}
switch ( dir ) {
case 1: mcc.raw++; break;
case -1: mcc.raw--; break;
}
cpy = (bitsof(mcc.raw) - mcc.man_bits);
mcc.raw <<= cpy;
mcc.raw >>= cpy;
mcc_fpn_exp:
tmp.raw <<= mcc.man_bits;
mcc.raw |= tmp.raw;
mcc_fpn_sig:
tmp.neg <<= mcc.man_bits;
tmp.neg <<= mcc.exp_bits;
mcc.raw |= tmp.neg;
mcc.num = tmp.num;
mcc.one = tmp.one;
return mcc;
}
Code:switch ( mcc.rounds ) {
case 1: dir = (tmp.num & 1u) ? 1 : -1; break;
case 2: dir = (tmp.num & 1u) ? -1 : 1; break;
}
Made a couple of minor modifications to kill possible infinite loops and clear fpn if equal to half tmp.one but still no luck in getting the rounding right:
Code:gcc -Wall -o "mcc_fpn" "mcc_fpn.c" && "./mcc_fpn"
mcc.rounds = 1
mcc.epsilon = 0
given 1e-1
expect 0.100000
result 0.100000
gcc mantissa = 110011001100110011001101
mcc mantissa = 110011001100110011001011
gcc exponent = 01111011
mcc exponent = 01111011
gcc negative = 00
mcc negative = 00
tmp.base = 10
tmp.neg = 0
tmp.num = 0
gcc.one = 1
tmp.one = 10
tmp.pos = -4
tmp.exp = -1
Compilation finished successfully.
Code:if ( fpn ) {
if ( fpn < cpy ) dir = -1;
else if ( fpn > cpy ) dir = 1;
else {
fpn = 0;
switch ( mcc.rounds ) {
case 1: tmp.num += (tmp.num & 1u) ? 1 : -1; break;
case 2: tmp.num += (tmp.num & 1u) ? -1 : 1; break;
}
}
}
Seems the exponent is real stubborn about dying as as an issue, is frequently 1 off
I managed to upload my recent code to onlinegdb this morning
GDB online Debugger | Code, Compile, Run, Debug online C, C++
But haven't the time to repond or spend on this since I gotta go to work
Well I think I finally fixed the exponent issue since issues I'd been have with the really small ones have disappeared in tests, currently trying to diagnose why my mantissa is quite shifted enough on a later number in the test loop:
Btw the fix was in how I used the number of digits after the decimal pointCode:make (in directory: /run/media/zxuiji/DATA/github/mc)
gcc -Wall -o ./big_fpn.elf mcc_fpn.c bimath.c
./big_fpn.elf
Numbers limited to: 6.6e+/-6
mcc.rounds = 1
mcc.epsilon = 0
given 2.5e-1
expect 2.500000e-01, 0.250000
result 1.250000e-01, 0.125000
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111101
mcc exponent = 01111100
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 25
tmp.one = 100
tmp.tmp = 0
tmp.pos = -3
tmp.exp = -1
gcc.pos = -1
gcc.exp = -1
fpn_stats_put() 588
was eql 587, not eql 1
was nil 0, not nil 0
was inc 0, not inc 0
was shl 1, not shl 0
was mul 0, not mul 1
was inf 0, not inf 0
99.829930% Correct
rm ./big_fpn.elf
Compilation finished successfully.
Code:if ( tmp.exp > 0 ) {
tmp.exp += tmp.pos;
while ( tmp.pos < 0 ) {
if ( tmp.one == 1 ) break;
tmp.one /= tmp.base;
tmp.pos++;
}
if ( tmp.pos ) goto mcc_fpn_sig;
}
I seem to have found where to do the rounding:
Code:if ( tmp.pos > limits.man_bits ) {
tmp.pos -= limits.man_bits;
tmp.fpn = 0;
tmp.fpn = ~(tmp.fpn);
tmp.fpn <<= tmp.pos;
tmp.fpn = ~(tmp.fpn);
tmp.fpn &= tmp.num;
tmp.one <<= tmp.pos;
mcc.raw = tmp.num >> tmp.pos;
tmp.tmp = tmp.one / 2;
if ( tmp.fpn > tmp.tmp ) mcc.raw++;
else if ( tmp.fpn == tmp.tmp ) {
switch ( mcc.rounds ) {
case 1: if ( mcc.raw & 1u ) mcc.raw++; break;
}
}
}
Edit: Just uploaded: GDB online Debugger | Code, Compile, Run, Debug online C, C++Code:make
gcc -Wall -o ./big_fpn.elf mcc_fpn.c bimath.c
./big_fpn.elf
Numbers limited to: 36.36e+/-36
mcc.rounds = 1
mcc.epsilon = 0
fpn_stats_put() count 98568
was eql 97019, not eql 1549
was nil 962, not nil 0
was inc 0, not inc 0
was shl 0, not shl 0
was mul 587, not mul 0
was inf 0, not inf 0
98.428494% Correct
rm ./big_fpn.elf
Compilation finished successfully.
Edit: just uploaded the code: GDB online Debugger | Code, Compile, Run, Debug online C, C++
Managed to resolve the nil thing and most of the exponent off by 1s, now just need to resolve a few more edge cases, putting a copy of output here for self reference mainly so I can un-comment out the code that hides these cases since any change could result in thousands of these:
Code:make
gcc -Wall -o ./big_fpn.elf mcc_fpn.c bimath.c
./big_fpn.elf
Numbers limited to: 36.36e+/-36
mcc.rounds = 1
mcc.epsilon = 0
given 0.25e0
expect 2.500000e-01, 0.250000
result 1.250000e-01, 0.125000
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111101
mcc exponent = 01111100
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 25
tmp.one = 100
tmp.tmp = 0
tmp.pos = -3
tmp.exp = 0
gcc.pos = -2
gcc.exp = 0
given 0.25e0
expect 2.500000e-01, 0.250000
result 1.250000e-01, 0.125000
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111101
mcc exponent = 01111100
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 25
tmp.one = 100
tmp.tmp = 0
tmp.pos = -3
tmp.exp = 0
gcc.pos = -2
gcc.exp = 0
given 1.25e-1
expect 1.250000e-01, 0.125000
result 6.250000e-02, 0.062500
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111101100000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111100
mcc exponent = 01111011
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 125
tmp.one = 1000
tmp.tmp = 0
tmp.pos = -4
tmp.exp = -1
gcc.pos = -2
gcc.exp = -1
given 2.5e-1
expect 2.500000e-01, 0.250000
result 1.250000e-01, 0.125000
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111101
mcc exponent = 01111100
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 25
tmp.one = 100
tmp.tmp = 0
tmp.pos = -3
tmp.exp = -1
gcc.pos = -1
gcc.exp = -1
given 6.25e-2
expect 6.250000e-02, 0.062500
result 3.125000e-02, 0.031250
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111101100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111101000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111011
mcc exponent = 01111010
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 625
tmp.one = 10000
tmp.tmp = 0
tmp.pos = -5
tmp.exp = -2
gcc.pos = -2
gcc.exp = -2
given 12.5e-2
expect 1.250000e-01, 0.125000
result 6.250000e-02, 0.062500
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111101100000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111100
mcc exponent = 01111011
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 125
tmp.one = 1000
tmp.tmp = 0
tmp.pos = -4
tmp.exp = -2
gcc.pos = -1
gcc.exp = -2
given 25.0e-2
expect 2.500000e-01, 0.250000
result 1.250000e-01, 0.125000
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110100000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111110000000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111101
mcc exponent = 01111100
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 25
tmp.one = 100
tmp.tmp = 0
tmp.pos = -3
tmp.exp = -2
gcc.pos = -1
gcc.exp = -2
given 31.25e-3
expect 3.125000e-02, 0.031250
result 1.562500e-02, 0.015625
gcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111101000000000000000000000000
mcc = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111100100000000000000000000000
gcc mantissa = 00000000000000000000000
mcc mantissa = 00000000000000000000000
gcc exponent = 01111010
mcc exponent = 01111001
gcc negative = 0
mcc negative = 0
tmp.base = 10
tmp.neg = 0
tmp.num = 0
tmp.fpn = 3125
tmp.one = 100000
tmp.tmp = 0
tmp.pos = -6
tmp.exp = -3
gcc.pos = -2
gcc.exp = -3
fpn_stats_put() count 98568
was eql 98560, not eql 8
was nil 0, not nil 0
was inc 0, not inc 0
was shl 8, not shl 0
was mul 0, not mul 8
was inf 0, not inf 0
99.991882% Correct
rm ./big_fpn.elf
Compilation finished successfully.
Just finished doing like for like code on the bigint math version of mcc_fpn_make() - mcc_big_make() - and it seems the problems I was having with fpn_make() on those final few were actually just down to lack of space:
The second set of results were generated using the same strings and copying the result into the fpn raw so I could check using the same function if they where correct:Code:make
gcc -Wall -o ./vflt.elf mcc_vflt.c mcc_vint.c
./vflt.elf
Numbers limited to: 36.36e+/-36
mcc.rounds = 1
mcc.epsilon = 0
fpn_stats_put() count 98568
was eql 98560, not eql 8
was nil 0, not nil 0
was inc 0, not inc 0
was shl 0, not shl 0
was mul 0, not mul 8
was inf 0, not inf 0
99.991882% Correct
fpn_stats_put() count 98568 Big
was eql 98568, not eql 0
was nil 0, not nil 0
was inc 0, not inc 0
was shl 0, not shl 0
was mul 0, not mul 0
was inf 0, not inf 0
100.000000% Correct
rm ./vflt.elf
Compilation finished successfully.
GDB online Debugger | Code, Compile, Run, Debug online C, C++Code:for ( i = 0; i <= e; ++i ) {
for ( j = 0; j <= c; ++j ) {
for ( k = 0; k < f; ++k ) {
memset( txt, 0, bitsof(double) );
snprintf( txt, bitsof(double), "%d.%de%d", j, k, i );
tmp = mcc_fpn_read( txt, &gcc, mcc, mcc );
details( &stats, txt, gcc, tmp );
mcc_big_read( txt, &big_gcc, big_mcc, big_tmp );
gcc.raw = *((mcc_uhuge_t*)(big_gcc.raw.zero.seg));
tmp.raw = *((mcc_uhuge_t*)(big_mcc.raw.zero.seg));
details( &big_stats, txt, gcc, tmp );
memset( txt, 0, bitsof(double) );
snprintf( txt, bitsof(double), "%d.%de%d", j, k, -i );
tmp = mcc_fpn_read( txt, &gcc, mcc, mcc );
details( &stats, txt, gcc, tmp );
mcc_big_read( txt, &big_gcc, big_mcc, big_tmp );
gcc.raw = *((mcc_uhuge_t*)(big_gcc.raw.zero.seg));
tmp.raw = *((mcc_uhuge_t*)(big_mcc.raw.zero.seg));
details( &big_stats, txt, gcc, tmp );
}
}
}
If you're gonna run the code be aware that it takes a while to finish, I'm about to start testing the larger numbers
Edit: Was fine upto 100.100e+/-100, now running with the limit set to 1000.1000e+/-1000
Might've been in an infinite loop, had been running for a while so I force quit and looked for possible causes, added back in a check for one being multiplied into nil and am now re-running, also due to how long it was taking I decided to split the checks across my 4 cores with pthread:
Apparently adding back in that check caused the previously incorrect results to correct themelves, seems that <= doesn't catch going into 0, go figureCode:make
gcc -Wall -lpthread -o ./vflt.elf mcc_vflt.c mcc_vint.c
./vflt.elf
Numbers limited to: 100.100e+/-100
FLT_ROUNDS = 1
FLT_EPSILON = 0.000000
fpn_stats_put() count 2100800 Fast set
was eql 2100800, not eql 0
was nil 0, not nil 0
was inc 0, not inc 0
was shl 0, not shl 0
was mul 0, not mul 0
was inf 0, not inf 0
100.000000% Correct
fpn_stats_put() count 2100800 Slow set
was eql 2100800, not eql 0
was nil 0, not nil 0
was inc 0, not inc 0
was shl 0, not shl 0
was mul 0, not mul 0
was inf 0, not inf 0
100.000000% Correct
rm ./vflt.elf
Compilation finished successfully.
Okay, seems that previous 100% on the fast version was a fluke, probably skipped or something, anyways the limit of a 1000 was taking too long and given how old my CPU is I didn't wanna stress it that long, the slow version is accurate enough for meantime and I can begin putting that in a library soon, my new AMD Radeon GFX card just arrived so I'm gonna try installing that and see if I can get vulkan running, remembering the experience I had with NVIDIA I might have to re-install manjaro so expect a possible silence for a while.