1. There's also this page: Correct Decimal To Floating-Point Using Big Integers - Exploring Binary

(That entire website has some interesting reading actually)

2. Originally Posted by Hodor
There's also this page: Correct Decimal To Floating-Point Using Big Integers - Exploring Binary

(That entire website has some interesting reading actually)
Thanks, got me started on the correct rounding mode but I'm struggling to understand which data to change and when, here's what I got, if you understand what it means could you point it out for me please.
Code:
```typedef struct mcc_fpn {
long man_bits;
long exp_bits;
long max_exp;
long min_exp;
long max_exp_digits;
long min_exp_digits;
long rounds;
long epsilon;
ulong exp_bias;
long pos;
long exp;
mcc_uhuge_t neg;
mcc_uhuge_t num;
mcc_uhuge_t one;
mcc_uhuge_t base;
mcc_uhuge_t raw;
} mcc_fpn_t;
...
mcc_fpn_t mcc_fpn_make( mcc_fpn_t mcc ) {
mcc_fpn_t tmp = mcc;
mcc_uhuge_t fpn, cpy, inf;
long dir = 0;
if ( !(mcc.num) || mcc.one < 1 || tmp.pos > 0 ) goto mcc_fpn_sig;
inf = 0;
inf = ~inf;
inf <<= tmp.exp_bits;
inf = ~inf;
tmp.pos += tmp.exp;
if ( !(tmp.num % tmp.one) ) {
tmp.num /= tmp.one;
tmp.one = 1;
tmp.pos = 0;
}
if ( tmp.exp != 0 ) tmp.exp += tmp.pos;
if ( tmp.exp > tmp.max_exp || tmp.exp < tmp.min_exp ) {
mcc_fpn_inf:
tmp.raw = inf;
goto mcc_fpn_exp;
}
for ( ; tmp.exp > 0; tmp.exp-- ) tmp.num *= mcc.base;
for ( ; tmp.exp < 0; tmp.exp++ ) tmp.one *= mcc.base;
fpn = tmp.num % tmp.one;
cpy = tmp.one / 2;
tmp.num /= tmp.one;
tmp.base = fpn;
if ( fpn ) {
if ( fpn < cpy ) dir = -1;
else if ( fpn > cpy ) dir = 1;
else {
switch ( mcc.rounds ) {
case 1:
if ( tmp.num & 1u ) dir = 1;
else dir = -1;
case 2:
dir = (tmp.num & 1) ? -1 : 1;
}
}
}
tmp.pos = 0;
if ( tmp.num ) for ( cpy = tmp.num; cpy > 1; tmp.pos++, cpy >>= 1 );
else for ( cpy = fpn; cpy < tmp.one; tmp.pos--, cpy <<= 1 );
mcc.pos = tmp.pos;
tmp.raw = tmp.exp_bias + tmp.pos - 1;
if ( tmp.raw > inf ) goto mcc_fpn_inf;
mcc.raw = tmp.num;
if ( tmp.pos > tmp.man_bits ) {
tmp.pos -= tmp.man_bits;
mcc.raw >>= tmp.pos - 1;
fpn = mcc.raw & 1u;
mcc.raw >>= 1;
}
else {
for ( ; tmp.pos < tmp.man_bits; tmp.pos++ ) {
fpn *= 2;
mcc.raw <<= 1;
if ( fpn >= tmp.one ) {
mcc.raw |= 1;
fpn -= tmp.one;
}
}
}
switch ( dir ) {
case 1: mcc.raw++; break;
case -1: mcc.raw--; break;
}
cpy = (bitsof(mcc.raw) - mcc.man_bits);
mcc.raw <<= cpy;
mcc.raw >>= cpy;
mcc_fpn_exp:
tmp.raw <<= mcc.man_bits;
mcc.raw |= tmp.raw;
mcc_fpn_sig:
tmp.neg <<= mcc.man_bits;
tmp.neg <<= mcc.exp_bits;
mcc.raw |= tmp.neg;
mcc.num = tmp.num;
mcc.one = tmp.one;
return mcc;
}```
Edit: Noticed a whoopsie when checking what rounding method to use, corrected while cleaning up the switch statement:
Code:
```			switch ( mcc.rounds ) {
case 1: dir = (tmp.num & 1u) ? 1 : -1; break;
case 2: dir = (tmp.num & 1u) ? -1 : 1; break;
}```

3. Made a couple of minor modifications to kill possible infinite loops and clear fpn if equal to half tmp.one but still no luck in getting the rounding right:
Code:
```gcc -Wall -o "mcc_fpn" "mcc_fpn.c" && "./mcc_fpn"
mcc.rounds = 1
mcc.epsilon = 0
given 1e-1
expect 0.100000
result 0.100000
gcc mantissa = 110011001100110011001101
mcc mantissa = 110011001100110011001011
gcc exponent = 01111011
mcc exponent = 01111011
gcc negative = 00
mcc negative = 00
tmp.base = 10
tmp.neg = 0
tmp.num = 0
gcc.one = 1
tmp.one = 10
tmp.pos = -4
tmp.exp = -1
Compilation finished successfully.```
Code:
```	if ( fpn ) {
if ( fpn < cpy ) dir = -1;
else if ( fpn > cpy ) dir = 1;
else {
fpn  = 0;
switch ( mcc.rounds ) {
case 1: tmp.num += (tmp.num & 1u) ? 1 : -1; break;
case 2: tmp.num += (tmp.num & 1u) ? -1 : 1; break;
}
}
}```