Since I had already suspected my detection of UTF-8 bytes was faulty I went and checked my wiki bookmark and changed my snextchr function to look like this:
Code:
int snextchr( UTF *utf, char8_t *c, size_t leng ) {
int ret = 0;
size_t i, j, max;
if ( !utf || !c ) {
if ( c && leng )
(void)memset( c, 0, (leng+1) * sizeof(char8_t) );
ret = EDESTADDRREQ;
FAIL( stderr, ret, "utf and/or c was NULL!");
return ret;
}
for ( i = 0; i < leng; ++i ) {
if ( utf->pos >= utf->len ) break;
c[i] = utf->txt[utf->pos];
if ( c[i] & 0x80 ) {
if ( c[i] & 0x40 ) {
if ( c[i] & 0x20 )
max = ( c[i] & 0x10 ) ? 3 : 2;
else max = 1;
}
ret = EILSEQ;
FAIL( stderr, ret, "Corrupt UTF-8 character" );
c[i] = 0;
break;
}
else max = 0;
for ( j = 0; j < max; ++j ) {
if ( ++i == leng || utf->pos == utf->len ) break;
c[i] = utf->txt[utf->pos++];
}
}
c[leng] = 0;
return ret;
}
After fixing the function reading from the terminal to check UTF-8 the same way I tried running it only to encounter a few segfaults, so I fixed those and now I find that I'm somehow not getting any character from my function:
nextc():
Code:
bool nextc( NEXTC *_nextc ) {
int ret = nextc_validate( _nextc );
size_t i;
switch ( ret ) {
case 0: break;
case ENODATA: return 0;
default:
_nextc->err = ret;
return 0;
}
for ( i = 0; i < NEXTC_C_SIZE; ++i ) {
_nextc->p[i] = _nextc->c[i];
_nextc->c[i] = 0;
}
_nextc->err = _nextc->nextchr( _nextc->src, _nextc->c, NEXTC_C_SIZE );
return ( _nextc->err == 0 ) ? 1 : 0;
}
Code:
int rdEscChr( NEXTC *_nextc, char8_t *c, size_t leng ) {
int ret = 0;
uint_least64_t num = 0;
size_t size = (leng + 1) * sizeof(char8_t);
char const *esc, def[] = "\\";
if ( !_nextc || !c ) {
if ( c && size ) (void)memset( c, 0, size );
ret = EDESTADDRREQ;
FAIL( stderr, ret, "Invalid parameter/s" );
return ret;
}
memset( c, 0, size );
esc = getenv("ESCAPE_CHAR");
if ( !esc ) esc = def;
if ( _nextc->c[0] != esc[0] ) {
ret = EILSEQ;
FAIL( stderr, ret, "Invalid escape character" );
(void)printf( "Expected '%s'\n", esc );
return ret;
}
if ( utf2std == iconv_null &&
(ret = get_std_encoding()) != EXIT_SUCCESS )
return ret;
if ( !nextc(_nextc) )
return _nextc->err;
switch ( _nextc->c[0] ) {
case 'a': num = 0x07; break;
case 'b': num = 0x08; break;
case 'e': num = 0x1B; break;
case 'f': num = 0x0C; break;
case 'n': num = 0x0A; break;
case 'r': num = 0x0D; break;
case 't': num = 0x09; break;
case 'u':
if ( !nextc(_nextc) ) {
ret = _nextc->err;
break;
}
ret = rdU64_base62( _nextc, 4, 4, 16, 0, &num );
if ( ret != EXIT_SUCCESS ) return ret;
ret = type2utf( c, leng, (char*)&num, sizeof(char32_t), 'u' );
return ret;
case 'U':
if ( !nextc(_nextc) ) {
ret = _nextc->err;
break;
}
ret = rdU64_base62( _nextc, 8, 8, 16, 0, &num );
if ( ret != EXIT_SUCCESS ) return ret;
ret = type2utf( c, leng, (char*)&num, sizeof(char32_t), 'U' );
return ret;
case 'v': num = 0x0B; break;
case 'x':
if ( !nextc(_nextc) ) {
ret = _nextc->err;
break;
}
ret = rdU64_base62( _nextc, 1, 2, 16, 0, &num );
break;
default:
if ( _nextc->c[0] >= U'0' && _nextc->c[0] <= U'9' ) {
ret = rdU64_base62( _nextc, 1, 3, 8, 0, &num );
break;
}
num = _nextc->c[0];
}
if ( ret != 0 )
return ret;
c[0] = (char8_t)num;
ret = nextc(_nextc) ? EXIT_SUCCESS : _nextc->err;
return ret;
}
Can anyone spot any possible causes?
Edit:
my ouput right now:
Code:
make char.run (in directory: /media/lee/ZXUIJI_1TB/github/mc)
cc -fPIC -Wall -Wno-multichar -shared -o ./libnext.so -c next.c
cc -fPIC -Wall -Wno-multichar -D OUT=char.elf -o ./char.elf char.c ./libnext.so
./char.elf
char.c:497:fnextchr_utf(): Error: 0x00000022, 34, Numerical result out of range, Info: Imcomplete UTF-8 character
⺢, 'abcd': 0x00002EA2, 0x61626364
'abcd' = 'd', d:c:b:a
\u2ea2 = ''
std_encoding = 'UTF-8'
Please enter a character:
rm char.elf libnext.so
Compilation finished successfully.