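Another thread meant to be found by others who run into a problem I've just fixed in my own code. It turns out that comparing a byte against literals like 0x80 will not detect the UTF-8 state, but unsigned literals like 0x80u will (presumably because the byte type is signed here, so the byte is promoted to a negative int unless the literal forces an unsigned comparison). Here's some example code to play with (just replace or rip out whatever won't compile because you don't have it... or write it yourself, your choice):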
Code:
#include "next.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Read one UTF-8 encoded character from a stream into a byte buffer.
 *
 * The sequence length is taken from the lead byte: 0xxxxxxx is 1 byte,
 * 110xxxxx is 2, 1110xxxx is 3 and 11110xxx is 4. A byte that cannot start
 * a sequence (a stray continuation byte, or 0xF8..0xFF) is handed back on
 * its own with a return value of 0. Overlong encodings and surrogate code
 * points are not rejected. The buffer is NOT NUL-terminated here.
 *
 * @param file  stream to read from
 * @param c     buffer that receives the raw bytes of the character
 * @param leng  capacity of c in bytes
 * @return 0 on success;
 *         ENODATA if nothing could be read (end of file or read error);
 *         ERANGE if the character does not fit in leng bytes (the lead byte
 *         is pushed back onto the stream so it can be read again);
 *         EILSEQ if the sequence is cut short by end of file or by a byte
 *         that is not a continuation byte (that byte is pushed back).
 */
int fnextchr( FILE *file, char8_t *c, size_t leng ) {
    size_t i, total;
    int byte;
    unsigned int lead;

    /* Nowhere to store even a single byte. */
    if ( leng == 0 ) return ERANGE;

    /* feof() only becomes true after a read has already failed, so the
     * value returned by fgetc() is tested instead of asking beforehand. */
    byte = fgetc(file);
    if ( byte == EOF ) return ENODATA;
    c[0] = (char8_t)byte;

    /* fgetc() yields the byte as 0..255, so the classification below does
     * not depend on whether char8_t is a signed or an unsigned type. */
    lead = (unsigned int)byte;
    if ( lead < 0x80u ) return 0;

    /* Each mask must be compared for equality: testing it for non-zero is
     * true for every byte that has the top bit set. */
    if ( (lead & 0xE0u) == 0xC0u ) total = 2;
    else if ( (lead & 0xF0u) == 0xE0u ) total = 3;
    else if ( (lead & 0xF8u) == 0xF0u ) total = 4;
    else return 0; /* not a valid lead byte: pass it through alone */

    if ( total > leng ) {
        /* Only one byte has been consumed, and a single ungetc() is
         * guaranteed to work even on streams that cannot seek. */
        ungetc( byte, file );
        return ERANGE;
    }

    for ( i = 1; i < total; ++i ) {
        byte = fgetc(file);
        if ( byte == EOF ) return EILSEQ;
        if ( ((unsigned int)byte & 0xC0u) != 0x80u ) {
            /* Not a continuation byte: leave it for the next call. */
            ungetc( byte, file );
            return EILSEQ;
        }
        c[i] = (char8_t)byte;
    }
    return 0;
}
/*
 * Demo driver: reads characters from stdin through nextc(), using
 * fnextchr() as the reader callback, and prints each one together with the
 * running byte offset until a carriage return or newline is seen or
 * nextc() returns 0.
 */
int main() {
    NEXTC reader = {0};
    long int offset = 0;

    reader.nextchr = (func_nextchr)fnextchr;
    reader.src = stdin;

    puts("Enter mizu character and others:\n");

    /* Short-circuit evaluation keeps the original order: fetch first,
     * then test the fetched byte for an end-of-line marker. */
    while ( nextc(&reader)
        && reader.c[0] != U'\r'
        && reader.c[0] != U'\n' ) {
        printf("Character at stdin position %ld: '%s'\n", offset, reader.c );
        /* Advance by the number of bytes the character occupies. */
        offset += strlen( (char*)reader.c );
    }

    return 0;
}
I used '⺢abcd' as my test set to identify where I was going wrong. I can now go back to my original projects and check whether they are working correctly (I fixed them straight after identifying the problem).