Hello,
I tried to extract a string containing French accented characters from a text
file and convert it to a wide-character string (UTF-8), but a problem appears
during the conversion: the function mbstowcs() returns -1 as an error value
when it reads an "e" character with an accent.
The text is raw (only \r and \n as line endings) and was typed on a
French-language MS Windows system. The typeface used is Courier New.
I am thinking of saving my source text file in UTF-8 format, but I do not know
how to deal with the 0xFF 0xFE header of the UTF-8 file. Is there a function
like getline() that works with UTF-8 wide-character files?
Thank you.
Code:
#define _POSIX_C_SOURCE 200809L //getline() is POSIX, not ISO C
#include <locale.h> //setlocale()
#include <stdio.h> //fopen()
#include <stdlib.h> //free(), mbstowcs()
#include <string.h> //strlen()
#include <wchar.h> //mbsrtowcs()
/* Streams used by main(): the narrow-text input and the wide-text output. */
FILE *file_in = NULL;
FILE *file_out_wide = NULL;

/* Narrow (multibyte) line buffer and a scratch pointer for scanning it. */
char *ascii_in = NULL;
char *kbufft = NULL;

/* Fixed-size destination for the converted wide-character string. */
wchar_t wide_string_A[100] = {0};

/* Integer bookkeeping: a size counter for the line buffer, and the
   conversion result reported by main(). */
int char_count = 0;
int n = 0;
/*
 * Read the first line of ./ascii_in.txt, strip its trailing line terminators,
 * convert it from the locale's multibyte encoding (UTF-8) to a wide-character
 * string, print the result and the converted length to stdout, and write the
 * wide string to ./wide_out.txt.
 *
 * Returns 0 on success and 1 on any failure (locale unavailable, file cannot
 * be opened, no line could be read, invalid multibyte input, write error).
 *
 * Note on the mbstowcs() == -1 failure: it means the input bytes are not valid
 * in the selected locale's encoding. A file saved by a Windows editor as
 * "ANSI" stores an accented "e" as a single byte, which is not valid UTF-8;
 * the file must be re-saved as UTF-8 (or a locale matching the file's real
 * encoding must be selected) before this conversion can succeed.
 */
int main(void)
{
    /* Number of wide characters wide_string_A can hold, terminator included. */
    const size_t wide_cap = sizeof wide_string_A / sizeof wide_string_A[0];
    size_t line_cap = 0;    /* getline() requires a size_t *, not an int * */
    size_t len;
    size_t converted;
    const char *text;
    int status = 1;

    if (!setlocale(LC_ALL, "en_US.utf8")) {
        fprintf(stderr, "cannot select locale en_US.utf8\n");
        return 1;
    }

    file_in = fopen("./ascii_in.txt", "r");
    if (file_in == NULL) {
        perror("./ascii_in.txt");
        goto out;
    }
    file_out_wide = fopen("./wide_out.txt", "w");
    if (file_out_wide == NULL) {
        perror("./wide_out.txt");
        goto out;
    }

    //********** Get multibyte string ****************
    if (getline(&ascii_in, &line_cap, file_in) == -1) {
        fprintf(stderr, "./ascii_in.txt: no line could be read\n");
        goto out;
    }

    //******* Remove new line and carriage return*****
    /* Bounded by len so an empty or terminator-only line never reads or
       writes before the start of the buffer. */
    len = strlen(ascii_in);
    while (len > 0 && (ascii_in[len - 1] == '\r' || ascii_in[len - 1] == '\n')) {
        ascii_in[--len] = '\0';
    }

    /* Skip a UTF-8 byte-order mark (EF BB BF) if the editor wrote one.
       A file that starts with FF FE is UTF-16LE, not UTF-8, and cannot be
       decoded by this byte-oriented path. ascii_in itself is left untouched
       so it can still be passed to free(). */
    text = ascii_in;
    if (len >= 3 &&
        (unsigned char)text[0] == 0xEF &&
        (unsigned char)text[1] == 0xBB &&
        (unsigned char)text[2] == 0xBF) {
        text += 3;
    }

    //********* Convert multibyte string to wide string****
    /* The third argument is the destination capacity in wide characters, not
       the source length; one slot is reserved for the terminator because
       mbstowcs() does not write one when the limit is reached. */
    converted = mbstowcs(wide_string_A, text, wide_cap - 1);
    if (converted == (size_t)-1) {
        n = -1;
        fprintf(stderr,
                "./ascii_in.txt: invalid multibyte sequence for the current "
                "locale (is the file really saved as UTF-8?)\n");
        goto out;
    }
    wide_string_A[converted] = L'\0';
    n = (int)converted;     /* converted < wide_cap, so this cannot overflow */

    printf("\n%ls\n", wide_string_A);
    printf("%d\n", n);

    //********* Write wide string to disk**************
    if (fputws(wide_string_A, file_out_wide) < 0) {
        perror("./wide_out.txt");
        goto out;
    }
    status = 0;

out:
    /* fflush() is undefined on input streams in ISO C, and fclose() already
       flushes output streams, so no explicit flush is needed. */
    if (file_in != NULL) {
        fclose(file_in);
        file_in = NULL;
    }
    if (file_out_wide != NULL) {
        if (fclose(file_out_wide) != 0) {
            perror("./wide_out.txt");
            status = 1;
        }
        file_out_wide = NULL;
    }
    free(ascii_in);
    ascii_in = NULL;
    return status;
}