Code:
#include <stdio.h>
#include <wchar.h>
#include <string.h>
#include <stdlib.h>
#include <locale.h>
#include <limits.h>
/*
 * mbslen - count the multibyte characters in a NUL-terminated string.
 * (Adapted from the example in the libc manual.)
 *
 * The string is decoded according to the current LC_CTYPE locale, starting
 * from the initial shift state.
 *
 * Returns the number of characters before the terminating NUL, or
 * (size_t)-1 if mbrlen reports an invalid or incomplete sequence.
 */
size_t mbslen (const char *s)
{
    mbstate_t shift;
    size_t count = 0;

    /* An all-zero mbstate_t is the initial conversion state. */
    memset(&shift, 0, sizeof shift);

    for (;;) {
        /*
         * MB_LEN_MAX is a safe upper bound here: the string is
         * NUL-terminated, and mbrlen stops at the NUL.
         */
        size_t step = mbrlen(s, MB_LEN_MAX, &shift);

        if (step == 0)
            break;                  /* reached the terminating NUL */
        if (step == (size_t)-1 || step == (size_t)-2)
            return (size_t)-1;      /* invalid or incomplete sequence */

        s += step;
        count++;
    }
    return count;
}
/*
 * Demo driver: read one whitespace-delimited word from stdin and report its
 * length as measured by mbstowcs, wcslen, strlen and mbslen, then dump the
 * raw input bytes and the converted wide characters in hex.
 *
 * Change "#if 1" to "#if 0" to skip setlocale and stay in the default
 * "C" locale.
 *
 * Always returns 0; conversion failures are reported via perror and show up
 * as -1 in the printed counts.
 */
int main(void)
{
    char input[80] = {0};
    wchar_t output[80] = {0};

#if 1
    const char *l = setlocale(LC_CTYPE, "");
    if (!l) {
        /*
         * setlocale is not specified to set errno, so perror would print
         * an unrelated message here.
         */
        fputs("setlocale failed\n", stderr);
    } else {
        printf("locale = %s\n", l);
    }
#else
    puts("Using C locale");
#endif

    printf("Give me something: ");
    fflush(stdout);     /* make sure the prompt is visible before blocking on input */

    /*
     * The field width (sizeof input - 1) keeps scanf from writing past the
     * end of the buffer.  On EOF or a read error, treat the input as empty.
     */
    if (scanf("%79s", input) != 1)
        input[0] = '\0';

    /*
     * Convert at most one element less than the buffer holds so the
     * zero-initialised last element always terminates the wide string.
     */
    size_t converted = mbstowcs(output, input,
                                sizeof output / sizeof output[0] - 1);
    int n1 = (converted == (size_t)-1) ? -1 : (int)converted;
    if (n1 == -1)
        perror("mbstowcs");

    /* Both lengths are bounded by the 80-element buffers, so they fit in int. */
    int n2 = (int)wcslen(output);
    int n3 = (int)strlen(input);

    size_t counted = mbslen(input);
    int n4 = (counted == (size_t)-1) ? -1 : (int)counted;
    if (n4 == -1)
        perror("mbslen");

    printf("mbstowcs(output,input)=%d\n"
           "wcslen(output)=%d\n"
           "strlen(input)=%d\n"
           "mbslen(input)=%d\n", n1, n2, n3, n4);

    if (*input)
    {
        printf("input = ");
        const char *p = input;
        for (; *p; ++p)
            printf("0x%02X,", (unsigned char)*p);
        puts("\b ");    /* backspace + space wipes the trailing comma on a terminal */
    }

    if (*output)
    {
        printf("output = ");
        const wchar_t *p = output;
        for (; *p; ++p)
            printf("0x%X,", (unsigned)*p);
        puts("\b ");    /* backspace + space wipes the trailing comma on a terminal */
    }

    return 0;
}
Code:
locale = en_US.utf8
Give me something: œ∑Ω
mbstowcs(output,input)=3
wcslen(output)=3
strlen(input)=7
mbslen(input)=3
input = 0xC5,0x93,0xE2,0x88,0x91,0xCE,0xA9
output = 0x153,0x2211,0x3A9
Code:
Using C locale
Give me something: œ∑Ω
mbstowcs: Invalid or incomplete multibyte or wide character
mbslen: Invalid or incomplete multibyte or wide character
mbstowcs(output,input)=-1
wcslen(output)=0
strlen(input)=7
mbslen(input)=-1
input = 0xC5,0x93,0xE2,0x88,0x91,0xCE,0xA9
On my Fedora 14 box, not calling setlocale causes mbstowcs to fail, because the input contains bytes that are not valid characters in the C locale.
On tabstop's box, mbstowcs under the C locale seems to simply copy each char into its own wchar_t, one byte at a time.
gg