Since I use the SDL myself, and I usually end up enabling UNICODE translation with SDL_EnableUNICODE(), I've always wanted to see exactly how much of a performance hit is incurred by doing this. Your post gave me the excuse.
I happen to have the source for a recent version of the SDL (more recent than the SDL binary included with my testing [lenny] version of Debian!). So, first I grepped through it for SDL_EnableUNICODE, and found its implementation:
Code:
/*
 * Set or query the UNICODE translation flag.
 *
 * enable >= 0 : store the value in SDL_TranslateUNICODE.
 * enable <  0 : leave the flag untouched (query only).
 *
 * Returns the value the flag held before this call.
 */
int SDL_EnableUNICODE(int enable)
{
	const int previous = SDL_TranslateUNICODE;

	if ( enable < 0 ) {
		/* Negative argument: the caller only wants the current state */
		return previous;
	}
	SDL_TranslateUNICODE = enable;
	return previous;
}
Obviously I had to look into SDL_TranslateUNICODE. Here's what a grep showed, for each driver:
I was going to do every driver, but I didn't realize that it would be so complicated, so I just did four drivers. There are lots of other drivers that the SDL supports, however. To be exact:
Code:
src/video
src/video/dc
src/video/aalib
src/video/xbios
src/video/x11
src/video/dga
src/video/directfb
src/video/quartz
src/video/fbcon
src/video/ataricommon
src/video/cybergfx
src/video/riscos
src/video/ggi
src/video/nanox
src/video/windib
src/video/qtopia
src/video/gem
src/video/svga
src/video/bwindow
src/video/dummy
src/video/picogui
src/video/ipod
src/video/Xext
src/video/wincommon
src/video/photon
src/video/wscons
src/video/windx5
src/video/os2fslib
src/video/epoc
src/video/gapi
src/video/maccommon
src/video/ps2gs
src/video/vgl
src/video/macdsp
src/video/macrom
So, I didn't really learn anything, but I think that the overhead incurred by using UNICODE translation is minimal. I mean, if there were any better way to do it, then it would probably already have been implemented in the SDL.
You could always time it if you're really concerned . . . if you do, let us know because I'd be interested in the results.
----
[1] Code for the x11 driver (src/video/x11/SDL_x11events.c, lines 446-477):
Code:
/* Look up the translated value for the key event */
/*
 * Two lookup paths fill keysym.unicode before the key press is posted:
 *  - with X_HAVE_UTF8_STRING and a non-NULL SDL_IC, Xutf8LookupString()
 *    is used and its output is decoded by Utf8ToUcs4();
 *  - otherwise XLookupString() is used and only the first byte is kept.
 * If the lookup call returns 0, keysym.unicode is left as it was.
 */
#ifdef X_HAVE_UTF8_STRING
if ( SDL_IC != NULL ) {
static Status state;
/* A UTF-8 character can be at most 6 bytes */
char keybuf[6];
if ( Xutf8LookupString(SDL_IC, &xevent.xkey,
keybuf, sizeof(keybuf),
NULL, &state) ) {
/*
 * NOTE(review): the byte count returned by Xutf8LookupString() is not
 * passed on; Utf8ToUcs4() decides how many bytes of keybuf to read from
 * the lead byte alone, and keybuf is not initialised beforehand.
 */
keysym.unicode = Utf8ToUcs4((Uint8*)keybuf);
}
}
else
#endif
{
static XComposeStatus state;
char keybuf[32];
if ( XLookupString(&xevent.xkey,
keybuf, sizeof(keybuf),
NULL, &state) ) {
/*
* FIXME: XLookupString() may yield more than one
* character, so we need a mechanism to allow for
* this (perhaps null keypress events with a
* unicode value)
*/
/* The Uint8 cast keeps the value in 0..255 whether or not char is signed */
keysym.unicode = (Uint8)keybuf[0];
}
}
/* Post the press event; the result is kept in 'posted' */
posted = SDL_PrivateKeyboard(SDL_PRESSED, &keysym);
}
and (src/video/x11/SDL_x11events.c lines 61-179)
Code:
#ifdef X_HAVE_UTF8_STRING
/*
 * Decode the single UTF-8 sequence starting at utf8[0] into a UCS-4 value.
 *
 * The legacy 1- to 6-octet forms are accepted:
 *
 *   octets  lead octet   payload bits in lead   largest value
 *     1     0xxxxxxx              7             0x0000007F
 *     2     110xxxxx              5             0x000007FF
 *     3     1110xxxx              4             0x0000FFFF
 *     4     11110xxx              3             0x001FFFFF
 *     5     111110xx              2             0x03FFFFFF
 *     6     1111110x              1             0x7FFFFFFF
 *
 * Every following octet must look like 10xxxxxx and contributes six bits.
 *
 * Returns 0 when the lead octet matches none of the forms above or when a
 * continuation octet is malformed (for example, the sequence is cut short).
 * No length is passed in: the lead octet alone determines how many octets
 * are read from utf8.  Overlong encodings are not rejected.
 */
Uint32 Utf8ToUcs4(const Uint8 *utf8)
{
	/* One row per sequence length; row k describes (k + 1)-octet sequences */
	static const struct {
		Uint8 select;   /* bits of the lead octet that identify the form */
		Uint8 marker;   /* value those bits must have */
		Uint8 payload;  /* bits of the lead octet that carry data */
	} leadForms[] = {
		{ 0x80, 0x00, 0x7F },
		{ 0xE0, 0xC0, 0x1F },
		{ 0xF0, 0xE0, 0x0F },
		{ 0xF8, 0xF0, 0x07 },
		{ 0xFC, 0xF8, 0x03 },
		{ 0xFE, 0xFC, 0x01 }
	};
	const int formCount = (int)(sizeof(leadForms) / sizeof(leadForms[0]));
	const Uint8 lead = utf8[0];
	Uint32 ucs4;
	int length = 0;
	int k;

	/* Classify the lead octet to learn the sequence length */
	for ( k = 0; k < formCount; ++k ) {
		if ( (lead & leadForms[k].select) == leadForms[k].marker ) {
			length = k + 1;
			break;
		}
	}
	if ( length == 0 ) {
		/* Stray continuation octet (10xxxxxx) or 0xFE/0xFF: not a lead octet */
		return 0;
	}

	/* Strip the length marker, keeping only the data bits of the lead octet */
	ucs4 = lead & leadForms[length - 1].payload;

	/* Append six data bits from each continuation octet */
	for ( k = 1; k < length; ++k ) {
		const Uint8 octet = utf8[k];

		if ( (octet & 0xC0) != 0x80 ) {
			/* Not of the form 10xxxxxx: malformed or truncated sequence */
			return 0;
		}
		ucs4 = (ucs4 << 6) | (octet & 0x3F);
	}
	return ucs4;
}
#endif
[2] For dga, lines 94-108 of src/video/dga/SDL_dgaevents.c:
Code:
/* Look up the translated value for the key event */
/* Only done when UNICODE translation has been switched on */
if ( SDL_TranslateUNICODE ) {
static XComposeStatus state;
char keybuf[32];
/* A zero return from XLookupString() leaves keysym.unicode untouched */
if ( XLookupString(&xkey, keybuf, sizeof(keybuf), NULL, &state) ) {
/*
* FIXME: XLookupString() may yield more than one
* character, so we need a mechanism to allow for
* this (perhaps null keypress events with a
* unicode value)
*/
/* Keep the first byte only; the Uint8 cast keeps it in 0..255 */
keysym.unicode = (Uint8)keybuf[0];
}
}
[3] For windx5, lines 830-848 of src/video/windx5/SDL_dx5events.c:
Code:
/*
 * Fill keysym->unicode for a key press when UNICODE translation is on.
 * Builds with NO_GETKEYBOARDSTATE store the virtual-key code directly;
 * all other builds ask SDL_ToUnicode() for the character.
 */
if ( pressed && SDL_TranslateUNICODE ) {
UINT vkey;
#ifndef NO_GETKEYBOARDSTATE
BYTE keystate[256];
Uint16 wchars[2];
#endif
/* Map type 1 translates a scan code into a virtual-key code */
vkey = MapVirtualKey(scancode, 1);
#ifdef NO_GETKEYBOARDSTATE
/* Uh oh, better hope the vkey is close enough.. */
keysym->unicode = vkey;
#else
GetKeyboardState(keystate);
/* Accept the result only when exactly one character was produced */
if (SDL_ToUnicode(vkey, scancode, keystate, wchars, sizeof(wchars)/sizeof(wchars[0]), 0) == 1)
{
keysym->unicode = wchars[0];
}
#endif /* NO_GETKEYBOARDSTATE */
}
Apparently SDL_ToUnicode is mapped to
Code:
/* Forward declaration; parameter list mirrors the SDL_ToUnicode() call (vkey, scancode, keystate, wchars, wsize, flags) */
static int WINAPI ToUnicode9xME(UINT vkey, UINT scancode, BYTE *keystate, LPWSTR wchars, int wsize, UINT flags);
which is implemented on lines 878-886 of src/video/wincommon/SDL_sysevents.c:
Code:
/*
 * ToUnicode()-shaped wrapper built on ToAsciiEx() + MultiByteToWideChar().
 *
 * vkey, scancode, keystate : forwarded unchanged to ToAsciiEx().
 * wchars, wsize            : output buffer and its capacity in wide
 *                            characters, forwarded to MultiByteToWideChar().
 * flags                    : accepted for signature compatibility; not used
 *                            (ToAsciiEx() is always called with flags 0).
 *
 * Returns the MultiByteToWideChar() result when ToAsciiEx() reports exactly
 * one translated character, and 0 in every other case.
 */
static int WINAPI ToUnicode9xME(UINT vkey, UINT scancode, PBYTE keystate, LPWSTR wchars, int wsize, UINT flags)
{
	/*
	 * ToAsciiEx() writes through an LPWORD, so hand it a real WORD rather
	 * than a byte array cast to WORD* (alignment / aliasing hazard).
	 */
	WORD packed = 0;
	char ansi;

	(void)flags;

	if ( ToAsciiEx(vkey, scancode, keystate, &packed, 0, GetKeyboardLayout(0)) != 1 ) {
		return 0;
	}

	/* A single translated character is delivered in the low-order byte */
	ansi = (char)(packed & 0xFF);
	return MultiByteToWideChar(codepage, 0, &ansi, 1, wchars, wsize);
}