I rarely find the use of bitfields to be an optimization when it comes to speed, because what appears to be a simple assignment is often implemented by masking and shifting. For example,
Code:
;
; b.sbyte.u5 = a.sbyte.u2;
;
; The one-bit copy above is carried out as: load, shift down, mask,
; shift up, then a read-modify-write of the destination byte.
mov cl,byte ptr [ebp-1]   ; load the source byte (presumably the storage of a)
shr ecx,2                 ; bring bit 2 down to bit 0
and ecx,1                 ; keep only bit 0
and cl,1                  ; masks bit 0 again (no effect after the previous and)
shl ecx,5                 ; move the isolated bit up to bit 5
mov dl,byte ptr [eax]     ; load the destination byte (presumably eax points at b)
and dl,-33                ; -33 is 0xDF as a byte: clear bit 5, keep the other bits
or cl,dl                  ; merge the new bit 5 with the preserved bits
mov byte ptr [eax],cl     ; store the combined byte back through eax
Bitfields may pack data more tightly, but this size benefit may be taken at the cost of speed. Some exceptions might be on architectures that actually have bit-addressable memory in which an assignment would be an assignment.
I ran a little comparison; it is less than perfect, but here it is.
Code:
#include <stdio.h>
#include <limits.h>
#include <time.h>
/*
 * Return `value` with its bit order reversed (bit 0 becomes the top bit
 * of the byte, and so on).
 *
 * The loop walks the input from its least significant bit upwards while a
 * single-bit marker walks the output from the top bit downwards.  It ends
 * as soon as no set bits remain in the input, so leading zero bits of the
 * input cost nothing.
 */
unsigned char foo(unsigned char value)
{
    unsigned char reversed = 0;
    unsigned char bit;

    for ( bit = 1 << (CHAR_BIT - 1); value != 0; bit >>= 1, value >>= 1 )
    {
        /* Testing the low bit avoids a modulo operation. */
        if ( (value & 1) != 0 )
        {
            reversed |= bit;
        }
    }
    return reversed;
}
/*
 * Return `num` with the order of its low eight bits reversed.
 *
 * Always performs exactly eight steps: each step shifts the accumulated
 * result left by one and appends the current lowest bit of the input,
 * then drops that bit from the input.
 */
unsigned char bar(unsigned char num)
{
    unsigned char out = 0;
    int step;

    for ( step = 0; step < 8; ++step )
    {
        out = out << 1;
        if ( num % 2 )
        {
            out |= 0x01;
        }
        num = num >> 1;
    }
    return out;
}
/*
 * Return `value` with its bit order reversed, using a union that overlays
 * a byte with eight one-bit bitfields.
 *
 * The input is stored through the byte view of `a`; each one-bit field of
 * `a` is then copied to the mirrored field of `b` (u0 <-> u7, u1 <-> u6,
 * ...), and the result is read back through the byte view of `b`.
 *
 * NOTE(review): bitfields of type unsigned char and the order in which
 * fields are allocated within the byte are implementation-defined in C.
 * The mirrored copy gives a reversal for either allocation order as long
 * as the eight fields occupy exactly one byte -- confirm for your compiler.
 */
unsigned char baz(unsigned char value)
{
    union byteu
    {
        unsigned char byte;
        struct
        {
            unsigned char u0:1, u1:1, u2:1, u3:1;
            unsigned char u4:1, u5:1, u6:1, u7:1;
        } sbyte;
    };
    union byteu a;
    union byteu b;

    a.byte = value;

    /* Every field of b is written exactly once, so b needs no initial value. */
    b.sbyte.u0 = a.sbyte.u7;
    b.sbyte.u1 = a.sbyte.u6;
    b.sbyte.u2 = a.sbyte.u5;
    b.sbyte.u3 = a.sbyte.u4;
    b.sbyte.u4 = a.sbyte.u3;
    b.sbyte.u5 = a.sbyte.u2;
    b.sbyte.u6 = a.sbyte.u1;
    b.sbyte.u7 = a.sbyte.u0;

    return b.byte;
}
#define CYCLES 100000000
/*
 * Benchmark driver.
 *
 * First prints the result of each bit-reversal routine for one sample
 * value so the implementations can be compared by eye, then times CYCLES
 * calls of each routine with clock() and prints the elapsed processor
 * time in seconds.
 *
 * Returns 0.
 */
int main(void)
{
    const char *name[] = { "foo", "bar", "baz" };
    unsigned char (*const function[])(unsigned char) = { foo, bar, baz };
    unsigned char value = 5;
    /* Results are stored here so an optimizing compiler cannot discard
       the benchmarked calls as dead code. */
    volatile unsigned char sink = 0;
    size_t i, j;

    /* %X expects unsigned int; the casts make the argument types match. */
    printf("foo(%X) = %X\n", (unsigned)value, (unsigned)foo(value));
    printf("bar(%X) = %X\n", (unsigned)value, (unsigned)bar(value));
    printf("baz(%X) = %X\n", (unsigned)value, (unsigned)baz(value));
    fflush(stdout);

    for ( i = 0; i < sizeof(function)/sizeof(*function); ++i )
    {
        clock_t end, start = clock();
        for ( j = 0; j < CYCLES; ++j )
        {
            /* Only the low byte of the counter is used as input. */
            sink = function [ i ] ((unsigned char)j);
        }
        end = clock();
        /* clock_t and CLOCKS_PER_SEC may both be integer types; convert
           to double so the division keeps fractions and the argument
           really is the double that %f requires. */
        printf("%s = %f\n", name [ i ], (double)(end - start) / CLOCKS_PER_SEC);
        fflush(stdout);
    }
    (void)sink;
    return 0;
}
Here are the results I obtained.
Code:
foo(5) = A0
bar(5) = A0
baz(5) = A0
foo = 9.614000
bar = 12.738000
baz = 17.766000
If optimizing for speed was the goal, this bitfield implementation (baz) was 39% slower than the original (bar).
[OT]
>Vola.
Perhaps you mean voilà.
[/OT]