I did some fiddling with GCC.
Code:
// for ( i = 0 ; i < 10 ; i++ ) a[i] = 0;
cmpl $9, -12(%ebp) // compare(9, i)
jle L18 // if <= goto L18
jmp L16 // goto L16
L18:
movl -12(%ebp), %eax // %eax = i
movl $0, -72(%ebp,%eax,4) // a[i] = 0
leal -12(%ebp), %eax // %eax = &i
incl (%eax) // i++
jmp L15 // goto L15
Code:
// for ( p = a ; p != end ; p++ ) *p = 0;
movl -76(%ebp), %eax // %eax = p
cmpl -80(%ebp), %eax // compare(end, p)
jne L22 // if != goto L22
jmp L14 // goto L14
L22:
movl -76(%ebp), %eax // %eax = p
movl $0, (%eax) // *p = 0
leal -76(%ebp), %eax // %eax = &p
addl $4, (%eax) // p++ (pointer arithmetic)
jmp L19 // goto L19
Optimisation (-O2) makes a big difference.
Code:
// for ( i = 0 ; i < 10 ; i++ ) a[i] = 0;
L28:
movl $0, -56(%ebp,%eax,4) // *(a + i) = 0
incl %eax // i++
cmpl $9, %eax // compare(9, i)
jle L28 // if <= goto L28
Code:
// for ( p = a ; p != end ; p++ ) *p = 0;
L33:
movl $0, (%eax) // *p=0
addl $4, %eax // p++
cmpl %edx, %eax // compare(end, p)
jne L33 // if != goto L33
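The indexed version definitely involves an additional add: the address -56(%ebp,%eax,4) has to be formed from the base and the scaled index for every store, whereas the pointer version stores straight through (%eax). Whether that makes any difference in clock cycles, someone else will have to tell.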
And finally the original example:
Code:
// Demonstration routine: merges two 10-byte arrays bit-by-bit under a mask.
// For every index, the bits of source[i] selected by maskbyte are combined
// with the bits of dest[i] that maskbyte does not select, and the result is
// stored back into dest[i]. Takes no arguments and returns nothing.
// NOTE(review): dest and source are read without ever being initialised
// in this function, so the values being processed are indeterminate.
void test3(void)
{
int i;
unsigned char dest[10], source[10];
// Bit pattern 00000101: selects bits 0 and 2.
unsigned char maskbyte = 5;
for ( i = 0; i < 10 ; i++ )
{
// The two masked operands have no set bits in common, so the addition
// cannot carry and yields the same value a bitwise OR would.
dest[i] = (source[i] & maskbyte)+(dest[i] & ~maskbyte);
}
}
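Here is the optimised assembly for the loop (the loop-head label L43, which the final jle jumps back to, is not shown in this excerpt):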
Code:
movb -24(%ebp,%ecx), %dl // %dl = dest[i]
movb -40(%ebp,%ecx), %al // %al = source[i]
andl $5, %eax // %eax = %eax & maskbyte
// %eax = source[i] & maskbyte
// Note: %al is the low byte of %eax
andl $-6, %edx // %edx = %edx & ~maskbyte
// %edx = dest[i] & ~maskbyte
// Note: %dl is the low byte of %edx
addl %edx, %eax // %eax = %eax + %edx
// (source[i] & maskbyte)+(dest[i] & ~maskbyte)
movb %al, -24(%ebp,%ecx) // dest[i] = %al
// Note: %al is the low byte of %eax
incl %ecx // i++
cmpl $9, %ecx // compare(9, i)
jle L43 // if <= goto L43
I'm not sure if that settles any arguments, but it taught me a bit more about reading assembly.