I want to share something I discovered yesterday.
All I wanted was to have an object aligned in memory on a 16-byte boundary so that I could use SSE2 instructions for efficiency. I assumed that GCC's alignment setting extended to where the object is placed in memory, not just to the layout of the struct itself. How wrong I was! After using the debugger and viewing the disassembly, I can say with confidence that GCC doesn't respect alignment rules on the stack. Shouldn't it?
This was the code I compiled with the latest GCC and then viewed in the debugger:
Code:
// Three-float vector that the poster wants placed on a 16-byte boundary.
// NOTE(review): per the GCC documentation, #pragma pack(n) sets the MAXIMUM
// alignment of struct members; it is not documented to raise the alignment of
// the struct or of objects of that type. Requesting a minimum alignment is
// what __attribute__((aligned(16))) is for -- confirm against the GCC manual.
#pragma pack(push, 16)
struct vec3d_t
{
float X;
float Y;
float Z;
};
#pragma pack(pop)
// Minimal test case: declares a vec3d_t between two int locals so that the
// address the compiler assigns to it on the stack can be inspected in the
// debugger. Returns the sum of all values, truncated to int.
// Note: vec3d_t is used without the `struct` keyword, so this builds as C++.
int main()
{
int dummy1 = 0;
vec3d_t myVec = { 0.0f, 0.0f, 0.0f };
int dummy2 = 0;
int myRes = (int)(myVec.X+myVec.Y+myVec.Z+dummy1+dummy2); // Don't let the compiler optimize them away
return myRes;
}
This was the ASM output:
(This isn't actually output from the version of GCC I originally compiled it with: the flash drive I'm using right now to produce this listing holds a slightly older version, but the result is almost the same.)
Code:
004012F0 push %ebp ; save the caller's frame pointer
004012F1 mov %esp,%ebp ; ebp = frame base, captured BEFORE esp is realigned below
004012F3 push %edi ; edi is used by rep stos below, so it is saved here
004012F4 sub $0x44,%esp ; reserve 0x44 bytes for locals
004012F7 and $0xfffffff0,%esp ; Stack aligned to 16
004012FA mov $0x0,%eax ; next five instructions: (0 + 15 + 15) >> 4 << 4 = 16
004012FF add $0xf,%eax
00401302 add $0xf,%eax
00401305 shr $0x4,%eax
00401308 shl $0x4,%eax
0040130B mov %eax,-0x3c(%ebp)
0040130E mov -0x3c(%ebp),%eax ; eax = 16, presumably the size passed to _alloca
00401311 call 0x401880 <_alloca>
00401316 call 0x401400 <__main>
0040131B movl $0x0,-0xc(%ebp) ; presumably dummy1 = 0
00401322 lea -0x28(%ebp),%edi ; Memory alignment UNKNOWN
00401325 cld ; clear direction flag so rep stos advances upward
00401326 mov $0x0,%edx
0040132B mov $0x3,%eax
00401330 mov %eax,%ecx ; ecx = 3 (repeat count)
00401332 mov %edx,%eax ; eax = 0 (value to store)
00401334 rep stos %eax,%es:(%edi) ; store three zero dwords at -0x28(%ebp): presumably myVec = {0, 0, 0}
00401336 movl $0x0,-0x2c(%ebp) ; presumably dummy2 = 0
0040133D flds -0x28(%ebp) ; x87: load first float (presumably myVec.X)
00401340 fadds -0x24(%ebp) ; add second float (presumably myVec.Y)
00401343 fadds -0x20(%ebp) ; add third float (presumably myVec.Z)
00401346 fildl -0xc(%ebp) ; load the int at -0xc as floating point
00401349 faddp %st,%st(1) ; add it to the running sum
0040134B fildl -0x2c(%ebp) ; load the int at -0x2c as floating point
0040134E faddp %st,%st(1) ; add it to the running sum
00401350 fnstcw -0x32(%ebp) ; save the current x87 control word
00401353 movzwl -0x32(%ebp),%eax
00401357 or $0xc00,%ax ; set both rounding-control bits (truncate toward zero)
0040135B mov %ax,-0x34(%ebp)
0040135F fldcw -0x34(%ebp) ; switch to the modified control word
00401362 fistpl -0x30(%ebp) ; convert the sum to a 32-bit int and pop it
00401365 fldcw -0x32(%ebp) ; restore the saved control word
00401368 mov -0x30(%ebp),%eax ; eax = return value
0040136B mov -0x4(%ebp),%edi ; restore the saved edi
0040136E leave
0040136F ret
See?! Very simple! (Well, to me it is.)