I'm trying to get the hang of using SSE instructions through gcc, and I can't get multiplication to work.
Here is my code and my Makefile
simd.c
#include <stdio.h>
#include <stdlib.h>

/*
 * Four packed 32-bit lanes (16 bytes) declared with GCC's vector extension.
 * The lanes are unsigned so that the repeated multiplications below wrap
 * modulo 2^32; with signed lanes the products overflow after a handful of
 * iterations, which is undefined behavior in C.
 */
typedef unsigned int v4su __attribute__ ((vector_size (16)));

/* Overlay giving per-lane access (s) to the packed vector (v). */
typedef union {
    unsigned int s[4];
    v4su v;
} v4su_u;

enum {
    LANES = 4,             /* 32-bit elements per 16-byte vector */
    ITERATIONS = 0xFFFFFF  /* repetitions of the multiply in each mode */
};

/* Print the command-line help and terminate with exit status 1. */
void usage(char **argv)
{
    printf("Usage: %s [1|2]\n1: SIMD mode\n2: SISD mode\n", argv[0]);
    exit(1);
}

/*
 * Micro-benchmark: repeatedly multiply two 4-element integer vectors,
 * either through the vector type (argument "1") or with a plain scalar
 * loop (argument "2"). Any other argument, or none, prints the usage
 * text and exits with status 1. Returns 0 on success.
 */
int main(int argc, char **argv)
{
    unsigned int aa[LANES] = { 1, 2, 3, 4 }, bb[LANES] = { 4, 3, 2, 1 };
    v4su_u a, b;
    int i, j;

    if (argc < 2) {
        usage(argv);
    }

    /* Load the vectors lane by lane through the union's array view. */
    for (i = 0; i < LANES; ++i) {
        a.s[i] = aa[i];
        b.s[i] = bb[i];
    }

    if (argv[1][0] == '1') {
        /*
         * A packed 32-bit multiply is a single instruction (pmulld) only
         * from SSE4.1 onward. When the target is limited to SSE/SSE2 the
         * compiler has to expand this into per-lane scalar multiplies, so
         * a compiler and CPU with SSE4.1 (e.g. -msse4.1) are needed for a
         * real vector multiply here.
         */
        for (i = 0; i < ITERATIONS; ++i) {
            a.v = a.v * b.v;
        }
    } else if (argv[1][0] == '2') {
        for (i = 0; i < ITERATIONS; ++i) {
            for (j = 0; j < LANES; ++j) {
                aa[j] = aa[j] * bb[j];
            }
        }
    } else {
        usage(argv);
    }

    return 0;
}
OS X's otool -vt gives me this for the multiplication of the vectorsCode:CC = gcc FLAGS = -O0 -g -msse -m64 OUTPUT = simd all: $(CC) $(FLAGS) -o $(OUTPUT) simd.c exec: $(CC) $(FLAGS) -o $(OUTPUT) simd.c obj: $(CC) $(FLAGS) -c simd.c
Basically, it loads the value from the stack to the xmm registers, moves it into normal registers, multiplies it, moves it back into xmm registers, then moves it back onto the stack, completely defeating the purpose of sse and in fact harming performance. If I change to addition it works correctlyCode:00000000000000e1 movdqa 0xc0(%rbp),%xmm1 00000000000000e6 movdqa 0xb0(%rbp),%xmm2 00000000000000eb movd %xmm1,%edx 00000000000000ef movd %xmm2,%eax 00000000000000f3 movl %edx,%ecx 00000000000000f5 imull %eax,%ecx 00000000000000f8 movl %ecx,0x90(%rbp) 00000000000000fb pshufd $0x55,%xmm1,%xmm0 0000000000000100 movd %xmm0,%edx 0000000000000104 pshufd $0x55,%xmm2,%xmm0 0000000000000109 movd %xmm0,%eax 000000000000010d movl %edx,%ecx 000000000000010f imull %eax,%ecx 0000000000000112 movl %ecx,0x94(%rbp) 0000000000000115 movdqa %xmm1,%xmm0 0000000000000119 punpckhdq %xmm1,%xmm0 000000000000011d movd %xmm0,%edx 0000000000000121 movdqa %xmm2,%xmm0 0000000000000125 punpckhdq %xmm2,%xmm0 0000000000000129 movd %xmm0,%eax 000000000000012d movl %edx,%ecx 000000000000012f imull %eax,%ecx 0000000000000132 movl %ecx,0x98(%rbp) 0000000000000135 pshufd $0xff,%xmm1,%xmm0 000000000000013a movd %xmm0,%edx 000000000000013e pshufd $0xff,%xmm2,%xmm0 0000000000000143 movd %xmm0,%eax 0000000000000147 movl %edx,%ecx 0000000000000149 imull %eax,%ecx 000000000000014c movl %ecx,0x9c(%rbp) 000000000000014f movd 0x90(%rbp),%xmm1 0000000000000154 movd 0x94(%rbp),%xmm0 0000000000000159 punpckldq %xmm0,%xmm1 000000000000015d movd 0x98(%rbp),%xmm0 0000000000000162 movd 0x9c(%rbp),%xmm2 0000000000000167 punpckldq %xmm2,%xmm0 000000000000016b movq %xmm1,%xmm2 000000000000016f punpcklqdq %xmm0,%xmm2 0000000000000173 movdqa %xmm2,%xmm0 0000000000000177 movdqa %xmm0,0xc0(%rbp)
I thought SSE supported integer multiplication. Am I wrong, or am I doing something wrong? Code:00000000000000da movdqa 0xc0(%rbp),%xmm1 00000000000000df movdqa 0xb0(%rbp),%xmm0 00000000000000e4 paddd %xmm1,%xmm0 00000000000000e8 movdqa %xmm0,0xc0(%rbp)
**EDIT**
A little bit of system info
Code:$ gcc -v Using built-in specs. Target: i686-apple-darwin9 [huge string removed] Thread model: posix gcc version 4.0.1 (Apple Inc. build 5465) $ uname -a Darwin 9.4.0 Darwin Kernel Version 9.4.0: Mon Jun 9 19:30:53 PDT 2008; root:xnu-1228.5.20~1/RELEASE_I386 i386



LinkBack URL
About LinkBacks



