Thread: GCC and SSE multiplication

Threaded View

Previous Post Previous Post   Next Post Next Post
  1. #1
    Registered User Kernel Sanders's Avatar
    Join Date
    Aug 2008
    Posts
    61

    GCC and SSE multiplication

    I'm trying to get the hang of using SSE instructions through gcc, and I can't get multiplication to work.

    Here is my code and my Makefile

    simd.c
    Code:
    #include <stdio.h>
    #include <stdlib.h>
    
    /* GCC vector extension type: a 16-byte vector whose elements are int. */
    typedef int v4si __attribute__ ((vector_size (16)));
    /* Union overlaying the vector (v) with a plain int[4] (s) for per-element access. */
    typedef union { int s[4]; v4si v; } v4si_u;
    
    
    /*
     * Print the command-line synopsis to stdout and terminate the process
     * with exit status 1.
     *
     * argv: the program's argument vector; argv[0] is shown as the program name.
     *
     * Does not return.
     */
    void usage(char **argv){
      /* The conversion must be a real "%s"; an HTML-escaped "&#37;s" would be
         printed literally and argv[0] would be ignored. */
      printf("Usage: %s [1|2]\n1: SIMD mode\n2: SISD mode\n", argv[0]);
      exit(1);
    }
    
    /*
     * Benchmark driver: repeats a four-element integer multiplication
     * 0xFFFFFF times.
     *
     *   argv[1] starting with '1' -> multiply the two vector values as a whole
     *   argv[1] starting with '2' -> multiply the plain int arrays element by element
     *
     * A missing argument or any other leading character is routed to usage().
     * The products are never printed; returns 0.
     */
    int main(int argc, char **argv){
      int lhs[4] = { 1, 2, 3, 4 };
      int rhs[4] = { 4, 3, 2, 1 };
      v4si_u vec_lhs, vec_rhs;
      int iter, lane;

      if(argc < 2){
        usage(argv);
      }

      /* Fill both vector unions through their scalar array view. */
      for(lane = 0; lane < 4; ++lane){
        vec_lhs.s[lane] = lhs[lane];
        vec_rhs.s[lane] = rhs[lane];
      }

      switch(argv[1][0]){
      case '1':
        /* Whole-vector multiply using the GCC vector extension operator. */
        for(iter = 0; iter < 0xFFFFFF; ++iter){
          vec_lhs.v = vec_lhs.v * vec_rhs.v;
        }
        break;
      case '2':
        /* Scalar reference path: one multiply per element. */
        for(iter = 0; iter < 0xFFFFFF; ++iter){
          for(lane = 0; lane < 4; ++lane){
            lhs[lane] = lhs[lane] * rhs[lane];
          }
        }
        break;
      default:
        usage(argv);
      }

      return 0;
    }
    Makefile
    Code:
    # Toolchain settings: gcc with optimization disabled (-O0), debug info (-g),
    # SSE code generation enabled (-msse) and a 64-bit target (-m64).
    CC = gcc
    FLAGS = -O0 -g -msse -m64
    # Name of the executable produced by the link targets below.
    OUTPUT = simd
    
    # Default target: compile and link simd.c into $(OUTPUT).
    all:
    	$(CC) $(FLAGS)  -o $(OUTPUT) simd.c
    # Same compile-and-link command as 'all'.
    exec:
    	$(CC) $(FLAGS) -o $(OUTPUT) simd.c
    # Compile only (-c): produce an object file from simd.c without linking.
    obj:
    	$(CC) $(FLAGS) -c simd.c
    OS X's otool -vt gives me this for the multiplication of the vectors
    Code:
    00000000000000e1	movdqa	0xc0(%rbp),%xmm1
    00000000000000e6	movdqa	0xb0(%rbp),%xmm2
    00000000000000eb	movd	%xmm1,%edx
    00000000000000ef	movd	%xmm2,%eax
    00000000000000f3	movl	%edx,%ecx
    00000000000000f5	imull	%eax,%ecx
    00000000000000f8	movl	%ecx,0x90(%rbp)
    00000000000000fb	pshufd	$0x55,%xmm1,%xmm0
    0000000000000100	movd	%xmm0,%edx
    0000000000000104	pshufd	$0x55,%xmm2,%xmm0
    0000000000000109	movd	%xmm0,%eax
    000000000000010d	movl	%edx,%ecx
    000000000000010f	imull	%eax,%ecx
    0000000000000112	movl	%ecx,0x94(%rbp)
    0000000000000115	movdqa	%xmm1,%xmm0
    0000000000000119	punpckhdq	%xmm1,%xmm0
    000000000000011d	movd	%xmm0,%edx
    0000000000000121	movdqa	%xmm2,%xmm0
    0000000000000125	punpckhdq	%xmm2,%xmm0
    0000000000000129	movd	%xmm0,%eax
    000000000000012d	movl	%edx,%ecx
    000000000000012f	imull	%eax,%ecx
    0000000000000132	movl	%ecx,0x98(%rbp)
    0000000000000135	pshufd	$0xff,%xmm1,%xmm0
    000000000000013a	movd	%xmm0,%edx
    000000000000013e	pshufd	$0xff,%xmm2,%xmm0
    0000000000000143	movd	%xmm0,%eax
    0000000000000147	movl	%edx,%ecx
    0000000000000149	imull	%eax,%ecx
    000000000000014c	movl	%ecx,0x9c(%rbp)
    000000000000014f	movd	0x90(%rbp),%xmm1
    0000000000000154	movd	0x94(%rbp),%xmm0
    0000000000000159	punpckldq	%xmm0,%xmm1
    000000000000015d	movd	0x98(%rbp),%xmm0
    0000000000000162	movd	0x9c(%rbp),%xmm2
    0000000000000167	punpckldq	%xmm2,%xmm0
    000000000000016b	movq	%xmm1,%xmm2
    000000000000016f	punpcklqdq	%xmm0,%xmm2
    0000000000000173	movdqa	%xmm2,%xmm0
    0000000000000177	movdqa	%xmm0,0xc0(%rbp)
    Basically, it loads the values from the stack into the xmm registers, moves them into general-purpose registers, multiplies them, moves the results back into xmm registers, and then stores them back on the stack, completely defeating the purpose of SSE and in fact harming performance. If I change the operation to addition, it works correctly:

    Code:
    00000000000000da	movdqa	0xc0(%rbp),%xmm1
    00000000000000df	movdqa	0xb0(%rbp),%xmm0
    00000000000000e4	paddd	%xmm1,%xmm0
    00000000000000e8	movdqa	%xmm0,0xc0(%rbp)
    I thought SSE supported integer multiplication. Am I wrong, or am I doing something wrong?

    **EDIT**
    A little bit of system info
    Code:
    $ gcc -v
    Using built-in specs.
    Target: i686-apple-darwin9
    [huge string removed]
    Thread model: posix
    gcc version 4.0.1 (Apple Inc. build 5465)
    $ uname -a
    Darwin 9.4.0 Darwin Kernel Version 9.4.0: Mon Jun  9 19:30:53 PDT 2008; root:xnu-1228.5.20~1/RELEASE_I386 i386
    Last edited by Kernel Sanders; 09-18-2008 at 07:28 PM.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. gcc inline asm: illegal instruction (core dump)
    By Sargnagel in forum C Programming
    Replies: 4
    Last Post: 10-28-2003, 01:41 PM
  2. gcc: vector instructions - but how?
    By Sargnagel in forum C Programming
    Replies: 12
    Last Post: 12-06-2002, 01:15 PM