Hi,

I have questions and issues specific to SSE optimization in C. My code is as follows:

Code:#include <stdlib.h> #include "xmmintrin.h" #include <stdio.h> #define NUM_ELEMS (32*1024) #define NUM_ITERS 10000 /* Note: xmmintrin.h is a standard header file available under the GNU Open GPL license. It contains definitions for C functions that wrap the SSE instruction set */ float a ; float* x ; float* y ; int main(int argc, char **argv) { srand(1); x = (float*) malloc(NUM_ELEMS * sizeof(float)); y = (float*) malloc(NUM_ELEMS * sizeof(float)); __m128 m1, m2, m3, m4; // type cast x, y and a to efficient intrinsic __m128 data type __m128* x = (__m128*) x ; __m128* y = (__m128*) y ; __m128 a = (__m128) a ; for (int i = 0; i < NUM_ELEMS; i++) { *x = _mm_set_ps1((float)rand()/100000) ; *y = _mm_set_ps1((float)rand()/100000) ; } for (int k = 0; k < NUM_ITERS; k++) { a = _mm_set_ps1(0.0) ; for ( int i = 0; i < NUM_ELEMS; i++ ) { //a += (x[i] + y[i]) * (x[i] - y[i]); m1 = _mm_add_ps(*x,*y); // m1 = x[i] + y[i] m2 = _mm_sub_ps(*x,*y); // m2 = x[i] - y[i] m3 = _mm_add_ps(m1, m2); // m3 = (x[i] + y[i]) * (x[i] - y[i]) a = _mm_add_ps(a, m3) ; // a+= x[i] + y[i] * (x[i] - y[i]) x++ ; y++ ; } } //a = (float) a ; //fprintf(stderr, "a = %f\n", a); return 0; }

Q 1. I am getting a Segmentation fault at:

*x = _mm_set_ps1((float)rand()/100000) ;

at the very first loop iteration.

The GDB output is:

Program received signal SIGSEGV, Segmentation fault.

0x00000000004006df in main (argc=2, argv=0x7fff303d8548) at prog-sse.c:45

45 *x = _mm_set_ps1((float)rand()/100000) ;

Why is it giving me a segmentation fault at memory that I have allocated?

Q.2. How do you print values in m128 variables, i.e. what is the string replacement code for printf

printf("value of my m128 variable is %?", My_m128_Variable) ;

or do we have to copy it to a float buffer variable and print the buffer?

Thanks.

Saad