Code:// this code is copyright 2007 Anthony Q. Bachler // you may use this code free of charge for non-commercial purposes only // for commercial use contact the author. This code is not released under the GPL void CNeuron::Process(double* Input , double* Output){ double Scratch = 0.0; __declspec(align(16)) __m128 Zero[] = {0.0 , 0.0}; __declspec(align(16)) __m128 tempout[] = {0.0 , 0.0}; double* pInput; double* pWeight; DWORD dwCount; pInput = Input; pWeight = this->Weights; dwCount = this->dwNumberOfInputs; dwCount = ((dwCount+7)/8); // calculate total iterations __asm push eax __asm push edx __asm push ecx __asm xor eax , eax __asm mov edx , 0x00000040 __asm mov ecx , dwCount __asm movapd xmm1 , Zero __asm movapd xmm3 , Zero __asm movapd xmm5 , Zero __asm movapd xmm7 , Zero theloop: __asm movapd xmm0 , pInput __asm mulpd xmm0 , pWeight __asm addpd xmm1 , xmm0 __asm movapd xmm2 , pInput+16 __asm mulpd xmm2 , pWeight+16 __asm addpd xmm3 , xmm2 __asm movapd xmm4 , pInput+32 __asm mulpd xmm4 , pWeight+32 __asm addpd xmm5 , xmm4 __asm movapd xmm6 , pInput+48 __asm add pInput , edx __asm mulpd xmm6 , pWeight+48 __asm add pWeight , edx __asm addpd xmm7 , xmm6 __asm loop theloop __asm addpd xmm1 , xmm3 __asm addpd xmm5 , xmm7 __asm addpd xmm1 , xmm5 __asm movapd tempout , xmm1 __asm fld tempout __asm fld tempout+8 __asm fadd __asm fstp st(0) __asm fld1 __asm fpatan __asm fsin __asm fstp Output __asm pop ecx __asm pop edx __asm pop eax /* for(DWORD temp=0;temp<this->dwNumberOfInputs;temp++){ Scratch += Input[temp] * this->Weights[temp]; } Output[0] = sin(atan(Scratch)); //*/ return; }