Thread: Odd assembly problem

Threaded View

Previous Post Previous Post   Next Post Next Post
  1. #24
    Malum in se abachler's Avatar
    Join Date
    Apr 2007
    Posts
    3,195
    Code:
    // this code is copyright 2007 Anthony Q. Bachler
    // you may use this code free of charge for non-commercial purposes only
    // for commercial use contact the author.  This code is not released under the GPL
    
    void CNeuron::Process(double* Input , double* Output){
        double Scratch = 0.0;
        __declspec(align(16)) __m128 Zero[] = {0.0 , 0.0};
        __declspec(align(16)) __m128 tempout[] = {0.0 , 0.0};
        double* pInput;
        double* pWeight;
        DWORD dwCount;
    
        pInput = Input;
        pWeight = this->Weights;
        dwCount = this->dwNumberOfInputs;
        dwCount = ((dwCount+7)/8);  // calculate total iterations
        __asm push      eax
        __asm push      edx
        __asm push      ecx
        __asm xor       eax , eax
        __asm mov       edx , 0x00000040
        __asm mov       ecx , dwCount
        __asm movapd    xmm1 , Zero
        __asm movapd    xmm3 , Zero
        __asm movapd    xmm5 , Zero
        __asm movapd    xmm7 , Zero
    theloop:
        __asm movapd    xmm0 , pInput
        __asm mulpd     xmm0 , pWeight
        __asm addpd     xmm1 , xmm0
        __asm movapd    xmm2 , pInput+16
        __asm mulpd     xmm2 , pWeight+16
        __asm addpd     xmm3 , xmm2
        __asm movapd    xmm4 , pInput+32
        __asm mulpd     xmm4 , pWeight+32
        __asm addpd     xmm5 , xmm4
        __asm movapd    xmm6 , pInput+48
        __asm add       pInput , edx
        __asm mulpd     xmm6 , pWeight+48
        __asm add       pWeight , edx
        __asm addpd     xmm7 , xmm6
        __asm loop      theloop
        __asm addpd     xmm1 , xmm3
        __asm addpd     xmm5 , xmm7
        __asm addpd     xmm1 , xmm5
        __asm movapd    tempout , xmm1
        __asm fld       tempout
        __asm fld       tempout+8
        __asm fadd
        __asm fstp      st(0)
        __asm fld1            
        __asm fpatan    
        __asm fsin
        __asm fstp      Output
        __asm pop       ecx
        __asm pop       edx
        __asm pop       eax
    /*
        for(DWORD temp=0;temp<this->dwNumberOfInputs;temp++){
            Scratch += Input[temp] * this->Weights[temp];
            }
        Output[0] = sin(atan(Scratch));
    //*/
        return;
        }
    Last edited by abachler; 11-19-2007 at 07:48 PM.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Bin packing problem....
    By 81N4RY_DR460N in forum C++ Programming
    Replies: 0
    Last Post: 08-01-2005, 05:20 AM
  2. Words and lines count problem
    By emo in forum C Programming
    Replies: 1
    Last Post: 07-12-2005, 03:36 PM
  3. half ADT (nested struct) problem...
    By CyC|OpS in forum C Programming
    Replies: 1
    Last Post: 10-26-2002, 08:37 AM
  4. binary tree problem - help needed
    By sanju in forum C Programming
    Replies: 4
    Last Post: 10-16-2002, 05:18 AM