1. ## Pi Calculation

Hi, currently I am working on a distributed computing project to work out the mathematical constant 'pi'. After taking a look at many examples and formulas for pi I have choson to use a common implementation of the BBP formula. This is a digit extraction formula which is able to compute any digit in Pi without having to know the n-1 digit. However, I have a problem with the script, it is quite slow, each time it is run it returns 9 digits of Pi from the value of n. If n was 1 it would return digits 1-9 . If in my basic test program I set n to be 10,000 it takes around 85 seconds to work out nine digits starting at position 10,000 of pi.

This is a problem for, as when larger values are reached (billions) it could takes weeks of cpu time just to get nine more digits. However, I am quite sure there is room for optimisation in the current formula I use. I have also been told by some people that it would be faster if the program used hardware cpu optimisations (sse?), however I am unsure of this. Here is my current code, thank you for any help/advice you can give

Code:
```#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>

/* #define HAS_LONG_LONG */

#ifdef HAS_LONG_LONG
#define mul_mod(a,b,m) (( (long long) (a) * (long long) (b) ) % (m))
#else
#define mul_mod(a,b,m) fmod( (double) a * (double) b, m)
#endif

/* returns the inverse of x mod y */

int inv_mod(int x,int y) {
int q,u,v,a,c,t;

u=x;
v=y;
c=1;
a=0;
do {
q=v/u;

t=c;
c=a-q*c;
a=t;

t=u;
u=v-q*u;
v=t;
} while (u!=0);
a=a%y;
if (a<0) a=y+a;
return a;
}

/* returns (a^b) mod m */

int pow_mod(int a,int b,int m)
{
int r,aa;

r=1;
aa=a;
while (1) {
if (b&1) r=mul_mod(r,aa,m);
b=b>>1;
if (b == 0) break;
aa=mul_mod(aa,aa,m);
}
return r;
}

/* returns true if n is prime */

int is_prime(int n)
{
int r,i;
if ((n % 2) == 0) return 0;

r=(int)(sqrt(n));
for(i=3;i<=r;i+=2) if ((n % i) == 0) return 0;
return 1;
}

/* returns the prime number immediatly after n */

int next_prime(int n)
{
do {
n++;
} while (!is_prime(n));
return n;
}

/* finds out nine digits of pi starting from n */

/* will probably be the main function in openmosix capable version*/

int pseudo_main(int n)
{
int av,a,vmax,N,num,den,k,kq,kq2,t,v,s,i;
double sum;

double start_time, elap_time;
start_time = (double) clock ();

N=(int)((n+20)*log(10)/log(2));

sum=0;

for(a=3;a<=(2*N);a=next_prime(a)) {

vmax=(int)(log(2*N)/log(a));
av=1;
for(i=0;i<vmax;i++) av=av*a;

s=0;
num=1;
den=1;
v=0;
kq=1;
kq2=1;

for(k=1;k<=N;k++) {

t=k;
if (kq >= a) {
do {
t=t/a;
v--;
} while ((t % a) == 0);
kq=0;
}
kq++;
num=mul_mod(num,t,av);

t=(2*k-1);
if (kq2 >= a) {
if (kq2 == a) {
do {
t=t/a;
v++;
} while ((t % a) == 0);
}
kq2-=a;
}
den=mul_mod(den,t,av);
kq2+=2;

if (v > 0) {
t=inv_mod(den,av);
t=mul_mod(t,num,av);
t=mul_mod(t,k,av);
for(i=v;i<vmax;i++) t=mul_mod(t,a,av);
s+=t;
if (s>=av) s-=av;
}

}

t=pow_mod(10,n-1,av);
s=mul_mod(s,t,av);
sum=fmod(sum+(double) s/ (double) av,1.0);
}

elap_time = ((double) clock () - (double) start_time) / CLOCKS_PER_SEC;

printf("Position: %9d Digits: %09d Time: %9.2lf seconds\n",n,(int)(sum*1e9),elap_time);

return 0;
}

/* calls up the pseudo_main function to find 9 digits at a time */

int main (int argc,char *argv[])
{
int position=1,counter=500,number; //counter is the value of pi to start from.
double start_time, elap_time;

pseudo_main(counter);

//Pause Code
system ("pause"); // execute M\$-DOS' pause command
return 0;

}```

2. The biggest single optimization i can suggest to start with is to run through just one time and collate the prime number information you need into a BST. This will allow you to cut out all brute force prime searching during the main calculation and can be replaced by a simple tree lookup.That should give your code quite a boost.

3. If you want to implement it as fast as possible then you could try implementing all of it in assembly (which would also allow you to use special instructions). But I'll definitely look at optimising/find better algorithms first.