Hello there,
For a while now I've been working on a program that, at its core, uses the method of relaxation to find the transverse electric field in a cross section of some metal cylindrical cavity with arbitrary boundary conditions (shape). That's quite a mouthful.
The algorithm, as you may know, involves calculating the laplacian of a large 2D array and subtracting it, along with a scale factor, from each point. This must be done in two sweeps, since the new value of each point depends on its four adjacent points. A running calculation gives the wrong result. And, of course, you do this millions of times.
My code is terribly slow, but I don't see how I could expect much better then O(n) for this algorithm. So I'm wondering how this algorithm can be split, as to take advantage of multi-core processors. Any ideas? Here's some code:
Please, don't mind the code dump. It's not important.
Code:
//The high-level look at the relaxation
//g is of type 'grid_t' -- essentially a 2D array
//reg is of type 'region', which specifies a subset of a grid
std::vector<double> lap(grid_t::num_points(reg)); //laplacian field (as a 1D array)
for(unsigned int t = 0; t < time_steps; ++t)
{
g.iterate_region(reg,functions::calc_lap(lap)); //feeds laplacian into lap
g.iterate_region(reg,functions::relax(lap,dt)); //adds dt*lap[] to each point
}
Code:
//The grid object type
class grid_t
{
public:
grid_t(void) : w(0), h(0) {}
grid_t(int w_in, int h_in, double fill = 0.0);
~grid_t(void);
struct span {
span(unsigned int x_in,unsigned int y_in,int dy_in) : x(x_in), y(y_in), dy(dy_in) {}
unsigned int x, y;
int dy;
};
typedef std::vector<span> region;
double & operator()(int x, int y);
bool iterate_grid(func2d & f);
bool iterate_region(region & reg, func2d & f);
region find_regions(double boundary = 0.0);
bool write_file(const std::string & path);
unsigned int const width() { return w; }
unsigned int const height(){ return h; }
unsigned long int const num_points() { return w*h; } //number of points in grid
static unsigned long int num_points(const region & r); //number of POINTS in r (not members, which are spans).
private:
int w, h;
std::vector<std::vector<double> > g;
};
Code:
//The virtual function object
//Surprisingly, the virtual calls are optimized out (they don't depend on input)
class func2d
{
public:
func2d() {}
~func2d() {}
virtual double operator()(int x, int y, grid_t& g) = 0;
};
namespace functions
{
class relax : public func2d
{
public:
relax(std::vector<double> & lap_ref, double dt_in) : lap(lap_ref), dt(dt_in), count(0), func2d() {}
~relax() {}
double operator()(int x, int y, grid_t & g)
{
g(x,y) = g(x,y) + dt*lap[count++];
return g(x,y);
}
private:
std::vector<double> & lap;
const double dt;
unsigned long int count;
};
class calc_lap : public func2d
{
public:
calc_lap(std::vector<double> & lap_ref) : lap(lap_ref), count(0), func2d() {}
~calc_lap() {}
double operator()(int x, int y, grid_t & g)
{
lap[count++] = ( g(x-1,y) + g(x+1,y) + g(x,y-1) + g(x,y+1) - 4*g(x,y) );
return lap[count-1];
}
private:
std::vector<double> & lap;
unsigned long int count;
};
//......... More in namespace........
Code:
bool grid_t::iterate_region(region & reg, func2d & f)
{
for(unsigned int i = 0; i < reg.size(); ++i) //over all spans
{
for(int j = 0; j < reg[i].dy; ++j) //for each span
{
f(reg[i].x, reg[i].y + j, *this);
}
}
return true;
}