Code:
#include <iostream>
#include <ctime>
#include <cassert>
#include <cstdlib>
//////////
////////// array2d.hpp
//////////
template<typename T, typename uint = std::size_t>
class array2d
{
protected:
T** data;
const uint w;
const uint h;
void alloc(const T & fill = T());
public:
typedef T data_type;
typedef uint size_type;
array2d(uint width_, uint height_, const T & fill = T())
: w(width_), h(height_)
{ alloc(fill); }
array2d & operator = (const array2d<T, uint> & other);
array2d(array2d<T,uint> & other)
: w(other.width()), h(other.height())
{ alloc(); this->operator =( other ); }
array2d(const array2d<T,uint> & other)
: w(other.width()), h(other.height())
{ alloc(); this->operator =( other ); }
~array2d();
const uint width() const
{ return w; }
const uint height() const
{ return h; }
T & operator()(uint x, uint y)
{ return data[x][y]; }
const T & operator()(uint x, uint y) const
{ return data[x][y]; }
};
template<typename T, typename uint>
void array2d<T,uint>::alloc(const T & fill)
{
data = new T*[w];
for(uint i = 0; i < w; ++i)
{
data[i] = new T[h];
for(uint j = 0; j < h; ++j)
data[i][j] = fill;
}
}
template<typename T, typename uint>
array2d<T,uint> & array2d<T,uint>::operator = (const array2d<T,uint> & other)
{
assert(other.width() == w && other.height() == h);
for(uint i = 0; i < w; ++i)
{
for(uint j = 0; j < h; ++j)
data[i][j] = other(i,j);
}
return *this;
}
template<typename T, typename uint>
array2d<T,uint>::~array2d()
{
if(data)
{
for(uint i = 0; i < w; ++i)
delete [] data[i];
delete [] data;
data = 0;
}
}
//////////
////////// basic_grid.hpp
//////////
template< typename container_t,
typename data_t = typename container_t::data_type,
typename size_t = typename container_t::size_type >
class basic_grid
{
const size_t xstride;
const size_t ystride;
const size_t h;
const size_t w;
container_t & container;
public:
typedef container_t container_type;
typedef data_t data_type;
typedef size_t size_type;
basic_grid(container_t & container_, size_t xstride_, size_t ystride_)
: xstride(xstride_), ystride(ystride_), h(container_.height()/ystride_),
w(container_.width()/xstride_), container(container_)
{ /*empty*/ }
basic_grid(container_t & container_, size_t stride_)
: xstride(stride_), ystride(stride_), h(container_.height()/stride_),
w(container_.width()/stride_), container(container_)
{ /*empty*/ }
data_t & operator()(size_t x, size_t y)
{
return container(x*xstride, y*ystride);
}
const data_t & operator()(size_t x, size_t y) const
{
return container(x*xstride, y*ystride);
}
const size_t height() const { return h; }
const size_t width() const { return w; }
};
template< typename container_t,
unsigned xstride,
unsigned ystride,
typename data_t = typename container_t::data_type,
typename size_t = typename container_t::size_type >
class basic_grid_template
{
const size_t h;
const size_t w;
container_t & container;
public:
typedef container_t container_type;
typedef data_t data_type;
typedef size_t size_type;
basic_grid_template(container_t & container_)
: h(container_.height()/ystride),
w(container_.width()/xstride), container(container_)
{ /*empty*/ }
data_t & operator()(size_t x, size_t y)
{
return container(x*xstride, y*ystride);
}
const data_t & operator()(size_t x, size_t y) const
{
return container(x*xstride, y*ystride);
}
const size_t height() const { return h; }
const size_t width() const { return w; }
};
//////////
////////// main.cpp
//////////
template<typename array2d_type>
std::clock_t speed_test(unsigned long int N, array2d_type & g, array2d_type & back_buffer)
{
assert(g.height() == back_buffer.height());
assert(g.width() == back_buffer.width());
unsigned h = g.height();
unsigned w = g.width();
std::clock_t beg = std::clock();
for(unsigned long n = 0; n < N; ++n)
{
for(unsigned x = 1; x < w-1; ++x)
{
for(unsigned y = 1; y < h-1; ++y)
back_buffer(x,y) = g(x-1,y) + g(x+1,y) + g(x,y-1) + g(x,y+1) - 4.0*g(x,y);
}
for(unsigned x = 1; x < w-1; ++x)
{
for(unsigned y = 1; y < h-1; ++y)
g(x,y) += 0.2*back_buffer(x,y);
}
}
std::clock_t end = std::clock();
std::cout << "That took " << (end-beg) << " ticks." << std::endl;
return end-beg;
}
int main()
{
const unsigned stride = 2;
const unsigned h = 300;
const unsigned w = 300;
const unsigned N = 10000;
clock_t test1, test3, test4;
{
array2d<long double> barebones(w/stride,h/stride,0.0);
for(unsigned x = 1; x < w/stride-stride; ++x)
{
for(unsigned y = 1; y < h/stride-stride; ++y)
barebones(x,y) = std::rand();
}
array2d<long double> barebones_back(w/stride,h/stride,0.0);
test1 = speed_test(N,barebones,barebones_back);
}
{
unsigned int instride;
std::cout << "Stride length: ";
std::cin >> instride;
//I ask the user only so the optimizer can't deduce the stride at compile-time.
//The user's input should be the same as the compile-time constant stride.
if(instride != stride)
std::cout << "Warning: your stride is not the same as the compile-time one." << std::endl;
typedef array2d<long double> underclass;
typedef basic_grid< underclass, underclass::data_type, underclass::size_type > grid;
array2d<long double> container(w,h,0.0);
grid g(container, instride);
array2d<long double> back_buffer(w,h,0.0);
grid g_back(back_buffer, instride);
for(unsigned x = 1; x < g.width()-1; ++x)
{
for(unsigned y = 1; y < g.height()-1; ++y)
g(x,y) = std::rand();
}
test3 = speed_test(N, g, g_back);
}
{
typedef array2d<long double> underclass;
typedef basic_grid_template< underclass, stride, stride, underclass::data_type, underclass::size_type > grid_template;
array2d<long double> container(w,h,0.0);
grid_template g(container);
array2d<long double> back_buffer(w,h,0.0);
grid_template g_back(back_buffer);
for(unsigned x = 1; x < g.width()-1; ++x)
{
for(unsigned y = 1; y < g.height()-1; ++y)
g(x,y) = std::rand();
}
test4 = speed_test(N, g, g_back);
}
std::cout << "multiplication by runtime constant: test3/test1 equals " << (static_cast<long double>(test3)/test1) << std::endl;
std::cout << "multiplication by compile-time constant: test4/test1 equals " << (static_cast<long double>(test4)/test1) << std::endl;
std::cin.ignore();
std::cin.get();
return 0;
}
Depending on main()'s h, w, and N, I find that the disparity between the two methods can vary by a factor of 2, or by only 1%. I don't understand assembly well enough to analyze the optimizer's output. Microsoft's compiler shows more of a disparity than GCC on my system.