I have a program that runs algorithms over 2d arrays. Now I want to implement an abstraction that allows me to iterate over every n'th member -- to have a stride like in a valarray slice. I tried using a pure virtual interface to allow the same code that runs the regular arrays could also run the "strided" ones, but it proved (way) too slow. So now I'm using templates.My question is, what is the difference (in terms of opportunity for optimization) between multiplying a runtime integer by a compile-time constant and multiplying a runtime integer by a runtime constant?

I can specify the stride of my array via the constructor or via a template argument. I have found that the template solution is much faster for small jobs on the arrays, but comparable to the non-template solution for large jobs. Optimization is always confusing, but speed is at the heart of why I'm interested in "striding" at all.

I have a full program to demonstrate what I'm saying. It has a class array2d, which is just a wrapper for a T**, and it has two classes basic_grid<>, which act as an interface to an array2d -- one using compile-time constants and the other runtime constants.

Depending on main()'s h, w, and N, I find that the disparity between the two methods can vary by a factor of 2, or by only 1%. I don't understand assembly well enough to analyze the optimizer's output. Microsoft's compiler shows more of a disparity than GCC on my system.Code:#include <iostream> #include <ctime> #include <cassert> #include <cstdlib> ////////// ////////// array2d.hpp ////////// template<typename T, typename uint = std::size_t> class array2d { protected: T** data; const uint w; const uint h; void alloc(const T & fill = T()); public: typedef T data_type; typedef uint size_type; array2d(uint width_, uint height_, const T & fill = T()) : w(width_), h(height_) { alloc(fill); } array2d & operator = (const array2d<T, uint> & other); array2d(array2d<T,uint> & other) : w(other.width()), h(other.height()) { alloc(); this->operator =( other ); } array2d(const array2d<T,uint> & other) : w(other.width()), h(other.height()) { alloc(); this->operator =( other ); } ~array2d(); const uint width() const { return w; } const uint height() const { return h; } T & operator()(uint x, uint y) { return data[x][y]; } const T & operator()(uint x, uint y) const { return data[x][y]; } }; template<typename T, typename uint> void array2d<T,uint>::alloc(const T & fill) { data = new T*[w]; for(uint i = 0; i < w; ++i) { data[i] = new T[h]; for(uint j = 0; j < h; ++j) data[i][j] = fill; } } template<typename T, typename uint> array2d<T,uint> & array2d<T,uint>::operator = (const array2d<T,uint> & other) { assert(other.width() == w && other.height() == h); for(uint i = 0; i < w; ++i) { for(uint j = 0; j < h; ++j) data[i][j] = other(i,j); } return *this; } template<typename T, typename uint> array2d<T,uint>::~array2d() { if(data) { for(uint i = 0; i < w; ++i) delete [] data[i]; delete [] data; data = 0; } } ////////// ////////// basic_grid.hpp ////////// template< typename container_t, typename data_t = typename container_t::data_type, typename size_t = typename container_t::size_type > class basic_grid { const size_t xstride; const size_t ystride; const size_t h; const size_t w; container_t & container; public: typedef container_t container_type; typedef data_t data_type; typedef size_t size_type; basic_grid(container_t & container_, size_t xstride_, size_t ystride_) : xstride(xstride_), ystride(ystride_), h(container_.height()/ystride_), w(container_.width()/xstride_), container(container_) { /*empty*/ } basic_grid(container_t & container_, size_t stride_) : xstride(stride_), ystride(stride_), h(container_.height()/stride_), w(container_.width()/stride_), container(container_) { /*empty*/ } data_t & operator()(size_t x, size_t y) { return container(x*xstride, y*ystride); } const data_t & operator()(size_t x, size_t y) const { return container(x*xstride, y*ystride); } const size_t height() const { return h; } const size_t width() const { return w; } }; template< typename container_t, unsigned xstride, unsigned ystride, typename data_t = typename container_t::data_type, typename size_t = typename container_t::size_type > class basic_grid_template { const size_t h; const size_t w; container_t & container; public: typedef container_t container_type; typedef data_t data_type; typedef size_t size_type; basic_grid_template(container_t & container_) : h(container_.height()/ystride), w(container_.width()/xstride), container(container_) { /*empty*/ } data_t & operator()(size_t x, size_t y) { return container(x*xstride, y*ystride); } const data_t & operator()(size_t x, size_t y) const { return container(x*xstride, y*ystride); } const size_t height() const { return h; } const size_t width() const { return w; } }; ////////// ////////// main.cpp ////////// template<typename array2d_type> std::clock_t speed_test(unsigned long int N, array2d_type & g, array2d_type & back_buffer) { assert(g.height() == back_buffer.height()); assert(g.width() == back_buffer.width()); unsigned h = g.height(); unsigned w = g.width(); std::clock_t beg = std::clock(); for(unsigned long n = 0; n < N; ++n) { for(unsigned x = 1; x < w-1; ++x) { for(unsigned y = 1; y < h-1; ++y) back_buffer(x,y) = g(x-1,y) + g(x+1,y) + g(x,y-1) + g(x,y+1) - 4.0*g(x,y); } for(unsigned x = 1; x < w-1; ++x) { for(unsigned y = 1; y < h-1; ++y) g(x,y) += 0.2*back_buffer(x,y); } } std::clock_t end = std::clock(); std::cout << "That took " << (end-beg) << " ticks." << std::endl; return end-beg; } int main() { const unsigned stride = 2; const unsigned h = 300; const unsigned w = 300; const unsigned N = 10000; clock_t test1, test3, test4; { array2d<long double> barebones(w/stride,h/stride,0.0); for(unsigned x = 1; x < w/stride-stride; ++x) { for(unsigned y = 1; y < h/stride-stride; ++y) barebones(x,y) = std::rand(); } array2d<long double> barebones_back(w/stride,h/stride,0.0); test1 = speed_test(N,barebones,barebones_back); } { unsigned int instride; std::cout << "Stride length: "; std::cin >> instride; //I ask the user only so the optimizer can't deduce the stride at compile-time. //The user's input should be the same as the compile-time constant stride. if(instride != stride) std::cout << "Warning: your stride is not the same as the compile-time one." << std::endl; typedef array2d<long double> underclass; typedef basic_grid< underclass, underclass::data_type, underclass::size_type > grid; array2d<long double> container(w,h,0.0); grid g(container, instride); array2d<long double> back_buffer(w,h,0.0); grid g_back(back_buffer, instride); for(unsigned x = 1; x < g.width()-1; ++x) { for(unsigned y = 1; y < g.height()-1; ++y) g(x,y) = std::rand(); } test3 = speed_test(N, g, g_back); } { typedef array2d<long double> underclass; typedef basic_grid_template< underclass, stride, stride, underclass::data_type, underclass::size_type > grid_template; array2d<long double> container(w,h,0.0); grid_template g(container); array2d<long double> back_buffer(w,h,0.0); grid_template g_back(back_buffer); for(unsigned x = 1; x < g.width()-1; ++x) { for(unsigned y = 1; y < g.height()-1; ++y) g(x,y) = std::rand(); } test4 = speed_test(N, g, g_back); } std::cout << "multiplication by runtime constant: test3/test1 equals " << (static_cast<long double>(test3)/test1) << std::endl; std::cout << "multiplication by compile-time constant: test4/test1 equals " << (static_cast<long double>(test4)/test1) << std::endl; std::cin.ignore(); std::cin.get(); return 0; }