Okay, even though the problem posted at the beginning of the thread has already been solved, in follow up to brewbuck's suggestion, I did some research on roots of unity and refreshed myself a little bit.

I believe this optimizes out the call to std::exp correctly:

Code:

std::vector < std::complex<double> > InternalFastFourierTransform (std::vector < std::complex<double> > input)
{
static std::complex<double> i = std::complex<double>(0, 1);
static std::complex<double> NegativeTwoPiI = -2.0 * PI * i;
int N = (int)input.size();
int halfN = N / 2;
std::complex<double> omega_n = std::exp(NegativeTwoPiI / double(N));
std::complex<double> omega = std::complex<double> (1, 0);
if (N == 1)
{
return input;
}
else
{
//Recurse and get evens/odds
std::vector< std::complex<double> > evens = InternalFastFourierTransform( GetVectorRange(input, 0, N, 2) );
std::vector< std::complex<double> > odds = InternalFastFourierTransform( GetVectorRange(input, 1, N, 2) );
//Combine
std::vector< std::complex<double> > result = std::vector< std::complex<double> > (N);
for(int k = 0; k < halfN; k++)
{
result[k] = evens[k] + omega * odds[k];
result[k + halfN] = evens[k] - omega * odds[k];
omega = omega * omega_n;
}
return result;
}
}