From: John Peterson <peterson@cf...>  20040209 04:19:36

Benjamin S. Kirk writes: > Help me out here, John... Do you remeber that C++ User's Journal on > scientific programming? I think in near the end of that they describe a > templated pow function that hadles x^int cases intelligently through > repeated squaring... Does that ring a bell? It might be worth having > in utility.h OK, I've done some testing. Apparently pow() is ridiculously slow. I tested two different methods and pow in the attached code. Under O3 optimization, I found the results on a P4 2GHz (in seconds): Raising 50 million double values to the third power. Recursive Template: 0.05 Template Loop: 1.93 Standard pow(): 13.01 Raising to the fifth power, results were: Recursive Template: 0.05 Template Loop: 2.23 Standard pow(): 14.17 Finally, raising to the 10th power (base numbers were small) Recursive Template: 0.06 Template Loop: 2.64 Standard pow(): 13.56 The "Recursive Template" method is the method I posted to the mailing list earlier today. It seems to be the fastest, and as expected, its performance remains about constant no matter how high the exponent becomes. Here is the code (don't forget lm for pow()) I invite you to test it out yourselves... #include <iostream> #include <vector> #include <time.h> #include <iomanip> // Anonymous Namespace to hold useful constants namespace { const unsigned int n_bases = static_cast<unsigned int>(1e7); // Number of tests to perform const unsigned int n_tests = 5; // Raise to an integer power const unsigned int power = 10; }; // Power raising class template <int N> struct Pow { template <int Val> struct In { enum { value = Val * Pow<N1>::template In<Val>::value }; }; }; template <> struct Pow<0> { template <int Val> struct In { enum { value = 1 }; }; }; // Rescursive template function template <int N> double pow(double x) { return x * pow<N1>(x); } template <> double pow<0>(double x) { return 1.; } // Templated Loop function template <int N> double pow2(double x) { const unsigned int l = N / 2; const unsigned int m = N % 2; const double x2 = x*x; double y = 1.0; for (unsigned int i=0; i<l; ++i) { y = y * x2; } return m==0 ? y : y*x; } // I can't figure out how to get the class to use // double as its base value. Therefore, this method // is not feasible now. // void test_class(std::vector<double>& base_values) // { // for (unsigned int n=0; n<n_tests; ++n) // { // for (unsigned int i=0; i<n_bases; ++i) // //std::cout << // Pow<base_values[i]>::In<power> // // << std::endl // ; // } // } void test_recursive_template(std::vector<double>& base_values) { for (unsigned int n=0; n<n_tests; ++n) { for (unsigned int i=0; i<n_bases; ++i) //std::cout << pow<power>(base_values[i]) // << std::endl ; } } void test_template_loop(std::vector<double>& base_values) { for (unsigned int n=0; n<n_tests; ++n) { for (unsigned int i=0; i<n_bases; ++i) //std::cout << pow2<power>(base_values[i]) // << std::endl ; } } void test_standard_pow(std::vector<double>& base_values) { for (unsigned int n=0; n<n_tests; ++n) { for (unsigned int i=0; i<n_bases; ++i) //std::cout << pow(base_values[i], power) // << std::endl ; } } int main() { srand(0); std::vector<double> base_values(n_bases); // Fill the vector with random values between zero and one. for (unsigned int i=0; i<n_bases; ++i) base_values[i] = static_cast<double>(rand()) / static_cast<double>(RAND_MAX); // Convenient typedef for function pointers typedef void (*TestFunction) (std::vector<double>& v); // A vector of function pointers for the functions we will test. std::vector<TestFunction> tests(3); tests[0] = test_recursive_template; tests[1] = test_template_loop; tests[2] = test_standard_pow; // A vector of convenient names std::vector<std::string> test_names(3); test_names[0] = "Recursive Template"; test_names[1] = "Template Loop"; test_names[2] = "Standard pow()"; for (unsigned int i=0; i<tests.size(); ++i) { clock_t start = clock(); tests[i](base_values); clock_t stop = clock(); const double time_taken = (stop  start)/double(CLOCKS_PER_SEC); std::cout << test_names[i] << ": " << std::setprecision(10) << time_taken << std::endl; } return 0; } 