From: Jonathan Parker <jonathanparker89@gm...>  20140425 16:26:55

Hi there, Thanks for this great tool! I have been trying out ViennaCL with the Eigen libraries and the CUDA backend using Visual Studio 2012 on Windows 8.1. In particular, I have written a simple program to test the relative speeds of performing matrix multiplication on the CPU with Eigen and on the GPU with ViennaCL. I have noticed that, in general, using ViennaCL is MUCH slower (about 100 times), particularly with very large matrices, so my assumption is that it is, in fact, not running on the GPU. I have posted my code below. My first question is whether I have done something wrong - I am very new to all things CUDA, so this is likely. My second question is whether or not ViennaCL is compatible with the NVIDIA GTX 7XX series - I have an NVIDIA GTX 770M - as this is not listed in Table 1 of the User Manual. Thank you in advance for your help. Here is my code: #include <iostream> #include <Eigen/Core> #include <Eigen/Householder> #include <ctime> #include "cuda.h" #include "cuda_runtime.h" #include "device_launch_parameters.h" #include "GPU.cu" #define VIENNACL_HAVE_EIGEN using namespace std; using namespace Eigen; using namespace viennacl; int main(void){ clock_t t1, t2; float diff; int row = 1000; int col = 1000; int rc = 1000; MatrixXf A; A.setRandom(row,col); MatrixXf B; B.setRandom(row,rc); MatrixXf C; C.setRandom(rc,col); matrix<float> A_GPU; matrix<float> B_GPU; matrix<float> C_GPU; copy(A, A_GPU); copy(B, B_GPU); copy(C, C_GPU); //CPU product for (int i = 0; i < 10; i++){ t1 = clock(); A = B * C; t2 = clock(); diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; cout << "CPU: " << diff << " seconds." << endl; } //GPU product for (int i = 0; i < 10; i++){ t1 = clock(); DoProduct(A_GPU,B_GPU,C_GPU); t2 = clock(); diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; cout << "GPU: " << diff << " seconds." << endl; } system("PAUSE"); } In GPU.cu... 
// GPU.cu - ViennaCL matrix-product helper.

// Must be defined before any ViennaCL header is included: the headers test
// this macro while they are being preprocessed, so a definition placed after
// the includes cannot affect them.
#define VIENNACL_WITH_CUDA

#include "viennacl/vector.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/compressed_matrix.hpp"
#include "viennacl/linalg/prod.hpp"

using namespace viennacl;
using namespace viennacl::linalg;
using namespace viennacl::backend;

/// Computes the matrix product B * C and stores it in A.
///
/// @param A  Destination matrix; overwritten with the product.
/// @param B  Left-hand operand of the product.
/// @param C  Right-hand operand of the product.
/// @return   A copy of A after the assignment (the result is returned by
///           value, in addition to being written through the A reference).
matrix<float> DoProduct(matrix<float>& A, matrix<float>& B, matrix<float>& C){
  A = prod(B,C);
  return A;
}