From: Sensei <sen...@gm...> - 2017-02-17 16:27:17
|
Hi! I am new to the OpenCL/GPU world, and I probably expected too much from it. I am computing the norm_1 of a vector, on the CPU and GPU, and I had these results: Platform Apple Version OpenCL 1.2 (Jan 4 2017 22:35:59) > Device type CPU Version OpenCL 1.2 (Jan 4 2017 22:35:59) > Device type GPU Version OpenCL 1.2 (Jan 4 2017 22:35:59) STARTING, TIMES ARE IN MILLISECONDS Reserving CPU vector Reserving CPU vector 16 Filling CPU vector Filling CPU vector 9 Reserving GPU vector Reserving GPU vector 82 Copying to GPU Copying to GPU 158310 Computing norm_1 on GPU Computing norm_1 on GPU 333 Computing norm_1 on CPU Computing norm_1 on CPU 8 GPU: 5e+11 CPU: 5.00000e+11 Program ended with exit code: 0 As you can see, the GPU times are waaaaay higher than the CPU ones. My code is really simple, and I am following the recommended conduct to build values on the CPU and then copy them. Is this bad performance due to my platform? I am running on a MacBook Pro now. My code follows. Thanks! #include <iostream> #include <vector> #include <algorithm> #include <cstdlib> #include <numeric> #include <chrono> #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #define __CL_ENABLE_EXCEPTIONS #define VIENNACL_WITH_OPENCL #include "cl.hpp" #include "viennacl/scalar.hpp" #include "viennacl/vector.hpp" #include "viennacl/ocl/backend.hpp" #include "viennacl/linalg/norm_1.hpp" int main(int argc, const char * argv[]) { // This is what vienna sees auto viennaplatforms = viennacl::ocl::get_platforms(); auto viennadevices = viennacl::ocl::platform().devices(); // See what standard OpenCL sees std::vector<cl::Platform> platforms; // Get platform cl::Platform::get(&platforms); // Temp std::string s; // Where the GPU lies cl::Device gpudevice; // Found a GPU bool gpufound = false; // See if we have a GPU for (auto p : platforms) { s.clear(); p.getInfo(CL_PLATFORM_NAME, &s); std::cout << "Platform " << s << std::endl; s.clear(); p.getInfo(CL_PLATFORM_VERSION, &s); std::cout << "Version " << s << std::endl; std::cout << std::endl; std::vector<cl::Device> devices; p.getDevices(CL_DEVICE_TYPE_ALL, &devices); for (auto d : devices) { std::size_t i = 4; d.getInfo(CL_DEVICE_TYPE, &i); std::cout << "> Device type " << (i & CL_DEVICE_TYPE_CPU ? "CPU" : "") << (i & CL_DEVICE_TYPE_GPU ? "GPU" : "") << (i & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR" : "") << std::endl; if (i & CL_DEVICE_TYPE_GPU) { gpudevice = d; gpufound = true; } std::cout << "Version " << s << std::endl; } } if (!gpufound) { std::cout << "NO GPU FOUND. ABORTING." << std::endl; return 1; } // Size int size = 1 * 1000 * 1000; // Measuring time auto start = std::chrono::steady_clock::now(); std::cout << std::endl << "STARTING, TIMES ARE IN MILLISECONDS" << std::endl << std::endl; std::cout << "Reserving CPU vector " << std::endl; start = std::chrono::steady_clock::now(); std::vector<double> cpuv; cpuv.resize(size); std::cout << "Reserving CPU vector " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "Filling CPU vector " << std::endl; start = std::chrono::steady_clock::now(); std::iota(cpuv.begin(), cpuv.end(), 1.0 ); std::cout << "Filling CPU vector " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "Reserving GPU vector " << std::endl; start = std::chrono::steady_clock::now(); viennacl::vector<float> gpuv; gpuv.resize(size); std::cout << "Reserving GPU vector " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "Copying to GPU " << std::endl; start = std::chrono::steady_clock::now(); std::copy(cpuv.begin(), cpuv.end(), gpuv.begin()); std::cout << "Copying to GPU " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "Computing norm_1 on GPU " << std::endl; start = std::chrono::steady_clock::now(); double gpunorm1 = viennacl::linalg::norm_1(gpuv); std::cout << "Computing norm_1 on GPU " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "Computing norm_1 on CPU " << std::endl; start = std::chrono::steady_clock::now(); double cpunorm1 = std::accumulate(cpuv.begin(), cpuv.end(), 0.0, [](double a, double b){ return a + std::abs(b); }); std::cout << "Computing norm_1 on CPU " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() << std::endl << std::endl; std::cout << "GPU: " << gpunorm1 << " CPU: " << cpunorm1 << std::endl; return 0; } |