From: Oswin K. <Osw...@ru...> - 2017-02-17 19:19:47
|
Hi! Compiling the kernels for OpenCL takes time. Try measuring the second time you compute something, once the kernels have already been compiled. On 2017-02-17 17:27, Sensei wrote: > Hi! > > I am new to the OpenCL/GPU world, and I probably expected too much > from it. I am computing the norm_1 of a vector, on the CPU and GPU, > and I had these results: > > PLATFORM APPLE > VERSION OPENCL 1.2 (JAN 4 2017 22:35:59) > >> DEVICE TYPE CPU > VERSION OPENCL 1.2 (JAN 4 2017 22:35:59) >> DEVICE TYPE GPU > VERSION OPENCL 1.2 (JAN 4 2017 22:35:59) > > STARTING, TIMES ARE IN MILLISECONDS > > RESERVING CPU VECTOR > RESERVING CPU VECTOR 16 > > FILLING CPU VECTOR > FILLING CPU VECTOR 9 > > RESERVING GPU VECTOR > RESERVING GPU VECTOR 82 > > COPYING TO GPU > COPYING TO GPU 158310 > > COMPUTING NORM_1 ON GPU > COMPUTING NORM_1 ON GPU 333 > > COMPUTING NORM_1 ON CPU > COMPUTING NORM_1 ON CPU 8 > > GPU: 5E+11 CPU: 5.00000E+11 > PROGRAM ENDED WITH EXIT CODE: 0 > > As you can see, the GPU times are waaaaay higher than the CPU ones. My > code is really simple, and I am following the recommended conduct to > build values on the CPU and then copy them. > > Is this bad performance due to my platform? I am running on a MacBook > Pro now. My code follows. > > Thanks! 
> > #include <iostream> > #include <vector> > #include <algorithm> > #include <cstdlib> > #include <numeric> > #include <chrono> > > #define CL_USE_DEPRECATED_OPENCL_1_1_APIS > #define __CL_ENABLE_EXCEPTIONS > > #define VIENNACL_WITH_OPENCL > > #include "cl.hpp" > #include "viennacl/scalar.hpp" > #include "viennacl/vector.hpp" > #include "viennacl/ocl/backend.hpp" > #include "viennacl/linalg/norm_1.hpp" > > int main(int argc, const char * argv[]) > { > // This is what vienna sees > auto viennaplatforms = viennacl::ocl::get_platforms(); > auto viennadevices = viennacl::ocl::platform().devices(); > > // See what standard OpenCL sees > std::vector<cl::Platform> platforms; > > // Get platform > cl::Platform::get(&platforms); > > // Temp > std::string s; > > // Where the GPU lies > cl::Device gpudevice; > > // Found a GPU > bool gpufound = false; > > // See if we have a GPU > for (auto p : platforms) > { > s.clear(); > p.getInfo(CL_PLATFORM_NAME, &s); > std::cout << "Platform " << s << std::endl; > > s.clear(); > p.getInfo(CL_PLATFORM_VERSION, &s); > std::cout << "Version " << s << std::endl; > > std::cout << std::endl; > > std::vector<cl::Device> devices; > > p.getDevices(CL_DEVICE_TYPE_ALL, &devices); > > for (auto d : devices) > { > std::size_t i = 4; > d.getInfo(CL_DEVICE_TYPE, &i); > > std::cout << "> Device type " << > (i & CL_DEVICE_TYPE_CPU ? "CPU" : "") << > (i & CL_DEVICE_TYPE_GPU ? "GPU" : "") << > (i & CL_DEVICE_TYPE_ACCELERATOR ? > "ACCELERATOR" : "") << > std::endl; > > if (i & CL_DEVICE_TYPE_GPU) > { > gpudevice = d; > gpufound = true; > } > > std::cout << "Version " << s << std::endl; > > } > } > > if (!gpufound) > { > std::cout << "NO GPU FOUND. ABORTING." 
<< std::endl; > return 1; > } > > // Size > int size = 1 * 1000 * 1000; > > // Measuring time > auto start = std::chrono::steady_clock::now(); > > std::cout << std::endl << "STARTING, TIMES ARE IN MILLISECONDS" << > std::endl << std::endl; > > std::cout << "Reserving CPU vector " << std::endl; > start = std::chrono::steady_clock::now(); > std::vector<double> cpuv; > cpuv.resize(size); > std::cout << "Reserving CPU vector " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "Filling CPU vector " << std::endl; > start = std::chrono::steady_clock::now(); > std::iota(cpuv.begin(), cpuv.end(), 1.0 ); > std::cout << "Filling CPU vector " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "Reserving GPU vector " << std::endl; > start = std::chrono::steady_clock::now(); > viennacl::vector<float> gpuv; > gpuv.resize(size); > std::cout << "Reserving GPU vector " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "Copying to GPU " << std::endl; > start = std::chrono::steady_clock::now(); > std::copy(cpuv.begin(), cpuv.end(), gpuv.begin()); > std::cout << "Copying to GPU " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "Computing norm_1 on GPU " << std::endl; > start = std::chrono::steady_clock::now(); > double gpunorm1 = viennacl::linalg::norm_1(gpuv); > std::cout << "Computing norm_1 on GPU " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "Computing norm_1 on CPU " << std::endl; > start = std::chrono::steady_clock::now(); > double cpunorm1 = 
std::accumulate(cpuv.begin(), cpuv.end(), 0.0, > [](double a, double b){ return a > + std::abs(b); }); > std::cout << "Computing norm_1 on CPU " << > std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count() > << std::endl << std::endl; > > std::cout << "GPU: " << gpunorm1 << " CPU: " << cpunorm1 << > std::endl; > > return 0; > } > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most > engaging tech sites, SlashDot.org! http://sdm.link/slashdot > _______________________________________________ > ViennaCL-devel mailing list > Vie...@li... > https://lists.sourceforge.net/lists/listinfo/viennacl-devel |