From: Charles D. <cde...@gm...> - 2016-12-14 16:56:06
|
I have a function where I use a custom opencl kernel. The function is below. The function runs without problem and provides the correct result after the *first time* I call it. However, if I try to call the function again it crashes right after the 'initialized' output where it is trying to add the kernel program. Any idea why it would be crashing here on subsequent calls? Is there some cleanup I should be doing at the end of this function? Thanks, Charles template<typename T> void cpp_vclMatrix_set_row_order( SEXP ptrA_, const bool AisVCL, Eigen::VectorXi indices, SEXP sourceCode_, const int max_local_size, const int ctx_id) { std::cout << "called" << std::endl; std::string my_kernel = as<std::string>(sourceCode_); viennacl::ocl::context ctx(viennacl::ocl::get_context(ctx_id)); viennacl::matrix<T> *vcl_A; // viennacl::matrix<T> *vcl_B; std::cout << "getting matrix" << std::endl; vcl_A = getVCLptr<T>(ptrA_, AisVCL, ctx_id); // vcl_B = getVCLptr<T>(ptrB_, BisVCL, ctx_id); unsigned int M = vcl_A->size1(); // // int N = vcl_B.size1(); unsigned int P = vcl_A->size2(); unsigned int M_internal = vcl_A->internal_size1(); unsigned int P_internal = vcl_A->internal_size2(); std::cout << "initialized" << std::endl; // add kernel to program viennacl::ocl::program & my_prog = ctx.add_program(my_kernel, "my_kernel"); std::cout << "program added" << std::endl; // get compiled kernel function viennacl::ocl::kernel & set_row_order = my_prog.get_kernel("set_row_order"); std::cout << "got kernel" << std::endl; // set global work sizes set_row_order.global_work_size(0, M_internal); set_row_order.global_work_size(1, P_internal); std::cout << "set global" << std::endl; // set local work sizes set_row_order.local_work_size(0, max_local_size); set_row_order.local_work_size(1, max_local_size); std::cout << "begin enqueue" << std::endl; { std::cout << "moving indexes" << std::endl; viennacl::vector<int> vcl_I(indices.size()); viennacl::copy(indices, vcl_I); std::cout << "creating dummy 
vector" << std::endl; viennacl::vector<T> vcl_V = viennacl::zero_vector<T>(M); viennacl::matrix_base<T> vcl_B(vcl_V.handle(), M, 0, 1, M, //row layout 1, 0, 1, 1, //column layout true); // row-major viennacl::range r(0, M); for(unsigned int i=0; i < P; i++){ viennacl::range c(i, i+1); viennacl::matrix_range<viennacl::matrix<T> > tmp(*vcl_A, r, c); // std::cout << tmp << std::endl; viennacl::ocl::enqueue(set_row_order(tmp, vcl_B, vcl_I, M, i, M_internal)); tmp = vcl_B; } } } |
From: Charles D. <cde...@gm...> - 2016-12-14 17:02:04
|
A quick addition: it also only seems to crash when the number of rows in the input matrix matches or exceeds 1000 (i.e., it works with the trivial example with 100 rows). Charles On Wed, Dec 14, 2016 at 10:55 AM, Charles Determan <cde...@gm...> wrote: > I have a function where I use a custom opencl kernel. The function is > below. The function runs without problem and provides the correct result > after the *first time* I call it. However, if I try to call the function > again it crashes right after the 'initialized' output where it is trying to > add the kernel program. Any idea why it would be crashing here on > subsequent calls? Is there some cleanup I should be doing at the end of > this function? > > Thanks, > Charles > > > template<typename T> > void > cpp_vclMatrix_set_row_order( > SEXP ptrA_, > const bool AisVCL, > Eigen::VectorXi indices, > SEXP sourceCode_, > const int max_local_size, > const int ctx_id) > { > > std::cout << "called" << std::endl; > > std::string my_kernel = as<std::string>(sourceCode_); > > viennacl::ocl::context ctx(viennacl::ocl::get_context(ctx_id)); > > viennacl::matrix<T> *vcl_A; > // viennacl::matrix<T> *vcl_B; > > std::cout << "getting matrix" << std::endl; > vcl_A = getVCLptr<T>(ptrA_, AisVCL, ctx_id); > // vcl_B = getVCLptr<T>(ptrB_, BisVCL, ctx_id); > > unsigned int M = vcl_A->size1(); > // // int N = vcl_B.size1(); > unsigned int P = vcl_A->size2(); > unsigned int M_internal = vcl_A->internal_size1(); > unsigned int P_internal = vcl_A->internal_size2(); > > std::cout << "initialized" << std::endl; > > // add kernel to program > viennacl::ocl::program & my_prog = ctx.add_program(my_kernel, > "my_kernel"); > > std::cout << "program added" << std::endl; > > // get compiled kernel function > viennacl::ocl::kernel & set_row_order = my_prog.get_kernel("set_row_ > order"); > > std::cout << "got kernel" << std::endl; > > // set global work sizes > set_row_order.global_work_size(0, M_internal); > set_row_order.global_work_size(1, 
P_internal); > > std::cout << "set global" << std::endl; > > // set local work sizes > set_row_order.local_work_size(0, max_local_size); > set_row_order.local_work_size(1, max_local_size); > > std::cout << "begin enqueue" << std::endl; > > { > > std::cout << "moving indexes" << std::endl; > viennacl::vector<int> vcl_I(indices.size()); > viennacl::copy(indices, vcl_I); > > std::cout << "creating dummy vector" << std::endl; > viennacl::vector<T> vcl_V = viennacl::zero_vector<T>(M); > > viennacl::matrix_base<T> vcl_B(vcl_V.handle(), > M, 0, 1, M, //row layout > 1, 0, 1, 1, //column layout > true); // row-major > > viennacl::range r(0, M); > > for(unsigned int i=0; i < P; i++){ > > viennacl::range c(i, i+1); > > viennacl::matrix_range<viennacl::matrix<T> > tmp(*vcl_A, r, > c); > > // std::cout << tmp << std::endl; > > viennacl::ocl::enqueue(set_row_order(tmp, vcl_B, vcl_I, M, i, > M_internal)); > > tmp = vcl_B; > } > } > } > |
From: Karl R. <ru...@iu...> - 2016-12-15 11:18:36
|
Hi Charles, can you please send us the kernel? Maybe there's something wrong with the thread assignment there. Also, rather than looping from 0 to P-1, it would make much more sense to process all columns in parallel in a single kernel. Best regards, Karli On 12/14/2016 06:01 PM, Charles Determan wrote: > A quick addition, it also only seems to crash when the number of rows in > the input matrix match or exceed 1000 (i.e. it works with the trivial > example with 100 rows). > > Charles > > On Wed, Dec 14, 2016 at 10:55 AM, Charles Determan > <cde...@gm... <mailto:cde...@gm...>> wrote: > > I have a function where I use a custom opencl kernel. The function > is below. The function runs without problem and provides the > correct result after the *first time* I call it. However, if I try > to call the function again it crashes right after the 'initialized' > output where it is trying to add the kernel program. Any idea why > it would be crashing here on subsequent calls? Is there some > cleanup I should be doing at the end of this function? 
> > Thanks, > Charles > > > template<typename T> > void > cpp_vclMatrix_set_row_order( > SEXP ptrA_, > const bool AisVCL, > Eigen::VectorXi indices, > SEXP sourceCode_, > const int max_local_size, > const int ctx_id) > { > > std::cout << "called" << std::endl; > > std::string my_kernel = as<std::string>(sourceCode_); > > viennacl::ocl::context ctx(viennacl::ocl::get_context(ctx_id)); > > viennacl::matrix<T> *vcl_A; > // viennacl::matrix<T> *vcl_B; > > std::cout << "getting matrix" << std::endl; > vcl_A = getVCLptr<T>(ptrA_, AisVCL, ctx_id); > // vcl_B = getVCLptr<T>(ptrB_, BisVCL, ctx_id); > > unsigned int M = vcl_A->size1(); > // // int N = vcl_B.size1(); > unsigned int P = vcl_A->size2(); > unsigned int M_internal = vcl_A->internal_size1(); > unsigned int P_internal = vcl_A->internal_size2(); > > std::cout << "initialized" << std::endl; > > // add kernel to program > viennacl::ocl::program & my_prog = ctx.add_program(my_kernel, > "my_kernel"); > > std::cout << "program added" << std::endl; > > // get compiled kernel function > viennacl::ocl::kernel & set_row_order = > my_prog.get_kernel("set_row_order"); > > std::cout << "got kernel" << std::endl; > > // set global work sizes > set_row_order.global_work_size(0, M_internal); > set_row_order.global_work_size(1, P_internal); > > std::cout << "set global" << std::endl; > > // set local work sizes > set_row_order.local_work_size(0, max_local_size); > set_row_order.local_work_size(1, max_local_size); > > std::cout << "begin enqueue" << std::endl; > > { > > std::cout << "moving indexes" << std::endl; > viennacl::vector<int> vcl_I(indices.size()); > viennacl::copy(indices, vcl_I); > > std::cout << "creating dummy vector" << std::endl; > viennacl::vector<T> vcl_V = viennacl::zero_vector<T>(M); > > viennacl::matrix_base<T> vcl_B(vcl_V.handle(), > M, 0, 1, M, //row layout > 1, 0, 1, 1, //column layout > true); // row-major > > viennacl::range r(0, M); > > for(unsigned int i=0; i < P; i++){ > > viennacl::range c(i, i+1); > > 
viennacl::matrix_range<viennacl::matrix<T> > tmp(*vcl_A, > r, c); > > // std::cout << tmp << std::endl; > > viennacl::ocl::enqueue(set_row_order(tmp, vcl_B, vcl_I, > M, i, M_internal)); > > tmp = vcl_B; > } > } > } > > > > > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most > engaging tech sites, SlashDot.org! http://sdm.link/slashdot > > > > _______________________________________________ > ViennaCL-devel mailing list > Vie...@li... > https://lists.sourceforge.net/lists/listinfo/viennacl-devel > |
From: Charles D. <cde...@gm...> - 2016-12-15 14:03:00
|
Hi Karl, the kernel is below. Regarding your second point, I would love to process all columns in one kernel, but I want to avoid initializing another entire matrix of the same size. To avoid this, I am trying to initialize only a vector whose size equals the number of rows, which can then be assigned to the source matrix. Ideally I would like to do the reordering in place, so that I can just pass the row indexes in the order I want the rows to be in. __kernel void set_row_order( __global const double *A, __global double *B, __global const int *indices, const int Mdim, const int globalCol, const int MdimPad) { // Get the index of the elements to be processed const int globalRow = get_global_id(0); // C Row ID //const int globalCol = get_global_id(1); // C Col ID // Do the operation if((globalRow <= Mdim)){ B[globalRow] = A[indices[globalRow] * MdimPad + globalCol]; } } On Thu, Dec 15, 2016 at 5:18 AM, Karl Rupp <ru...@iu...> wrote: > Hi Charles, > > can you please send us the kernel? Maybe there's something wrong with the > thread assignment there. > > Also, rather than looping from 0 to P-1, it would make much more sense to > process all columns in parallel in a single kernel. > > Best regards, > Karli > > > On 12/14/2016 06:01 PM, Charles Determan wrote: > >> A quick addition, it also only seems to crash when the number of rows in >> the input matrix match or exceed 1000 (i.e. it works with the trivial >> example with 100 rows). >> >> Charles >> >> On Wed, Dec 14, 2016 at 10:55 AM, Charles Determan >> <cde...@gm... <mailto:cde...@gm...>> wrote: >> >> I have a function where I use a custom opencl kernel. The function >> is below. The function runs without problem and provides the >> correct result after the *first time* I call it. However, if I try >> >> to call the function again it crashes right after the 'initialized' >> output where it is trying to add the kernel program. Any idea why >> it would be crashing here on subsequent calls? 
Is there some >> cleanup I should be doing at the end of this function? >> >> Thanks, >> Charles >> >> >> template<typename T> >> void >> cpp_vclMatrix_set_row_order( >> SEXP ptrA_, >> const bool AisVCL, >> Eigen::VectorXi indices, >> SEXP sourceCode_, >> const int max_local_size, >> const int ctx_id) >> { >> >> std::cout << "called" << std::endl; >> >> std::string my_kernel = as<std::string>(sourceCode_); >> >> viennacl::ocl::context ctx(viennacl::ocl::get_context(ctx_id)); >> >> viennacl::matrix<T> *vcl_A; >> // viennacl::matrix<T> *vcl_B; >> >> std::cout << "getting matrix" << std::endl; >> vcl_A = getVCLptr<T>(ptrA_, AisVCL, ctx_id); >> // vcl_B = getVCLptr<T>(ptrB_, BisVCL, ctx_id); >> >> unsigned int M = vcl_A->size1(); >> // // int N = vcl_B.size1(); >> unsigned int P = vcl_A->size2(); >> unsigned int M_internal = vcl_A->internal_size1(); >> unsigned int P_internal = vcl_A->internal_size2(); >> >> std::cout << "initialized" << std::endl; >> >> // add kernel to program >> viennacl::ocl::program & my_prog = ctx.add_program(my_kernel, >> "my_kernel"); >> >> std::cout << "program added" << std::endl; >> >> // get compiled kernel function >> viennacl::ocl::kernel & set_row_order = >> my_prog.get_kernel("set_row_order"); >> >> std::cout << "got kernel" << std::endl; >> >> // set global work sizes >> set_row_order.global_work_size(0, M_internal); >> set_row_order.global_work_size(1, P_internal); >> >> std::cout << "set global" << std::endl; >> >> // set local work sizes >> set_row_order.local_work_size(0, max_local_size); >> set_row_order.local_work_size(1, max_local_size); >> >> std::cout << "begin enqueue" << std::endl; >> >> { >> >> std::cout << "moving indexes" << std::endl; >> viennacl::vector<int> vcl_I(indices.size()); >> viennacl::copy(indices, vcl_I); >> >> std::cout << "creating dummy vector" << std::endl; >> viennacl::vector<T> vcl_V = viennacl::zero_vector<T>(M); >> >> viennacl::matrix_base<T> vcl_B(vcl_V.handle(), >> M, 0, 1, M, //row layout >> 1, 0, 
1, 1, //column layout >> true); // row-major >> >> viennacl::range r(0, M); >> >> for(unsigned int i=0; i < P; i++){ >> >> viennacl::range c(i, i+1); >> >> viennacl::matrix_range<viennacl::matrix<T> > tmp(*vcl_A, >> r, c); >> >> // std::cout << tmp << std::endl; >> >> viennacl::ocl::enqueue(set_row_order(tmp, vcl_B, vcl_I, >> M, i, M_internal)); >> >> tmp = vcl_B; >> } >> } >> } >> >> >> >> >> ------------------------------------------------------------ >> ------------------ >> Check out the vibrant tech community on one of the world's most >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot >> >> >> >> _______________________________________________ >> ViennaCL-devel mailing list >> Vie...@li... >> https://lists.sourceforge.net/lists/listinfo/viennacl-devel >> >> > |