From: Charles D. <cde...@gm...> - 2016-12-14 17:02:04
|
A quick addition: it also only seems to crash when the number of rows in the input matrix matches or exceeds 1000 (i.e., it works with the trivial example with 100 rows). Charles On Wed, Dec 14, 2016 at 10:55 AM, Charles Determan <cde...@gm...> wrote: > I have a function where I use a custom opencl kernel. The function is > below. The function runs without problem and provides the correct result > after the *first time* I call it. However, if I try to call the function > again it crashes right after the 'initialized' output where it is trying to > add the kernel program. Any idea why it would be crashing here on > subsequent calls? Is there some cleanup I should be doing at the end of > this function? > > Thanks, > Charles > > > template<typename T> > void > cpp_vclMatrix_set_row_order( > SEXP ptrA_, > const bool AisVCL, > Eigen::VectorXi indices, > SEXP sourceCode_, > const int max_local_size, > const int ctx_id) > { > > std::cout << "called" << std::endl; > > std::string my_kernel = as<std::string>(sourceCode_); > > viennacl::ocl::context ctx(viennacl::ocl::get_context(ctx_id)); > > viennacl::matrix<T> *vcl_A; > // viennacl::matrix<T> *vcl_B; > > std::cout << "getting matrix" << std::endl; > vcl_A = getVCLptr<T>(ptrA_, AisVCL, ctx_id); > // vcl_B = getVCLptr<T>(ptrB_, BisVCL, ctx_id); > > unsigned int M = vcl_A->size1(); > // // int N = vcl_B.size1(); > unsigned int P = vcl_A->size2(); > unsigned int M_internal = vcl_A->internal_size1(); > unsigned int P_internal = vcl_A->internal_size2(); > > std::cout << "initialized" << std::endl; > > // add kernel to program > viennacl::ocl::program & my_prog = ctx.add_program(my_kernel, > "my_kernel"); > > std::cout << "program added" << std::endl; > > // get compiled kernel function > viennacl::ocl::kernel & set_row_order = my_prog.get_kernel("set_row_ > order"); > > std::cout << "got kernel" << std::endl; > > // set global work sizes > set_row_order.global_work_size(0, M_internal); > set_row_order.global_work_size(1, 
P_internal); > > std::cout << "set global" << std::endl; > > // set local work sizes > set_row_order.local_work_size(0, max_local_size); > set_row_order.local_work_size(1, max_local_size); > > std::cout << "begin enqueue" << std::endl; > > { > > std::cout << "moving indexes" << std::endl; > viennacl::vector<int> vcl_I(indices.size()); > viennacl::copy(indices, vcl_I); > > std::cout << "creating dummy vector" << std::endl; > viennacl::vector<T> vcl_V = viennacl::zero_vector<T>(M); > > viennacl::matrix_base<T> vcl_B(vcl_V.handle(), > M, 0, 1, M, //row layout > 1, 0, 1, 1, //column layout > true); // row-major > > viennacl::range r(0, M); > > for(unsigned int i=0; i < P; i++){ > > viennacl::range c(i, i+1); > > viennacl::matrix_range<viennacl::matrix<T> > tmp(*vcl_A, r, > c); > > // std::cout << tmp << std::endl; > > viennacl::ocl::enqueue(set_row_order(tmp, vcl_B, vcl_I, M, i, > M_internal)); > > tmp = vcl_B; > } > } > } > |