In the following code, kernelGPU2 generates wrong results on both the GPU and CPU backends (the CPU backend also crashes).
//Copy.br
// Copies a single element: for the position reported by instance(), the value
// read from `input` is stored at the same position in `output`.
kernel void kernelGPU1(int input[][], out int output[][])
{
    // Position of this kernel instance; x is used as the inner (second)
    // subscript and y as the outer (first) subscript.
    int2 pos = instance().xy;
    int inner = pos.x;
    int outer = pos.y;

    output[outer][inner] = input[outer][inner];
}
// Starting from the position (a, b) reported by instance(), copies four
// consecutive elements input[b..b+3][a] to the same positions of outputdata
// and writes the sum of those four values to sumofcolumn.
//
// NOTE(review): this kernel is reported to give wrong results on both
// backends; the cause is not visible in this code. It mixes two indexed
// outputs (outputdata[][]) with one stream output (sumofcolumn<>) --
// confirm against the Brook+ documentation which output defines the
// execution domain and whether this combination is supported.
kernel void kernelGPU2(int input[][], out int outputdata[][], out int sumofcolumn<>)
{
int2 index = instance().xy;
int a =index.x;
// b is the starting value of the first subscript; it is advanced once per
// loop iteration below.
int b =index.y;
int i =0;
// t1 holds the element read in the current iteration, t2 the running sum.
int t1=0,t2=0;
// The trip count is hard-coded to 4. Accesses stay within a 4-element first
// dimension only when the starting b is 0.
// NOTE(review): presumably 4 is the height of the host-side matrix -- confirm.
for (i=0;i<4;i++)
{
outputdata[b][a] =input[b][a] ;
t1=input[b][a];
t2+=t1;
b++;
}
sumofcolumn=t2;
}
////////////////////////////////////////////////////////////////////////////////
//!
//! \file simple_kernel.cpp
//!
//! \brief Create and execute a simple kernel
//!
//! The kernel copies data from one 2D stream into another and writes the
//! sum of each column to a 1D stream
//!
////////////////////////////////////////////////////////////////////////////////
// Project headers
#include "brookgenfiles/copy.h"
#include <iostream>
#include <vector>
using namespace std;
//! \brief Fills a width x height matrix with 0..width*height-1, runs
//!        kernelGPU2 on it, and prints the copied matrix followed by the
//!        per-column sums.
//!
//! \return 0 on success, 1 if either output stream reports an error.
int main(void){
    // kernelGPU2 loops over exactly 4 elements of the first dimension, so
    // height has to stay 4 for the kernel to cover the whole matrix.
    unsigned int width = 3;
    unsigned int height = 4;

    // Host buffers. std::vector frees them on every return path (the raw
    // new[] buffers were never released) and zero-initialises the output
    // buffers, so unwritten elements print as 0 instead of garbage.
    std::vector<int> cpu_a(width * height);
    std::vector<int> cpu_b(width * height);
    std::vector<int> cpu_c(width);

    // Row-major fill: element (i, j) holds its own linear index.
    for(unsigned int i = 0; i < height; ++i)
    {
        for(unsigned int j = 0; j < width; ++j)
        {
            cpu_a[i * width + j] = static_cast<int>(i * width + j);
        }
    }

    // 2D streams for the matrix and its copy, 1D stream for the column sums.
    unsigned int streamSize[] = {width, height};
    brook::Stream<int> gpu_a(2, streamSize);
    brook::Stream<int> gpu_b(2, streamSize);
    brook::Stream<int> gpu_c(1, &width);

    gpu_a.read(&cpu_a[0]);
    kernelGPU2(gpu_a, gpu_b, gpu_c);

    // Both gpu_b and gpu_c are kernel outputs; check each before reading back.
    if(gpu_b.error()){
        std::cout << gpu_b.errorLog();
        return 1;
    }
    if(gpu_c.error()){
        std::cout << gpu_c.errorLog();
        return 1;
    }

    gpu_b.write(&cpu_b[0]);
    gpu_c.write(&cpu_c[0]);

    // Copied matrix, one row per line.
    for(unsigned int i = 0; i < height; ++i)
    {
        for(unsigned int j = 0; j < width; ++j)
        {
            std::cout << cpu_b[i * width + j] << " ";
        }
        std::cout << "\n";
    }
    std::cout << "\n\n";

    // Column sums.
    for(unsigned int j = 0; j < width; ++j)
    {
        std::cout << cpu_c[j] << " ";
    }
    return 0;
}