// C-style indexing into a 3D constant buffer does not work: this test fails
// its correctness check.
// A possible cause is a bug in brcc/amdhlsl.
// Copies the 2x15x2 constant buffer i0 into the output stream o0.
// Each kernel instance reads the element of i0 selected by its own instance
// position, using C-style [z][y][x] indexing, and writes it to its output
// element. The indexing expression is kept exactly as written because it is
// the construct this test exercises.
kernel void copy_BRConstBufferSize_120(float i0[2][15][2], out float o0<>)
{
    int3 vpos = instance().xyz;
    o0 = i0[vpos.z][vpos.y][vpos.x];
}
// Host driver for copy_BRConstBufferSize_120.
//
// Fills the 2x15x2 constant buffer i0 with the values 1, 2, ..., 60, runs the
// kernel into a <2, 15, 2> stream, reads the stream back into o0 with
// streamWrite, and compares o0 element by element against the values of i0.
// The first mismatching position (if any) is printed; otherwise a
// "matched" message is printed.
//
// Returns 0 after the comparison has run, whether or not the results matched
// (unchanged from the original behaviour), and 1 if a host buffer could not
// be allocated.
int main()
{
    float i0[2][15][2];
    float* o0 = NULL;
    float* expected0 = NULL;
    float streamo0<2, 15, 2>;
    unsigned int k = 0, j = 0, i = 0, m = 0;
    unsigned int index = 0;
    unsigned int mismatch = 0;
    unsigned int inCBDepth = 2, inCBHeight = 15, inCBWidth = 2;
    unsigned int _srcComponents = 1;
    float inputValue = 0.0f, inputStep = 1.0f;
    unsigned int outDepth = 2, outHeight = 15, outWidth = 2;
    unsigned int _dstComponents = 1;
    // Number of floats in each host-side output buffer.
    unsigned int outCount = outDepth * outHeight * outWidth * _dstComponents;

    // Fill the constant buffer with a running sequence starting at 1.
    for(k = 0; k < inCBDepth; ++k)
    {
        for(i = 0; i < inCBHeight; i++)
        {
            for(j = 0; j < inCBWidth; j++)
            {
                for(m = 0; m < _srcComponents; m++)
                {
                    i0[k][i][j] = inputValue + 1;
                    inputValue += inputStep;
                }
            }
        }
    }

    // Allocate and zero the host buffers; bail out before touching them if
    // either allocation failed (memset on NULL is undefined behaviour).
    o0 = (float*)malloc(outCount * sizeof(float));
    expected0 = (float*)malloc(outCount * sizeof(float));
    if(o0 == NULL || expected0 == NULL)
    {
        printf("Failed to allocate host buffers\n");
        free(o0);
        free(expected0);
        return 1;
    }
    memset(o0, 0, outCount * sizeof(float));
    memset(expected0, 0, outCount * sizeof(float));

    // Run the kernel and read the output stream back into host memory.
    copy_BRConstBufferSize_120(i0, streamo0);
    streamWrite(streamo0, o0);

    // Generate expected output: a straight copy of i0, flattened in
    // depth-major, then row, then column order.
    // NOTE(review): the original author questioned whether this expected
    // layout is right; it presumes the stream is read back in the same
    // [depth][height][width] order as i0 - confirm against the runtime.
    for(k = 0; k < outDepth; ++k)
    {
        for(i = 0; i < outHeight; i++)
        {
            for(j = 0; j < outWidth; j++)
            {
                index = _dstComponents * (k * outHeight * outWidth + i * outWidth + j);
                for(m = 0; m < _dstComponents; m++)
                {
                    expected0[index + m] = i0[k][i][j];
                }
            }
        }
    }

    // Verify output; every loop stops as soon as the first mismatch is seen.
    for(k = 0; k < outDepth && !mismatch; ++k)
    {
        for(i = 0; i < outHeight && !mismatch; i++)
        {
            for(j = 0; j < outWidth && !mismatch; j++)
            {
                index = _dstComponents * (k * outHeight * outWidth + i * outWidth + j);
                for(m = 0; m < _dstComponents; m++)
                {
                    if(fabs(expected0[index + m] - o0[index + m]) > 0.000001)
                    {
                        mismatch = 1;
                        break;
                    }
                }
                if(mismatch)
                {
                    // Report the failing position: kernel result first,
                    // expected value second.
                    printf("Position %u %u %u \n ", k, i, j);
                    printf("Results Expected\n ");
                    for(m = 0; m < _dstComponents; m++)
                    {
                        printf("%f %f\n ", o0[index + m], expected0[index + m]);
                    }
                }
            }
        }
    }

    if(mismatch == 0)
    {
        printf(" CPU and GPU results matched\n ");
    }

    free(o0);
    free(expected0);
    // NOTE(review): the exit status stays 0 even on a mismatch, as in the
    // original; a harness has to read the printed output to detect failure.
    return 0;
}