libCL provides a core set of C++ classes that encapsulate the OpenCL API and provide a simple error handling and logging infrastructure. Based upon this core functionality are implementations of specific algorithms categorized within different subdirectories. An example of how to extend libCL is found on the wiki page. Up to this point, libCL contains the following algorithms:
phys\
•Smoothed Particle Hydrodynamics
sort\
•Radix Sort
geom\
•Bounding Volume Hierarchy
image\
•Recursive Gaussian Smoothing
•Bilateral Gaussian Smoothing
•Screen-Space Ambient Occlusion Culling
•Adaptive Tone-mapping
•Bloom
libCL closely integrates with OpenCL Studio via the libBind library. All of the demo applications bundled with OpenCL Studio are built upon libCL.
The following source code illustrates how to extend libCL using the fundamental wrapper and base classes. An extension basically consists of an OpenCL program and a new C++ class derived from oclProgram. The .cl, .h and .cpp files of the extension are located in an appropriate subdirectory of libCL, classified according to the type of algorithm.
An extension class may instantiate buffers and associate additional algorithms as needed. The example illustrated below implements a tone mapping operator that is applied to an input image and stores the result in an output image. Internally, the tone mapping class instantiates two additional buffers as well as a recursive Gaussian smoothing object.
#include "oclRecursiveGaussian.h"

/**
 * Tone mapping extension built on top of oclProgram.
 *
 * Reads a source image and writes the tone mapped result to a destination
 * image. Internally it owns two scratch images, two kernels and a recursive
 * Gaussian smoothing object.
 */
class oclToneMapping : public oclProgram
{
    public:

        /**
         * @param iContext  OpenCL context wrapper this program is created in
         * @param iFormat   image format used for the internal scratch images
         */
        oclToneMapping(oclContext& iContext, cl_image_format iFormat);

        /** Compiles the program and its kernels; returns 1 on success, 0 on failure. */
        int compile();

        /**
         * Runs the tone mapping operator.
         *
         * @param iDevice  device the kernels are enqueued on
         * @param bfSrce   input image
         * @param bfDest   output image receiving the result
         * @return non-zero on success, 0 on failure
         */
        int compute(oclDevice& iDevice, oclImage2D& bfSrce, oclImage2D& bfDest);

        /** Forwards iValue as the sigma of the internal Gaussian smoothing object. */
        void setSmoothing(cl_float iValue);

    protected:

        // NOTE: members are constructed in exactly this order.

        // smoothing pass applied to the luminance image
        oclRecursiveGaussian mGaussian;

        // kernels created from the program source
        oclKernel clLuminance;
        oclKernel clCombine;

        // scratch images used between the passes
        oclImage2D bfTempA;
        oclImage2D bfTempB;
};
Comments
* oclProgram is the base class for all extensions
* oclRecursiveGaussian is another extension class
#include "oclToneMapping.h"

/**
 * Sets up the tone mapping program: creates the two 256x256 scratch images,
 * registers the OpenCL source file and hands both kernels to exportKernel().
 *
 * @param iContext  context passed to the base class, the Gaussian object and both scratch images
 * @param iFormat   image format used when creating both scratch images
 */
oclToneMapping::oclToneMapping(oclContext& iContext, cl_image_format iFormat)
: oclProgram(iContext, "oclToneMapping")
// Initializers are listed in the members' declaration order in oclToneMapping
// (mGaussian, clLuminance, clCombine, bfTempA, bfTempB). C++ always constructs
// members in declaration order, so the list now reads the way it executes.
, mGaussian(iContext)
, clLuminance(*this)
, clCombine(*this)
, bfTempA(iContext, "bfTemp0")
, bfTempB(iContext, "bfTemp1")
{
    // Initial size only; compute() resizes both images to the source extent.
    bfTempA.create(CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, iFormat, 256, 256);
    bfTempB.create(CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, iFormat, 256, 256);

    addSourceFile("image\\oclToneMapping.cl");

    exportKernel(clLuminance);
    exportKernel(clCombine);
}

/**
 * Compiles the Gaussian helper and this program, then creates both kernels.
 *
 * @return 1 on success, 0 if either compile step fails.
 *         NOTE(review): KERNEL_VALIDATE is a project macro defined elsewhere;
 *         presumably it bails out when kernel creation failed - confirm.
 */
int oclToneMapping::compile()
{
    // release kernels
    clLuminance = 0;
    clCombine = 0;

    if (!mGaussian.compile())
    {
        return 0;
    }
    if (!oclProgram::compile())
    {
        return 0;
    }

    // The names must match the __kernel function names in oclToneMapping.cl.
    clLuminance = createKernel("clLuminance");
    KERNEL_VALIDATE(clLuminance)
    clCombine = createKernel("clCombine");
    KERNEL_VALIDATE(clCombine)
    return 1;
}

/**
 * Tone maps bfSrce into bfDest.
 *
 * Steps: resize the scratch images to the source extent, run clLuminance
 * (bfSrce -> bfTempA), run the Gaussian object on (bfTempA, bfTempB, bfTempA),
 * then run clCombine (bfSrce, bfTempA -> bfDest).
 *
 * @param iDevice  device the kernels are enqueued on
 * @param bfSrce   input image; its width/height define the global work size
 * @param bfDest   output image
 * @return 1 on success, 0 if the Gaussian pass fails. The same 0/1 convention
 *         as compile() is used instead of bool literals in an int function.
 *         NOTE(review): ENQUEUE_VALIDATE is a project macro defined elsewhere;
 *         presumably it inspects sStatusCL and returns on error - confirm.
 */
int oclToneMapping::compute(oclDevice& iDevice, oclImage2D& bfSrce, oclImage2D& bfDest)
{
    // getImageInfo<size_t> yields size_t; the narrowing to cl_uint is made explicit.
    cl_uint lWidth = static_cast<cl_uint>(bfSrce.getImageInfo<size_t>(CL_IMAGE_WIDTH));
    cl_uint lHeight = static_cast<cl_uint>(bfSrce.getImageInfo<size_t>(CL_IMAGE_HEIGHT));

    // Keep both scratch images at the same extent as the source image.
    if (bfTempA.getImageInfo<size_t>(CL_IMAGE_WIDTH)!=lWidth || bfTempA.getImageInfo<size_t>(CL_IMAGE_HEIGHT)!=lHeight)
    {
        bfTempA.resize(lWidth, lHeight);
    }
    if (bfTempB.getImageInfo<size_t>(CL_IMAGE_WIDTH)!=lWidth || bfTempB.getImageInfo<size_t>(CL_IMAGE_HEIGHT)!=lHeight)
    {
        bfTempB.resize(lWidth, lHeight);
    }

    // One work-item per source pixel.
    size_t lGlobalSize[2];
    lGlobalSize[0] = lWidth;
    lGlobalSize[1] = lHeight;

    // NOTE(review): the status returned by clSetKernelArg is discarded here and
    // below; consider validating it with the project's error handling.
    clSetKernelArg(clLuminance, 0, sizeof(cl_mem), bfSrce);
    clSetKernelArg(clLuminance, 1, sizeof(cl_mem), bfTempA);
    sStatusCL = clEnqueueNDRangeKernel(iDevice, clLuminance, 2, NULL, lGlobalSize, NULL, 0, NULL, clLuminance.getEvent());
    ENQUEUE_VALIDATE

    if (!mGaussian.compute(iDevice, bfTempA, bfTempB, bfTempA))
    {
        return 0;
    }

    clSetKernelArg(clCombine, 0, sizeof(cl_mem), bfSrce);
    clSetKernelArg(clCombine, 1, sizeof(cl_mem), bfTempA);
    clSetKernelArg(clCombine, 2, sizeof(cl_mem), bfDest);
    sStatusCL = clEnqueueNDRangeKernel(iDevice, clCombine, 2, NULL, lGlobalSize, NULL, 0, NULL, clCombine.getEvent());
    ENQUEUE_VALIDATE

    return 1;
}

/**
 * Sets the sigma of the internal Gaussian smoothing object.
 *
 * @param iValue  value forwarded unchanged to mGaussian.setSigma()
 */
void oclToneMapping::setSmoothing(cl_float iValue)
{
    mGaussian.setSigma(iValue);
}
Comments
* KERNEL_VALIDATE and ENQUEUE_VALIDATE are macros
* Buffer and kernel classes override various casting operators
* All objects in libCL can be named for effective error reporting
/*
 * clLuminance
 *
 * One work-item per pixel. Reads the texel at (gx, gy) from RGBAin using
 * unnormalized coordinates with nearest filtering, computes a weighted sum of
 * its R, G and B channels and writes that value to all four channels of LUMout.
 */
__kernel void clLuminance(__read_only image2d_t RGBAin, __write_only image2d_t LUMout)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;

    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    float4 RGBA = read_imagef(RGBAin, sampler, (float2)(gx,gy));

    // Single-precision literals (as in clCombine) avoid double constants in the kernel.
    const float luminance = dot((float4)(0.2126f,0.7152f,0.0722f,0.0f), RGBA);

    // write_imagef takes a float4 colour; replicate the scalar explicitly.
    write_imagef(LUMout, (int2)(gx,gy), (float4)(luminance));
}

/*
 * clCombine
 *
 * One work-item per output pixel. Samples RGBDin and LUMin at the pixel centre
 * (normalized coordinates, linear filtering), scales the source colour by
 * exp(glare + log(luminance) - 0.45 * log(LUMin)) and writes it to RGBDout.
 * All four channels of the source texel are scaled by the per-channel factor.
 */
__kernel void clCombine(__read_only image2d_t RGBDin, __read_only image2d_t LUMin, __write_only image2d_t RGBDout)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP;

    const int gx = get_global_id(0);
    const int gy = get_global_id(1);
    const int gw = get_global_size(0);
    const int gh = get_global_size(1);

    // Centre of this work-item's pixel in normalized [0,1] coordinates.
    float2 pixel = (float2)((gx+0.5f)/gw,(gy+0.5f)/gh);

    float4 RGBA = read_imagef(RGBDin, sampler, pixel);
    float4 LLLL = read_imagef(LUMin, sampler, pixel);

    float luminance = dot((float4)(0.2126f,0.7152f,0.0722f,0.0f),RGBA);

    float glare = 0.0f;//log(l) * (1.0f - 0.99f/(0.99f + log(l))); // Retinex

    // log(0) is -inf; for black pixels the original expression evaluated to
    // (-inf) + (+inf) = NaN (or +inf when only LUMin was zero). Clamping to the
    // smallest normal float leaves every other input unchanged and keeps the
    // result finite.
    const float  safeLuminance = fmax(luminance, FLT_MIN);
    const float4 safeLLLL      = fmax(LLLL, FLT_MIN);

    float4 result = RGBA*(exp(glare + log(safeLuminance)-0.45f*log(safeLLLL)));

    write_imagef(RGBDout, (int2)(gx,gy), result);
}
Comments
* The kernel names must be the same as those passed to createKernel
* The tone mapping algorithm still needs to be refined