From: <ac...@us...> - 2008-11-21 07:32:46
|
Revision: 3563
          http://hugin.svn.sourceforge.net/hugin/?rev=3563&view=rev
Author:   acmihal
Date:     2008-11-21 07:32:43 +0000 (Fri, 21 Nov 2008)

Log Message:
-----------
- Support for src and dest images larger than the maximum GPU texture size.
- Fixes for fisheye, stereographic, cylindrical testcases.
- Moderate speedups for GPU I/O.

Modified Paths:
--------------
    hugin/branches/nona-gpu/src/hugin_base/panotools/PanoToolsTransformGPU.cpp
    hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.cpp
    hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.h
    hugin/branches/nona-gpu/src/hugin_base/vigra_ext/Interpolators.h
    hugin/branches/nona-gpu/src/tools/nona.cpp

Modified: hugin/branches/nona-gpu/src/hugin_base/panotools/PanoToolsTransformGPU.cpp
===================================================================
--- hugin/branches/nona-gpu/src/hugin_base/panotools/PanoToolsTransformGPU.cpp 2008-11-18 18:09:36 UTC (rev 3562)
+++ hugin/branches/nona-gpu/src/hugin_base/panotools/PanoToolsTransformGPU.cpp 2008-11-21 07:32:43 UTC (rev 3563)
@@ -48,10 +48,10 @@ // Still broken on ati -// fisheye -// stereographic -// cylindrical // albers +// FIXME coord xforms that discard do not work correctly. +// Instead of discard, need to set coords to something far outside the src image and return. +// e.g. (-1000, -1000). 
static void rotate_erect_glsl(ostringstream& oss, const void* params) { //oss << " // rotate_erect(" << var0 << ", " << var1 << ")" << endl @@ -103,8 +103,8 @@ static void erect_rect_glsl(ostringstream& oss, const void* params) { oss << " // erect_rect(" << distanceparam << ")" << endl - << " src.t = " << distanceparam << " * atan_xge0(src.t, length(vec2(" << distanceparam << ", src.s)));" << endl - << " src.s = " << distanceparam << " * atan_safe(src.s, " << distanceparam << ");" << endl + << " src.t = " << distanceparam << " * atan2_xge0(src.t, length(vec2(" << distanceparam << ", src.s)));" << endl + << " src.s = " << distanceparam << " * atan2_safe(src.s, " << distanceparam << ");" << endl << endl; } @@ -117,7 +117,7 @@ << " if (theta != 0.0) { s = sin(theta) / r; }" << endl << " float v1 = s * src.s;" << endl << " float v0 = cos(theta);" << endl - << " src.s = " << distanceparam << " * atan_safe(v1, v0);" << endl + << " src.s = " << distanceparam << " * atan2_safe(v1, v0);" << endl << " src.t = " << distanceparam << " * atan_safe(s * src.t / length(vec2(v0, v1)));" << endl << " }" << endl << endl; @@ -139,7 +139,7 @@ << " float s = sin(theta);" << endl << " vec2 v = vec2(s * sin(phi), cos(theta));" << endl << " float r = length(v);" << endl - << " theta = " << distanceparam << " * atan_safe(r, s * cos(phi));" << endl + << " theta = " << distanceparam << " * atan2_safe(r, s * cos(phi));" << endl << " src = v * (theta / r);" << endl << " }" << endl << endl; @@ -186,7 +186,7 @@ << " float s = " << (1.0 / distanceparam) << ";" << endl << " if (theta != 0.0) s = sin(theta) / r;" << endl << " vec2 v = vec2(cos(theta), s * src.s);" << endl - << " src.s = " << distanceparam << " * atan_safe(v.t, v.s);" << endl + << " src.s = " << distanceparam << " * atan2_safe(v.t, v.s);" << endl << " src.t = " << distanceparam << " * s * src.t / length(v);" << endl << " }" << endl << endl; @@ -222,7 +222,7 @@ << " vec3 u = v * m;" << endl << " r = length(u.st);" << endl << " theta = 
0.0;" << endl - << " if (r != 0.0) theta = " << d << " * atan_safe(r, u.p) / r;" << endl + << " if (r != 0.0) theta = " << d << " * atan2_safe(r, u.p) / r;" << endl << " src = theta * u.st;" << endl << " }" << endl << endl; @@ -252,7 +252,7 @@ << " src /= " << distanceparam << ";" << endl << " if (abs(src.t) > " << M_PI << ") discard;" << endl << " float x = src.s;" << endl - << " src.s = " << distanceparam << " * atan_safe(sinh(src.s), cos(src.t));" << endl + << " src.s = " << distanceparam << " * atan2_safe(sinh(src.s), cos(src.t));" << endl << " src.t = " << distanceparam << " * asin(sin(src.t) / cosh(x));" << endl << " }" << endl << endl; @@ -276,7 +276,7 @@ << " float c = 2.0 * asin(ro / 2.0);" << endl << " src.t = " << distanceparam << " * asin((src.t * sin(c)) / ro);" << endl << " if (abs(ro * cos(c)) <= 1.0e-10) src.s = 0.0;" << endl - << " else src.s = " << distanceparam << " * atan_safe(src.s * sin(c), (ro * cos(c)));" << endl + << " else src.s = " << distanceparam << " * atan2_safe(src.s * sin(c), (ro * cos(c)));" << endl << " }" << endl << " }" << endl << endl; @@ -295,7 +295,7 @@ << " if (abs(cos_c) < 1.0e-10 && abs(src.s) < 1.0e-10) discard;" << endl << " float y = src.s * sin_c;" << endl << " float x = cos_c * rh;" << endl - << " src.s = atan_safe(y, x) * " << distanceparam << ";" << endl + << " src.s = atan2_safe(y, x) * " << distanceparam << ";" << endl << " }" << endl << endl; } @@ -321,7 +321,7 @@ oss << " src /= " << mp->distance << ";" << endl << " src.t += " << yoffset << ";" << endl << " float rho2 = (src.s * src.s + (" << rho0 << " - src.t) * (" << rho0 << " - src.t));" << endl - << " float theta = atan_safe(" << ((n < 0) ? "-" : "") << "src.s, " << ((n < 0) ? "-1.0 * " : "") << "(" << rho0 << " - src.t));" << endl + << " float theta = atan2_safe(" << ((n < 0) ? "-" : "") << "src.s, " << ((n < 0) ? 
"-1.0 * " : "") << "(" << rho0 << " - src.t));" << endl << " float phi = asin((" << C << " - rho2 * " << n2 << ") / " << twiceN << ");" << endl << " float lambda = theta / " << n << ";" << endl << " if (abs(lambda) > " << M_PI << ") discard;" << endl Modified: hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.cpp =================================================================== --- hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.cpp 2008-11-18 18:09:36 UTC (rev 3562) +++ hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.cpp 2008-11-21 07:32:43 UTC (rev 3563) @@ -35,29 +35,50 @@ #include <time.h> #include <vigra/diff2d.hxx> +#include <vigra/utilities.hxx> #include <vigra/error.hxx> +#include <vector> + using std::cout; using std::cerr; using std::endl; +using std::vector; +using vigra::Rect2D; + #define CHECK_GL() checkGLErrors(__LINE__, __FILE__) -static GLenum XGLMap[] = {GL_RED, GL_RGB, GL_RGBA, GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, GL_UNSIGNED_INT, GL_FLOAT}; -static const char *AlphaCombineKernelSource = { -"#version 110\n" -"#extension GL_ARB_texture_rectangle : enable\n" -"uniform sampler2DRect SrcAlphaTexture;\n" -"void main(void)\n" -"{\n" -" float alpha = texture2DRect(SrcAlphaTexture, gl_TexCoord[0].st).r;\n" -" if (alpha != 0.0) discard;\n" -" gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);\n" -"}\n" +static GLenum XGLMap[] = { + // gltypes + GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, GL_UNSIGNED_INT, GL_FLOAT, + // Internalformats + GL_RGBA8, GL_RGBA16, GL_RGBA32F_ARB, GL_LUMINANCE8_ALPHA8, GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA32F_ARB, + GL_RGB8, GL_RGB16, GL_RGB32F_ARB, GL_LUMINANCE8, GL_LUMINANCE16, GL_LUMINANCE32F_ARB, + // formats + GL_LUMINANCE, GL_RGB, GL_LUMINANCE_ALPHA, GL_RGBA }; -static void checkGLErrors(int line, char *file) { +static char* XGLStringMap[] = { + // gltypes + "GL_BYTE", "GL_UNSIGNED_BYTE", "GL_SHORT", "GL_UNSIGNED_SHORT", 
"GL_INT", "GL_UNSIGNED_INT", "GL_FLOAT", + // Internalformats + "GL_RGBA8", "GL_RGBA16", "GL_RGBA32F_ARB", "GL_LUMINANCE8_ALPHA8", "GL_LUMINANCE16_ALPHA16", "GL_LUMINANCE_ALPHA32F_ARB", + "GL_RGB8", "GL_RGB16", "GL_RGB32F_ARB", "GL_LUMINANCE8", "GL_LUMINANCE16", "GL_LUMINANCE32F_ARB", + // formats + "GL_LUMINANCE", "GL_RGB", "GL_LUMINANCE_ALPHA", "GL_RGBA" +}; + +static int BytesPerPixel[] = { + 1, 1, 2, 2, 4, 4, 4, + 4, 8, 16, 2, 4, 8, + 3, 6, 12, 1, 2, 4, + 0, 0, 0, 0 +}; + + +static void checkGLErrors(int line, char* file) { GLenum errCode; if ((errCode = glGetError()) != GL_NO_ERROR) { cerr << "nona: GL error in " << file << ":" << line << ": " << gluErrorString(errCode) << endl; @@ -73,60 +94,159 @@ if (infologLength > 1) { infoLog = new char[infologLength]; glGetInfoLogARB(obj, infologLength, &charsWritten, infoLog); - cout << "nona: GL info log:" << endl << infoLog << endl; + cout << "nona: GL info log:" << endl << infoLog << endl << endl; delete[] infoLog; } } -static bool checkFramebufferStatus() { +static bool checkFramebufferStatus(int line, char* file) { GLenum status; status = (GLenum) glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT); switch(status) { case GL_FRAMEBUFFER_COMPLETE_EXT: return true; case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT: - cerr << "nona: GL error: Framebuffer incomplete, incomplete attachment" << endl; + cerr << "nona: GL error: Framebuffer incomplete, incomplete attachment in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_UNSUPPORTED_EXT: - cerr << "nona: Unsupported framebuffer format" << endl; + cerr << "nona: Unsupported framebuffer format in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT: - cerr << "nona: Framebuffer incomplete, missing attachment" << endl; + cerr << "nona: Framebuffer incomplete, missing attachment in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT: - cerr << "nona: Framebuffer 
incomplete, attached images must have same dimensions" << endl; + cerr << "nona: Framebuffer incomplete, attached images must have same dimensions in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT: - cerr << "nona: Framebuffer incomplete, attached images must have same format" << endl; + cerr << "nona: Framebuffer incomplete, attached images must have same format in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT: - cerr << "nona: Framebuffer incomplete, missing draw buffer" << endl; + cerr << "nona: Framebuffer incomplete, missing draw buffer in: " << file << ":" << line << endl; return false; case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT: - cerr << "nona: Framebuffer incomplete, missing read buffer" << endl; + cerr << "nona: Framebuffer incomplete, missing read buffer in: " << file << ":" << line << endl; return false; } return false; } +static void compileGLSL(const char* programName, + GLhandleARB& programObject, + GLhandleARB& shaderObject, + const char** source) +{ + GLint success; + + programObject = glCreateProgramObjectARB(); + shaderObject = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB); + + glShaderSourceARB(shaderObject, 1, source, NULL); + glCompileShaderARB(shaderObject); + + glGetObjectParameterivARB(shaderObject, GL_OBJECT_COMPILE_STATUS_ARB, &success); + if (!success) { + cerr << "nona: " << programName << " shader program could not be compiled." << endl; + printInfoLog(shaderObject); + exit(1); + } + + printInfoLog(shaderObject); + + glAttachObjectARB(programObject, shaderObject); + glLinkProgramARB(programObject); + + glGetObjectParameterivARB(programObject, GL_OBJECT_LINK_STATUS_ARB, &success); + if (!success) { + cerr << "nona: " << programName << " shader program could not be linked." 
<< endl; + printInfoLog(programObject); + exit(1); + } + + printInfoLog(programObject); +} + +static void makeChunks(const int width, + const int height, + const int maxTextureSize, + const long long int maxPixels, + vector<Rect2D>& result) +{ + int numXChunks = 1; + int numYChunks = 1; + + // Make chunks small enough to fit into maxTextureSize + while (ceil(static_cast<double>(width) / numXChunks) > maxTextureSize) numXChunks++; + while (ceil(static_cast<double>(height) / numYChunks) > maxTextureSize) numYChunks++; + + // Make chunks small enough to fit into maxPixels limit + while ((ceil(static_cast<double>(width) / numXChunks) * ceil(static_cast<double>(height) / numYChunks)) + > maxPixels) { + + if (ceil(static_cast<double>(width) / numXChunks) > ceil(static_cast<double>(height) / numYChunks)) { + ++numXChunks; + } else { + ++numYChunks; + } + } + + // Make chunks small enough to fit in GL_PROXY_TEXTURE_2D of the biggest internalformat type. + while (1) { + glTexImage2D(GL_PROXY_TEXTURE_2D, + 0, + GL_RGBA32F_ARB, + static_cast<int>(ceil(static_cast<double>(width) / numXChunks)), + static_cast<int>(ceil(static_cast<double>(height) / numYChunks)), + 0, + GL_RGBA, + GL_FLOAT, + NULL); + GLint returnedWidth; + glGetTexLevelParameteriv(GL_PROXY_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &returnedWidth); + + if (width != 0) break; + + if (ceil(static_cast<double>(width) / numXChunks) > ceil(static_cast<double>(height) / numYChunks)) { + ++numXChunks; + } else { + ++numYChunks; + } + } + + for (int yChunk = 0, y = 0; yChunk < numYChunks; yChunk++) { + int yEnd = std::min(height, static_cast<int>(ceil(static_cast<double>(height) / numYChunks)) + y); + for (int xChunk = 0, x = 0; xChunk < numXChunks; xChunk++) { + int xEnd = std::min(width, static_cast<int>(ceil(static_cast<double>(width) / numXChunks)) + x); + result.push_back(Rect2D(x, y, xEnd, yEnd)); + x = xEnd; + } + y = yEnd; + } +} + + namespace vigra_ext { -bool transformImageGPUIntern(const std::string& glsl, +bool 
transformImageGPUIntern(const std::string& coordXformGLSL, + const std::string& interpolatorGLSL, + const int interpolatorSize, const vigra::Diff2D srcSize, const void* const srcBuffer, - const int srcGLType, const int srcGLFormat, + const int srcGLInternalFormat, const int srcGLTransferFormat, const int srcGLFormat, const int srcGLType, const void* const srcAlphaBuffer, - const int srcAlphaGLType, const int srcAlphaGLFormat, + const int srcAlphaGLType, const vigra::Diff2D destUL, const vigra::Diff2D destSize, void* const destBuffer, - const int destGLType, const int destGLFormat, + const int destGLInternalFormat, const int destGLTransferFormat, const int destGLFormat, const int destGLType, void* const destAlphaBuffer, - const int destAlphaGLType, const int destAlphaGLFormat, + const int destAlphaGLType, const bool warparound) { + timeval t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19, t20, t21; + gettimeofday(&t1, NULL); + const int xstart = destUL.x; const int xend = destUL.x + destSize.x; const int ystart = destUL.y; @@ -150,14 +270,14 @@ vigra_precondition((reinterpret_cast<const uintptr_t>(destBuffer) & 0x7) == 0, "dest image buffer not 8-byte aligned"); vigra_precondition((reinterpret_cast<const uintptr_t>(destAlphaBuffer) & 0x7) == 0, "dest alpha image buffer not 8-byte aligned"); - cout << "destGLFormat=" << destGLFormat << endl - << "destGLType=" << destGLType << endl - << "srcGLFormat=" << srcGLFormat << endl - << "srcGLType=" << srcGLType << endl - << "srcAlphaGLFormat=" << srcAlphaGLFormat << endl - << "srcAlphaGLType=" << srcAlphaGLType << endl - << "destAlphaGLFormat=" << destAlphaGLFormat << endl - << "destAlphaGLType=" << destAlphaGLType << endl; + cout << "destGLInternalFormat=" << XGLStringMap[destGLInternalFormat] << endl + << "destGLFormat=" << XGLStringMap[destGLFormat] << endl + << "destGLType=" << XGLStringMap[destGLType] << endl + << "srcGLInternalFormat=" << XGLStringMap[srcGLInternalFormat] << endl + 
<< "srcGLFormat=" << XGLStringMap[srcGLFormat] << endl + << "srcGLType=" << XGLStringMap[srcGLType] << endl + << "srcAlphaGLType=" << XGLStringMap[srcAlphaGLType] << endl + << "destAlphaGLType=" << XGLStringMap[destAlphaGLType] << endl; cout << "warparound=" << warparound << endl; @@ -166,32 +286,70 @@ cout << "needsAtanWorkaround=" << needsAtanWorkaround << endl; + GLint maxTextureSize; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + cout << "maxTextureSize=" << maxTextureSize << endl; + + // Artificial limit: binding big textures to fbos seems to be very slow. + //maxTextureSize = 2048; + + const long long int GpuMemoryInBytes = 512 << 20; + const double SourceAllocationRatio = 0.7; + + const int bytesPerSourcePixel = BytesPerPixel[srcGLInternalFormat] + BytesPerPixel[srcGLTransferFormat] + 1; + const long long int maxSourcePixels = static_cast<long long int>(GpuMemoryInBytes * SourceAllocationRatio) / bytesPerSourcePixel; + + vector<Rect2D> sourceChunks; + makeChunks(srcSize.x, srcSize.y, maxTextureSize, maxSourcePixels, sourceChunks); + + const long long int actualSourcePixels = sourceChunks[0].area(); + const long long int gpuMemoryRemaining = GpuMemoryInBytes - (actualSourcePixels * bytesPerSourcePixel); + + // 16 bytes/pixel * 2 dest images for ping/pong multipass rendering + // 8 bytes/pixel for coordinate texture + // destXfer + GL_ALPHA + const int bytesPerDestPixel = 16 + 16 + 8 + BytesPerPixel[destGLTransferFormat] + 1; + const long long int maxDestPixels = gpuMemoryRemaining / bytesPerDestPixel; + + vector<Rect2D> destChunks; + makeChunks(destSize.x, destSize.y, 2048, maxDestPixels, destChunks); + + const long long int totalGpuMemoryUsed = (sourceChunks[0].area() * bytesPerSourcePixel) + (destChunks[0].area() * bytesPerDestPixel); + vigra_assert(totalGpuMemoryUsed <= GpuMemoryInBytes, + "failed to subdivide source and dest images into pieces small enough to fit in gpu memory."); + + cout << "Source chunks:" << endl; + for 
(vector<Rect2D>::iterator rI = sourceChunks.begin(); rI != sourceChunks.end(); ++rI) { + cout << " " << *rI << endl; + } + cout << "Dest chunks:" << endl; + for (vector<Rect2D>::iterator rI = destChunks.begin(); rI != destChunks.end(); ++rI) { + cout << " " << *rI << endl; + } + cout << "Total GPU memory used: " << totalGpuMemoryUsed << endl; + + + + // Prepare coord transform GLSL program std::ostringstream oss; oss << std::setprecision(20) << std::showpoint; oss << "#version 110" << endl << "#extension GL_ARB_texture_rectangle : enable" << endl << "uniform sampler2DRect SrcTexture;" << endl - << "vec2 sinc(const in vec2 x) {" << endl - << " vec2 xpi = x * " << M_PI << ";" << endl - << " vec2 result = vec2(1.0, 1.0);" << endl - << " if (xpi.s != 0.0) result.s = sin(xpi.s) / xpi.s;" << endl - << " if (xpi.t != 0.0) result.t = sin(xpi.t) / xpi.t;" << endl - << " return result;" << endl - << "}" << endl << "float sinh(const in float x) { return (exp(x) - exp(-x)) / 2.0; }" << endl << "float cosh(const in float x) { return (exp(x) + exp(-x)) / 2.0; }" << endl; if (needsAtanWorkaround) { - oss << "float atan_xge0(const in float y, const in float x) {" << endl + oss << "float atan2_xge0(const in float y, const in float x) {" << endl << " if (abs(y) > x) {" << endl << " return sign(y) * (" << (M_PI/2.0) << " - atan(x, abs(y)));" << endl << " } else {" << endl << " return atan(y, x);" << endl << " }" << endl << "}" << endl - << "float atan_safe(const in float y, const in float x) {" << endl - << " if (x >= 0.0) return atan_xge0(y, x);" << endl - << " else return (sign(y) * " << M_PI << ") - atan_xge0(y, -x);" << endl + << "float atan2_safe(const in float y, const in float x) {" << endl + << " if (x >= 0.0) return atan2_xge0(y, x);" << endl + << " else return (sign(y) * " << M_PI << ") - atan2_xge0(y, -x);" << endl << "}" << endl << "float atan_safe(const in float yx) {" << endl << " if (abs(yx) > 1.0) {" << endl @@ -201,10 +359,10 @@ << " }" << endl << "}" << endl; } else 
{ - oss << "float atan_xge0(const in float y, const in float x) {" << endl + oss << "float atan2_xge0(const in float y, const in float x) {" << endl << " return atan(y, x);" << endl << "}" << endl - << "float atan_safe(const in float y, const in float x) {" << endl + << "float atan2_safe(const in float y, const in float x) {" << endl << " return atan(y, x);" << endl << "}" << endl << "float atan_safe(const in float yx) {" << endl @@ -214,220 +372,592 @@ oss << "void main(void)" << endl << "{" << endl - << glsl - //<< " gl_FragColor = vec4(src.s, src.t, 0.0, 0.0);" << endl - << " gl_FragColor = p;" << endl + << coordXformGLSL + << " gl_FragColor = vec4(src.s, 0.0, 0.0, src.t);" << endl << "}" << endl; - std::string ossStr = oss.str(); - const char* remapKernelSource = ossStr.c_str(); - cout << remapKernelSource; + std::string coordXformKernelSourceString = oss.str(); + const char* coordXformKernelSource = coordXformKernelSourceString.c_str(); + cout << coordXformKernelSource; - GLint maxTextureSize; - glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); - cout << "maxTextureSize=" << maxTextureSize << endl; + GLhandleARB coordXformProgramObject; + GLhandleARB coordXformShaderObject; + compileGLSL("coordinate transform", + coordXformProgramObject, + coordXformShaderObject, + &coordXformKernelSource); + + + // Prepare interpolation shader program. 
+ oss.str(""); + oss << std::setprecision(20) << std::showpoint; + oss << "#version 110" << endl + << "#extension GL_ARB_texture_rectangle : enable" << endl + << "uniform sampler2DRect CoordTexture;" << endl + << "uniform sampler2DRect AccumTexture;" << endl + << "uniform sampler2DRect SrcTexture;" << endl + << "uniform vec2 SrcUL;" << endl + << "uniform vec2 SrcLR;" << endl + << "vec2 sinc(const in vec2 x) {" << endl + << " vec2 xpi = x * " << M_PI << ";" << endl + << " vec2 result = vec2(1.0, 1.0);" << endl + << " if (xpi.s != 0.0) result.s = sin(xpi.s) / xpi.s;" << endl + << " if (xpi.t != 0.0) result.t = sin(xpi.t) / xpi.t;" << endl + << " return result;" << endl + << "}" << endl + << "void main(void)" << endl + << "{" << endl + << " vec2 src = texture2DRect(CoordTexture, gl_TexCoord[0].st).sq;" << endl + << " vec4 accum = texture2DRect(AccumTexture, gl_TexCoord[0].st);" << endl + << endl; + + // Add nothing to pixels where the source image has no contribution + oss << " if (any(lessThan(src, SrcUL - " << (interpolatorSize / 2.0) << "))) {" << endl + << " gl_FragColor = accum;" << endl + << " return;" << endl + << " }" << endl + << " if (any(greaterThan(src, SrcLR + " << ((interpolatorSize / 2.0) - 1.0) << "))) {" << endl + << " gl_FragColor = accum;" << endl + << " return;" << endl + << " }" << endl + << endl; + + oss << " src -= SrcUL;" << endl + << " vec2 t = floor(src);" << endl + << " vec2 f = fract(src);" << endl + << endl; + + // Compute filter weights + oss << interpolatorGLSL; + + // Interpolator loop + for (int ky = 0; ky < interpolatorSize; ++ky) { + double bounded_ky_offset = 1.5 + ky - (interpolatorSize / 2); + for (int kx = 0; kx < interpolatorSize; ++kx) { + double bounded_kx_offset = 1.5 + kx - (interpolatorSize / 2); + oss << " {" << endl + << " // (" << kx << ", " << ky << ")" << endl + << " vec2 ix = t + vec2(" << bounded_kx_offset << ", " << bounded_ky_offset << ");" << endl + << " vec4 sp = texture2DRect(SrcTexture, ix);" << endl + << " 
float weight = w[" << kx << "].s * w[" << ky << "].t * sp.a;" << endl + << " accum += sp * weight;" << endl + << " }" << endl; + } + } + + oss << endl + << " gl_FragColor = accum;" << endl + << "}" << endl + << endl; + + std::string interpolatorKernelSourceString = oss.str(); + const char* interpolatorKernelSource = interpolatorKernelSourceString.c_str(); + cout << interpolatorKernelSource; + + GLhandleARB interpolatorProgramObject; + GLhandleARB interpolatorShaderObject; + compileGLSL("interpolator", + interpolatorProgramObject, + interpolatorShaderObject, + &interpolatorKernelSource); + + GLint coordTextureParam = glGetUniformLocationARB(interpolatorProgramObject, "CoordTexture"); + GLint accumTextureParam = glGetUniformLocationARB(interpolatorProgramObject, "AccumTexture"); + GLint srcTextureParam = glGetUniformLocationARB(interpolatorProgramObject, "SrcTexture"); + GLint srcULParam = glGetUniformLocationARB(interpolatorProgramObject, "SrcUL"); + GLint srcLRParam = glGetUniformLocationARB(interpolatorProgramObject, "SrcLR"); + + + // Prepare normalization/photometric shader program + oss.str(""); + oss << std::setprecision(20) << std::showpoint; + oss << "#version 110" << endl + << "#extension GL_ARB_texture_rectangle : enable" << endl + << "uniform sampler2DRect NormTexture;" << endl + << "void main(void)" << endl + << "{" << endl + << " vec4 n = texture2DRect(NormTexture, gl_TexCoord[0].st);" << endl + << " vec4 p = vec4(0.0, 0.0, 0.0, 0.0);" << endl + << " if (n.a >= 0.2) p = n / n.a;" << endl + << " gl_FragColor = p;" << endl + << "}" << endl + << endl; + + std::string normalizationPhotometricKernelSourceString = oss.str(); + const char* normalizationPhotometricKernelSource = normalizationPhotometricKernelSourceString.c_str(); + cout << normalizationPhotometricKernelSource; + + GLhandleARB normalizationPhotometricProgramObject; + GLhandleARB normalizationPhotometricShaderObject; + compileGLSL("normalization/photometric", + 
normalizationPhotometricProgramObject, + normalizationPhotometricShaderObject, + &normalizationPhotometricKernelSource); + + GLint normTextureParam = glGetUniformLocationARB(normalizationPhotometricProgramObject, "NormTexture"); + + glFinish(); + gettimeofday(&t21, NULL); + cout << "gpu shader program compile time = " << (t21.tv_sec - t1.tv_sec + 1e-6*(t21.tv_usec - t1.tv_usec)) << endl; + + // General GL setup glPixelStorei(GL_PACK_ALIGNMENT, 8); glPixelStorei(GL_UNPACK_ALIGNMENT, 8); + const float borderColor[] = {0.0, 0.0, 0.0, 0.0}; + + glClearColor(0.0, 0.0, 0.0, 0.0); + + GLuint framebuffers[5]; + glGenFramebuffersEXT(5, framebuffers); + GLuint srcFB = framebuffers[0]; + GLuint coordFB = framebuffers[1]; + GLuint accumFB = framebuffers[2]; + GLuint destFB = framebuffers[3]; + GLuint destAlphaFB = framebuffers[4]; + + const int viewportWidth = std::max(destChunks[0].width(), sourceChunks[0].width()); + const int viewportHeight = std::max(destChunks[0].height(), sourceChunks[0].height()); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + gluOrtho2D(0.0, viewportWidth, 0.0, viewportHeight); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glViewport(0, 0, viewportWidth, viewportHeight); + + const int destOdd = (destChunks[0].height() & 1); + // Setup coordinate texture + GLuint coordTexture; + glGenTextures(1, &coordTexture); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, coordTexture); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_LUMINANCE_ALPHA32F_ARB, destChunks[0].width(), destChunks[0].height() + destOdd, 0, GL_LUMINANCE_ALPHA, GL_FLOAT, NULL); + CHECK_GL(); + + // Setup coordinate framebuffer + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, coordFB); + 
CHECK_GL(); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, coordTexture, 0); + CHECK_GL(); + if (!checkFramebufferStatus(__LINE__, __FILE__)) { + exit(1); + } + + // Setup accumulator ping-pong textures + GLuint accumTextures[2]; + glGenTextures(2, accumTextures); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, accumTextures[0]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA32F_ARB, destChunks[0].width(), destChunks[0].height() + destOdd, 0, GL_RGBA, GL_FLOAT, NULL); + CHECK_GL(); + + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, accumTextures[1]); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA32F_ARB, destChunks[0].width(), destChunks[0].height() + destOdd, 0, GL_RGBA, GL_FLOAT, NULL); + CHECK_GL(); + + // Attach accumulator ping-pong textures to framebuffer + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, accumFB); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, accumTextures[0], 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_RECTANGLE_ARB, accumTextures[1], 0); + if (!checkFramebufferStatus(__LINE__, __FILE__)) { + exit(1); + } + + // Setup src texture GLuint srcTexture; glGenTextures(1, &srcTexture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexture); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, 
GL_NEAREST); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, (warparound) ? GL_REPEAT : GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - const float borderColor[] = {0.0, 0.0, 0.0, 0.0}; glTexParameterfv(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_BORDER_COLOR, borderColor); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA32F_ARB, srcSize.x, srcSize.y, 0, XGLMap[srcGLFormat], XGLMap[srcGLType], srcBuffer); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, XGLMap[srcGLInternalFormat], sourceChunks[0].width(), sourceChunks[0].height(), 0, XGLMap[srcGLFormat], XGLMap[srcGLType], NULL); CHECK_GL(); - if (srcAlphaBuffer != NULL) { - static bool createdAlphaShader = false; - static GLhandleARB alphaProgramObject; - static GLhandleARB alphaShaderObject; - static GLint srcAlphaTextureParam; + // Setup alpha composite framebuffer + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, srcFB); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, srcTexture, 0); + if (!checkFramebufferStatus(__LINE__, __FILE__)) { + exit(1); + } - if (!createdAlphaShader) { - alphaProgramObject = glCreateProgramObjectARB(); - alphaShaderObject = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB); - glShaderSourceARB(alphaShaderObject, 1, &AlphaCombineKernelSource, NULL); - glCompileShaderARB(alphaShaderObject); - printInfoLog(alphaShaderObject); - glAttachObjectARB(alphaProgramObject, alphaShaderObject); - glLinkProgramARB(alphaProgramObject); - printInfoLog(alphaProgramObject); - GLint success; - glGetObjectParameterivARB(alphaProgramObject, GL_OBJECT_LINK_STATUS_ARB, &success); - if (!success) { - cerr << "nona: GPU alpha combine shader program could not be linked." 
<< endl; - exit(1); - } - srcAlphaTextureParam = glGetUniformLocationARB(alphaProgramObject, "SrcAlphaTexture"); - createdAlphaShader = true; - } + // Setup src alpha and src rgb transfer textures + GLuint srcAlphaTexture; + glGenTextures(1, &srcAlphaTexture); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcAlphaTexture); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_ALPHA, sourceChunks[0].width(), sourceChunks[0].height(), 0, GL_ALPHA, XGLMap[srcAlphaGLType], NULL); + CHECK_GL(); - glUseProgramObjectARB(alphaProgramObject); + GLuint srcTransferTexture; + glGenTextures(1, &srcTransferTexture); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTransferTexture); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, XGLMap[srcGLTransferFormat], sourceChunks[0].width(), sourceChunks[0].height(), 0, XGLMap[srcGLFormat], XGLMap[srcGLType], NULL); + CHECK_GL(); - GLuint srcAlphaTexture; - glGenTextures(1, &srcAlphaTexture); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcAlphaTexture); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_LUMINANCE32F_ARB, srcSize.x, srcSize.y, 0, 
XGLMap[srcAlphaGLFormat], XGLMap[srcAlphaGLType], srcAlphaBuffer); - CHECK_GL(); + // Setup dest and destalpha textures and framebuffers + GLuint destTexture; + glGenTextures(1, &destTexture); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, destTexture); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, XGLMap[destGLTransferFormat], destChunks[0].width(), destChunks[0].height() + destOdd, 0, XGLMap[destGLFormat], XGLMap[destGLType], NULL); - GLuint afb; - glGenFramebuffersEXT(1, &afb); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, afb); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, destFB); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, destTexture, 0); + if (!checkFramebufferStatus(__LINE__, __FILE__)) { + exit(1); + } - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, srcTexture, 0); + GLuint destAlphaTexture; + glGenTextures(1, &destAlphaTexture); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, destAlphaTexture); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_ALPHA, destChunks[0].width(), destChunks[0].height() + destOdd, 0, GL_ALPHA, XGLMap[destAlphaGLType], NULL); + CHECK_GL(); - if (!checkFramebufferStatus()) { - exit(1); - } + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, destAlphaFB); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, 
GL_TEXTURE_RECTANGLE_ARB, destAlphaTexture, 0); + if (!checkFramebufferStatus(__LINE__, __FILE__)) { + exit(1); + } - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - gluOrtho2D(0.0, srcSize.x, 0.0, srcSize.y); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glViewport(0, 0, srcSize.x, srcSize.y); - glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + glFinish(); + gettimeofday(&t2, NULL); + cout << "gpu shader texture/framebuffer setup time = " << (t2.tv_sec - t21.tv_sec + 1e-6*(t2.tv_usec - t21.tv_usec)) << endl; - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcAlphaTexture); - glUniform1iARB(srcAlphaTextureParam, 0); - CHECK_GL(); + // Render each dest chunk + int destChunkNumber = 0; + for (vector<Rect2D>::iterator dI = destChunks.begin(); dI != destChunks.end(); ++dI, ++destChunkNumber) { - timeval at1; - gettimeofday(&at1, NULL); + glFinish(); + gettimeofday(&t3, NULL); + // Render coord image + glUseProgramObjectARB(coordXformProgramObject); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, coordFB); glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glPolygonMode(GL_FRONT, GL_FILL); glBegin(GL_QUADS); - glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); - glTexCoord2f(srcSize.x, 0.0); glVertex2f(srcSize.x, 0.0); - glTexCoord2f(srcSize.x, srcSize.y); glVertex2f(srcSize.x, srcSize.y); - glTexCoord2f(0.0, srcSize.y); glVertex2f(0.0, srcSize.y); + glTexCoord2f(xstart + dI->left(), ystart + dI->top()); glVertex2f(0.0, 0.0); + glTexCoord2f(xstart + dI->right(), ystart + dI->top()); glVertex2f(dI->width(), 0.0); + glTexCoord2f(xstart + dI->right(), ystart + dI->bottom()); glVertex2f(dI->width(), dI->height()); + glTexCoord2f(xstart + dI->left(), ystart + dI->bottom()); glVertex2f(0.0, dI->height()); glEnd(); CHECK_GL(); glFinish(); + gettimeofday(&t4, NULL); + cout << "gpu dest chunk=" << *dI << " coord image render time = " << (t4.tv_sec - t3.tv_sec + 1e-6*(t4.tv_usec - t3.tv_usec)) << endl; + + // Multipass rendering of dest image + int 
pass = 0; + for (vector<Rect2D>::iterator sI = sourceChunks.begin(); sI != sourceChunks.end(); ++sI, ++pass) { + + if (destChunkNumber == 0 || sourceChunks.size() > 1) { + glFinish(); + gettimeofday(&t5, NULL); + + // Setup src and srcAlpha textures + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, srcFB); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glClear(GL_COLOR_BUFFER_BIT); + + glUseProgramObjectARB(0); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, srcSize.x); + glPixelStorei(GL_UNPACK_SKIP_PIXELS, sI->left()); + glPixelStorei(GL_UNPACK_SKIP_ROWS, sI->top()); + + glActiveTexture(GL_TEXTURE0); + glEnable(GL_TEXTURE_RECTANGLE_ARB); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTransferTexture); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, sI->width(), sI->height(), XGLMap[srcGLFormat], XGLMap[srcGLType], srcBuffer); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); + + glFinish(); + gettimeofday(&t6, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " src upload = " << (t6.tv_sec - t5.tv_sec + 1e-6*(t6.tv_usec - t5.tv_usec)) << endl; + + if (srcAlphaBuffer != NULL) { + glActiveTexture(GL_TEXTURE1); + glEnable(GL_TEXTURE_RECTANGLE_ARB); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcAlphaTexture); + glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, sI->width(), sI->height(), GL_ALPHA, XGLMap[srcAlphaGLType], srcAlphaBuffer); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); + + glFinish(); + gettimeofday(&t7, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " src alpha upload = " << (t7.tv_sec - t6.tv_sec + 1e-6*(t7.tv_usec - t6.tv_usec)) << endl; + + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glMultiTexCoord2f(GL_TEXTURE0, 0.0, 0.0); glMultiTexCoord2f(GL_TEXTURE1, 0.0, 0.0); glVertex2f(0.0, 0.0); + glMultiTexCoord2f(GL_TEXTURE0, sI->width(), 0.0); glMultiTexCoord2f(GL_TEXTURE1, sI->width(), 0.0); glVertex2f(sI->width(), 0.0); + glMultiTexCoord2f(GL_TEXTURE0, 
sI->width(), sI->height()); glMultiTexCoord2f(GL_TEXTURE1, sI->width(), sI->height()); glVertex2f(sI->width(), sI->height()); + glMultiTexCoord2f(GL_TEXTURE0, 0.0, sI->height()); glMultiTexCoord2f(GL_TEXTURE1, 0.0, sI->height()); glVertex2f(0.0, sI->height()); + glEnd(); + CHECK_GL(); + + glActiveTexture(GL_TEXTURE0); + glDisable(GL_TEXTURE_RECTANGLE_ARB); + glActiveTexture(GL_TEXTURE1); + glDisable(GL_TEXTURE_RECTANGLE_ARB); + CHECK_GL(); + + glFinish(); + gettimeofday(&t8, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " src+alpha render = " << (t8.tv_sec - t7.tv_sec + 1e-6*(t8.tv_usec - t7.tv_usec)) << endl; + } + else { + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); + glTexCoord2f(sI->width(), 0.0); glVertex2f(sI->width(), 0.0); + glTexCoord2f(sI->width(), sI->height()); glVertex2f(sI->width(), sI->height()); + glTexCoord2f(0.0, sI->height()); glVertex2f(0.0, sI->height()); + glEnd(); + CHECK_GL(); + + glActiveTexture(GL_TEXTURE0); + glDisable(GL_TEXTURE_RECTANGLE_ARB); + CHECK_GL(); + + glFinish(); + gettimeofday(&t7, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " src render = " << (t7.tv_sec - t6.tv_sec + 1e-6*(t7.tv_usec - t6.tv_usec)) << endl; + } + } + + glFinish(); + gettimeofday(&t9, NULL); + + // Render dest image + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, accumFB); + glUseProgramObjectARB(interpolatorProgramObject); + + if (pass == 0) { + // Clear ping accum texture on first pass. + glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); + glClear(GL_COLOR_BUFFER_BIT); + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, coordTexture); + glUniform1iARB(coordTextureParam, 0); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, (pass & 1) ? 
accumTextures[0] : accumTextures[1]); + glUniform1iARB(accumTextureParam, 1); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexture); + glUniform1iARB(srcTextureParam, 2); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); + + glUniform2fARB(srcULParam, sI->left(), sI->top()); + CHECK_GL(); + glUniform2fARB(srcLRParam, sI->right(), sI->bottom()); + CHECK_GL(); + + glDrawBuffer((pass & 1) ? GL_COLOR_ATTACHMENT1_EXT : GL_COLOR_ATTACHMENT0_EXT); + + glFinish(); + gettimeofday(&t10, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " interpolation setup = " << (t10.tv_sec - t9.tv_sec + 1e-6*(t10.tv_usec - t9.tv_usec)) << endl; + + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); + glTexCoord2f(dI->width(), 0.0); glVertex2f(dI->width(), 0.0); + glTexCoord2f(dI->width(), dI->height()); glVertex2f(dI->width(), dI->height()); + glTexCoord2f(0.0, dI->height()); glVertex2f(0.0, dI->height()); + glEnd(); + CHECK_GL(); + + glFinish(); + gettimeofday(&t11, NULL); + cout << "gpu dest chunk=" << *dI << " source chunk=" << *sI << " interpolation render = " << (t11.tv_sec - t10.tv_sec + 1e-6*(t11.tv_usec - t10.tv_usec)) << endl; + + } + + // normalization/photometric rendering pass + glUseProgramObjectARB(normalizationPhotometricProgramObject); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, (pass & 1) ? accumTextures[0] : accumTextures[1]); + glUniform1iARB(normTextureParam, 0); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); CHECK_GL(); - timeval at2; - gettimeofday(&at2, NULL); - cout << "gpu alpha composite time = " << (at2.tv_sec - at1.tv_sec + 1e-6*(at2.tv_usec - at1.tv_usec)) << endl; + glDrawBuffer((pass & 1) ? 
GL_COLOR_ATTACHMENT1_EXT : GL_COLOR_ATTACHMENT0_EXT); + + glFinish(); + gettimeofday(&t12, NULL); + cout << "gpu dest chunk=" << *dI << " normalization setup = " << (t12.tv_sec - t11.tv_sec + 1e-6*(t12.tv_usec - t11.tv_usec)) << endl; - glDeleteFramebuffersEXT(1, &afb); - glDeleteTextures(1, &srcAlphaTexture); - } + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); + glTexCoord2f(dI->width(), 0.0); glVertex2f(dI->width(), 0.0); + glTexCoord2f(dI->width(), dI->height()); glVertex2f(dI->width(), dI->height()); + glTexCoord2f(0.0, dI->height()); glVertex2f(0.0, dI->height()); + glEnd(); + CHECK_GL(); - GLhandleARB programObject = glCreateProgramObjectARB(); - GLhandleARB shaderObject = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB); - glShaderSourceARB(shaderObject, 1, &remapKernelSource, NULL); - glCompileShaderARB(shaderObject); - printInfoLog(shaderObject); - glAttachObjectARB(programObject, shaderObject); - glLinkProgramARB(programObject); - printInfoLog(programObject); - GLint success; - glGetObjectParameterivARB(programObject, GL_OBJECT_LINK_STATUS_ARB, &success); - if (!success) { - cerr << "nona: GPU remap shader program could not be linked." 
<< endl; - exit(1); - } + glFinish(); + gettimeofday(&t13, NULL); + cout << "gpu dest chunk=" << *dI << " normalization render = " << (t13.tv_sec - t12.tv_sec + 1e-6*(t13.tv_usec - t12.tv_usec)) << endl; - GLint srcTextureParam = glGetUniformLocationARB(programObject, "SrcTexture"); + pass++; - glUseProgramObjectARB(programObject); + // Move output accumTexture to dest texture + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, destFB); + glUseProgramObjectARB(0); - GLuint outTexture; - glGenTextures(1, &outTexture); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, outTexture); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP); - glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP); - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA32F_ARB, destSize.x, destSize.y, 0, XGLMap[destGLFormat], XGLMap[destGLType], NULL); - CHECK_GL(); + glActiveTexture(GL_TEXTURE0); + glEnable(GL_TEXTURE_RECTANGLE_ARB); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, (pass & 1) ? 
accumTextures[0] : accumTextures[1]); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); - GLuint fb; - glGenFramebuffersEXT(1, &fb); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb); + glFinish(); + gettimeofday(&t14, NULL); + cout << "gpu dest chunk=" << *dI << " dest rgb disassembly setup = " << (t14.tv_sec - t13.tv_sec + 1e-6*(t14.tv_usec - t13.tv_usec)) << endl; - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, outTexture, 0); + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); + glTexCoord2f(dI->width(), 0.0); glVertex2f(dI->width(), 0.0); + glTexCoord2f(dI->width(), dI->height()); glVertex2f(dI->width(), dI->height()); + glTexCoord2f(0.0, dI->height()); glVertex2f(0.0, dI->height()); + glEnd(); + CHECK_GL(); - if (!checkFramebufferStatus()) { - exit(1); - } + glActiveTexture(GL_TEXTURE0); + glDisable(GL_TEXTURE_RECTANGLE_ARB); + CHECK_GL(); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - gluOrtho2D(0.0, destSize.x, 0.0, destSize.y); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glViewport(0, 0, destSize.x, destSize.y); + glFinish(); + gettimeofday(&t15, NULL); + cout << "gpu dest chunk=" << *dI << " dest rgb disassembly render = " << (t15.tv_sec - t14.tv_sec + 1e-6*(t15.tv_usec - t14.tv_usec)) << endl; - glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + // Readback dest chunk + glPixelStorei(GL_PACK_ROW_LENGTH, destSize.x); + glPixelStorei(GL_PACK_SKIP_PIXELS, dI->left()); + glPixelStorei(GL_PACK_SKIP_ROWS, dI->top()); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexture); - glUniform1iARB(srcTextureParam, 0); - CHECK_GL(); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + CHECK_GL(); - timeval t1; - gettimeofday(&t1, NULL); + glReadPixels(0, 0, dI->width(), dI->height(), XGLMap[destGLFormat], XGLMap[destGLType], destBuffer); + CHECK_GL(); - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - 
glPolygonMode(GL_FRONT, GL_FILL); - glBegin(GL_QUADS); - glTexCoord2f(xstart, ystart); glVertex2f(0.0, 0.0); - glTexCoord2f(xend, ystart); glVertex2f(destSize.x, 0.0); - glTexCoord2f(xend, yend); glVertex2f(destSize.x, destSize.y); - glTexCoord2f(xstart, yend); glVertex2f(0.0, destSize.y); - glEnd(); - CHECK_GL(); + glFinish(); + gettimeofday(&t16, NULL); + cout << "gpu dest chunk=" << *dI << " rgb readback = " << (t16.tv_sec - t15.tv_sec + 1e-6*(t16.tv_usec - t15.tv_usec)) << endl; - glFinish(); - CHECK_GL(); + // Move output accumTexture to dest alpha texture + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, destAlphaFB); + glUseProgramObjectARB(0); - timeval t2; - gettimeofday(&t2, NULL); + glActiveTexture(GL_TEXTURE0); + glEnable(GL_TEXTURE_RECTANGLE_ARB); + glBindTexture(GL_TEXTURE_RECTANGLE_ARB, (pass & 1) ? accumTextures[0] : accumTextures[1]); + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + CHECK_GL(); - cout << "gpu render time = " << (t2.tv_sec - t1.tv_sec + 1e-6*(t2.tv_usec - t1.tv_usec)) << endl; + glFinish(); + gettimeofday(&t17, NULL); + cout << "gpu dest chunk=" << *dI << " dest alpha disassembly setup = " << (t17.tv_sec - t16.tv_sec + 1e-6*(t17.tv_usec - t16.tv_usec)) << endl; - glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); - CHECK_GL(); + glPolygonMode(GL_FRONT, GL_FILL); + glBegin(GL_QUADS); + glTexCoord2f(0.0, 0.0); glVertex2f(0.0, 0.0); + glTexCoord2f(dI->width(), 0.0); glVertex2f(dI->width(), 0.0); + glTexCoord2f(dI->width(), dI->height()); glVertex2f(dI->width(), dI->height()); + glTexCoord2f(0.0, dI->height()); glVertex2f(0.0, dI->height()); + glEnd(); + CHECK_GL(); - glReadPixels(0, 0, destSize.x, destSize.y, XGLMap[destGLFormat], XGLMap[destGLType], destBuffer); - CHECK_GL(); + glActiveTexture(GL_TEXTURE0); + glDisable(GL_TEXTURE_RECTANGLE_ARB); + CHECK_GL(); - timeval t3; - gettimeofday(&t3, NULL); + glFinish(); + gettimeofday(&t18, NULL); + cout << "gpu dest chunk=" << *dI << " dest alpha disassembly render = " << (t18.tv_sec - 
t17.tv_sec + 1e-6*(t18.tv_usec - t17.tv_usec)) << endl; - cout << "gpu rgb readback time = " << (t3.tv_sec - t2.tv_sec + 1e-6*(t3.tv_usec - t2.tv_usec)) << endl; + // Readback dest alpha chunk + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + CHECK_GL(); - glReadPixels(0, 0, destSize.x, destSize.y, GL_ALPHA, XGLMap[destAlphaGLType], destAlphaBuffer); - CHECK_GL(); + glReadPixels(0, 0, dI->width(), dI->height(), GL_ALPHA, XGLMap[destAlphaGLType], destAlphaBuffer); + CHECK_GL(); - timeval t4; - gettimeofday(&t4, NULL); + glFinish(); + gettimeofday(&t19, NULL); + cout << "gpu dest chunk=" << *dI << " alpha readback = " << (t19.tv_sec - t18.tv_sec + 1e-6*(t19.tv_usec - t18.tv_usec)) << endl; - cout << "gpu alpha readback time = " << (t4.tv_sec - t3.tv_sec + 1e-6*(t4.tv_usec - t3.tv_usec)) << endl; + } - glDeleteFramebuffersEXT(1, &fb); - glDeleteTextures(1, &outTexture); + glDeleteTextures(2, accumTextures); + glDeleteTextures(1, &coordTexture); glDeleteTextures(1, &srcTexture); + glDeleteTextures(1, &srcAlphaTexture); + glDeleteTextures(1, &srcTransferTexture); + glDeleteTextures(1, &destTexture); + glDeleteTextures(1, &destAlphaTexture); + glDeleteFramebuffersEXT(5, framebuffers); + glUseProgramObjectARB(0); - glDeleteObjectARB(shaderObject); - glDeleteObjectARB(programObject); + glDeleteObjectARB(coordXformShaderObject); + glDeleteObjectARB(coordXformProgramObject); + glDeleteObjectARB(interpolatorShaderObject); + glDeleteObjectARB(interpolatorProgramObject); + glDeleteObjectARB(normalizationPhotometricShaderObject); + glDeleteObjectARB(normalizationPhotometricProgramObject); + glFinish(); + gettimeofday(&t20, NULL); + cout << "gpu destruct time = " << (t20.tv_sec - t19.tv_sec + 1e-6*(t20.tv_usec - t19.tv_usec)) << endl; + cout << "gpu total time = " << (t20.tv_sec - t1.tv_sec + 1e-6*(t20.tv_usec - t1.tv_usec)) << endl; + return true; } Modified: hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.h 
=================================================================== --- hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.h 2008-11-18 18:09:36 UTC (rev 3562) +++ hugin/branches/nona-gpu/src/hugin_base/vigra_ext/ImageTransformsGPU.h 2008-11-21 07:32:43 UTC (rev 3563) @@ -51,48 +51,78 @@ namespace vigra_ext { -bool transformImageGPUIntern(const std::string& glsl, +bool transformImageGPUIntern(const std::string& coordXformGLSL, + const std::string& interpolatorGLSL, + const int interpolatorSize, const vigra::Diff2D srcSize, const void* const srcBuffer, - const int srcGLType, const int srcGLFormat, + const int srcGLInternalFormat, const int srcGLTransferFormat, const int srcGLFormat, const int srcGLType, const void* const srcAlphaBuffer, - const int srcAlphaGLType, const int srcAlphaGLFormat, + const int srcAlphaGLType, const vigra::Diff2D destUL, const vigra::Diff2D destSize, void* const destBuffer, - const int destGLType, const int destGLFormat, + const int destGLInternalFormat, const int destGLTransferFormat, const int destGLFormat, const int destGLType, void* const destAlphaBuffer, - const int destAlphaGLType, const int destAlphaGLFormat, + ... [truncated message content] |