From 627aa610fc93398ac51129096ce8fb30731af341 Mon Sep 17 00:00:00 2001 From: Andreas Grois Date: Sun, 11 Mar 2018 21:27:11 +0100 Subject: Split buffer in three, one per color (as OpenGL guarantees that one can have at least 8 SSBOs), to allow three times larger images. Also make it possible to ignore maximum buffer size reported by the driver. The maximum buffer size limitation is rather strict on radeonsi, so I decided to split the buffer in three, effectively increasing the maximum image size by a factor of three. While doing so I realized that at least on radeonsi the reported maximum buffer size seems to be off. For this reason I added a new command line switch that allows ignoring the maximum buffer size check. For those curious: radeonsi reports a maximum buffer size of 128 MB, but I had no problems when using three buffers of 1098 MB each. --- BuddhaTest/src/BuddhaTest.cpp | 37 ++++++++++++++++++++++++------------- BuddhaTest/src/Helpers.cpp | 14 +++++++++----- 2 files changed, 33 insertions(+), 18 deletions(-) (limited to 'BuddhaTest/src') diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 1709853..5d03ad4 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -58,7 +58,7 @@ int main(int argc, char * argv[]) //we have a context. Let's check if input is sane. //calcualte buffer size, and make sure it's allowed by the driver. 
- const unsigned int pixelCount{(settings.imageWidth * bufferHeight)*3}; //*3 -> RGB + const unsigned int pixelCount{(settings.imageWidth * bufferHeight)}; if(!settings.CheckValidity()) { glfwTerminate(); @@ -109,15 +109,18 @@ int main(int argc, char * argv[]) glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW); - GLuint drawBuffer; - glGenBuffers(1, &drawBuffer); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer); + GLuint drawBuffer[3]; + glGenBuffers(3, drawBuffer); + for(int i=0; i < 3; ++i) { - glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY); - glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]); + { + glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY); + glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); + } + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2+i, drawBuffer[i]); } - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, drawBuffer); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); uint32_t iterationCount{0}; glUseProgram(ComputeShader); @@ -188,17 +191,25 @@ int main(int argc, char * argv[]) if(!settings.pngFilename.empty()) { glMemoryBarrier(GL_ALL_BARRIER_BITS); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer); + std::vector> readBackBuffers(3,std::vector(pixelCount)); + for(int i = 0; i < 3; ++i) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]); + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffers[i].data()); + } + + //too lazy to change WriteOutputPng... 
+ std::vector combinedBuffer(3*pixelCount); + for(int i=0;i<3*pixelCount;++i) { - std::vector readBackBuffer(pixelCount); - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffer.data()); - Helpers::WriteOutputPNG(settings.pngFilename,readBackBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale); + combinedBuffer[i] = readBackBuffers[i%3][i/3]; } + Helpers::WriteOutputPNG(settings.pngFilename,combinedBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale); } //a bit of cleanup glDeleteBuffers(1,&vertexbuffer); - glDeleteBuffers(1,&drawBuffer); + glDeleteBuffers(3,drawBuffer); glfwTerminate(); return 0; diff --git a/BuddhaTest/src/Helpers.cpp b/BuddhaTest/src/Helpers.cpp index e1a4115..3cb786c 100644 --- a/BuddhaTest/src/Helpers.cpp +++ b/BuddhaTest/src/Helpers.cpp @@ -264,10 +264,12 @@ namespace Helpers } int maxSSBOSize; glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE,&maxSSBOSize); - if((static_cast(imageWidth) * static_cast(imageHeight)) > static_cast(maxSSBOSize)/6) + if((static_cast(imageWidth) * static_cast(imageHeight)) > static_cast(maxSSBOSize)/2) //divided by 2, as we have 4 bytes per int, but only half the image height { - std::cerr << "Requested buffer size larger than maximum allowed by graphics driver. Max pixel number: " << maxSSBOSize/6 << std::endl; - return false; + std::cerr << "Requested buffer size is larger than maximum allowed by graphics driver. Max pixel number: " << maxSSBOSize/2 << std::endl; + std::cerr << "You can override this limit check using the --ignoreMaxBufferSize 1 command line parameter, but doing so is your own risk." 
<< std::endl; + if(ignoreMaxBufferSize == 0) + return false; } int WorkGroupSizeLimitX, WorkGroupSizeLimitY, WorkGroupSizeLimitZ; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,&WorkGroupSizeLimitX); @@ -332,7 +334,8 @@ namespace Helpers {"--globalWorkgroupSizeZ", &globalWorkGroupSizeZ}, {"--imageGamma",&pngGamma}, {"--imageColorScale",&pngColorScale}, - {"--output", &pngFilename} + {"--output", &pngFilename}, + {"--ignoreMaxBufferSize", &ignoreMaxBufferSize} }; for(int i=1; i < argc;++i) @@ -358,7 +361,8 @@ namespace Helpers "\tNOTE: There's also a limit on the product of the three local workgroup sizes, for which a number smaller or equal to 1024 is guaranteed to work. Higher numbers might work and run faster. Feel free to experiment." << std::endl << "--globalWorkgroupSizeX [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1024." << std::endl << "--globalWorkgroupSizeY [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << - "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl; + "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << + "--ignoreMaxBufferSize [0,1] : If set to 1, a failed maximum buffer size check is not treated as error. Some graphics drivers report lower values than their absolute limit. Do this on your own risk, though." << std::endl; return false; } } -- cgit v1.2.3