From 627aa610fc93398ac51129096ce8fb30731af341 Mon Sep 17 00:00:00 2001 From: Andreas Grois Date: Sun, 11 Mar 2018 21:27:11 +0100 Subject: Split buffer in three, one per color (as OpenGL guarantees that one can have at least 8 SSBOs), to allow three times larger images. Also make it possible to ignore the maximum buffer size reported by the driver. The maximum buffer size limitation is rather strict on radeonsi, so I decided to split the buffer in three, effectively increasing the maximum image size by a factor of three. While doing so I realized that at least on radeonsi the reported maximum buffer size seems to be off. For this reason I added a new command line switch that allows ignoring the maximum buffer size check. For those curious: radeonsi reports a maximum buffer size of 128 MB, but I had no problems when using three buffers of 1098 MB each. --- BuddhaTest/Shaders/BuddhaCompute.glsl | 20 ++++++++++++------ BuddhaTest/Shaders/BuddhaFragment.glsl | 16 +++++++++++---- BuddhaTest/include/Helpers.h | 2 ++ BuddhaTest/src/BuddhaTest.cpp | 37 ++++++++++++++++++++++------------ BuddhaTest/src/Helpers.cpp | 14 ++++++++----- 5 files changed, 61 insertions(+), 28 deletions(-) diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index 9d8301a..ddcbace 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -2,9 +2,17 @@ //#version 430 //layout (local_size_x = 1024) in; //to be safe, we limit our local work group size to 1024. That's the minimum a GL 4.3 capable driver must support. 
-layout(std430, binding=2) buffer renderedData +layout(std430, binding=2) buffer renderedDataRed { - uint counts_SSBO[]; + uint counts_SSBORed[]; +}; +layout(std430, binding=3) buffer renderedDataGreen +{ + uint counts_SSBOGreen[]; +}; +layout(std430, binding=4) buffer renderedDataBlue +{ + uint counts_SSBOBlue[]; }; uniform uint width; @@ -15,10 +23,10 @@ uniform uvec3 orbitLength; void addToColorOfCell(uvec2 cell, uvec3 toAdd) { - uint firstIndex = 3*(cell.x + cell.y * width); - atomicAdd(counts_SSBO[firstIndex],toAdd.x); - atomicAdd(counts_SSBO[firstIndex+1],toAdd.y); - atomicAdd(counts_SSBO[firstIndex+2],toAdd.z); + uint firstIndex = (cell.x + cell.y * width); + atomicAdd(counts_SSBORed[firstIndex],toAdd.x); + atomicAdd(counts_SSBOGreen[firstIndex],toAdd.y); + atomicAdd(counts_SSBOBlue[firstIndex],toAdd.z); } uvec2 getCell(vec2 complex) diff --git a/BuddhaTest/Shaders/BuddhaFragment.glsl b/BuddhaTest/Shaders/BuddhaFragment.glsl index 1556065..24fb6a7 100644 --- a/BuddhaTest/Shaders/BuddhaFragment.glsl +++ b/BuddhaTest/Shaders/BuddhaFragment.glsl @@ -4,9 +4,17 @@ in vec2 uv; out vec3 color; -layout(std430, binding=2) buffer renderedData +layout(std430, binding=2) buffer renderedDataRed { - uint counts_SSBO[]; + uint counts_SSBORed[]; +}; +layout(std430, binding=3) buffer renderedDataGreen +{ + uint counts_SSBOGreen[]; +}; +layout(std430, binding=4) buffer renderedDataBlue +{ + uint counts_SSBOBlue[]; }; uniform uint width; @@ -16,8 +24,8 @@ uvec3 getColorAt(vec2 fragCoord) { uint xIndex = uint(max(0.0,(fragCoord.x+1.0)*0.5*width)); uint yIndex = uint(max(0.0,abs(fragCoord.y)*height)); - uint firstIndex = 3*(xIndex + yIndex * width); - return uvec3(counts_SSBO[firstIndex],counts_SSBO[firstIndex+1],counts_SSBO[firstIndex+2]); + uint firstIndex = (xIndex + yIndex * width); + return uvec3(counts_SSBORed[firstIndex],counts_SSBOGreen[firstIndex],counts_SSBOBlue[firstIndex]); } void main(){ diff --git a/BuddhaTest/include/Helpers.h b/BuddhaTest/include/Helpers.h index 
76a9a57..6358066 100644 --- a/BuddhaTest/include/Helpers.h +++ b/BuddhaTest/include/Helpers.h @@ -50,6 +50,8 @@ namespace Helpers double pngGamma = 1.0; double pngColorScale = 2.0; + unsigned int ignoreMaxBufferSize = 0; + bool CheckValidity(); bool ParseCommandLine(int argc, char * argv[]); }; diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 1709853..5d03ad4 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -58,7 +58,7 @@ int main(int argc, char * argv[]) //we have a context. Let's check if input is sane. //calcualte buffer size, and make sure it's allowed by the driver. - const unsigned int pixelCount{(settings.imageWidth * bufferHeight)*3}; //*3 -> RGB + const unsigned int pixelCount{(settings.imageWidth * bufferHeight)}; if(!settings.CheckValidity()) { glfwTerminate(); @@ -109,15 +109,18 @@ int main(int argc, char * argv[]) glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW); - GLuint drawBuffer; - glGenBuffers(1, &drawBuffer); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer); + GLuint drawBuffer[3]; + glGenBuffers(3, drawBuffer); + for(int i=0; i < 3; ++i) { - glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY); - glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]); + { + glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY); + glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); + } + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2+i, drawBuffer[i]); } - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, drawBuffer); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); uint32_t iterationCount{0}; glUseProgram(ComputeShader); @@ -188,17 +191,25 @@ int main(int argc, char * argv[]) if(!settings.pngFilename.empty()) { 
glMemoryBarrier(GL_ALL_BARRIER_BITS); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer); + std::vector> readBackBuffers(3,std::vector(pixelCount)); + for(int i = 0; i < 3; ++i) + { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]); + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffers[i].data()); + } + + //too lazy to change WriteOutputPng... + std::vector combinedBuffer(3*pixelCount); + for(int i=0;i<3*pixelCount;++i) { - std::vector readBackBuffer(pixelCount); - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffer.data()); - Helpers::WriteOutputPNG(settings.pngFilename,readBackBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale); + combinedBuffer[i] = readBackBuffers[i%3][i/3]; } + Helpers::WriteOutputPNG(settings.pngFilename,combinedBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale); } //a bit of cleanup glDeleteBuffers(1,&vertexbuffer); - glDeleteBuffers(1,&drawBuffer); + glDeleteBuffers(3,drawBuffer); glfwTerminate(); return 0; diff --git a/BuddhaTest/src/Helpers.cpp b/BuddhaTest/src/Helpers.cpp index e1a4115..3cb786c 100644 --- a/BuddhaTest/src/Helpers.cpp +++ b/BuddhaTest/src/Helpers.cpp @@ -264,10 +264,12 @@ namespace Helpers } int maxSSBOSize; glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE,&maxSSBOSize); - if((static_cast(imageWidth) * static_cast(imageHeight)) > static_cast(maxSSBOSize)/6) + if((static_cast(imageWidth) * static_cast(imageHeight)) > static_cast(maxSSBOSize)/2) //divided by 2, as we have 4 bytes per int, but only half the image height { - std::cerr << "Requested buffer size larger than maximum allowed by graphics driver. Max pixel number: " << maxSSBOSize/6 << std::endl; - return false; + std::cerr << "Requested buffer size is larger than maximum allowed by graphics driver. 
Max pixel number: " << maxSSBOSize/2 << std::endl; + std::cerr << "You can override this limit check using the --ignoreMaxBufferSize 1 command line parameter, but doing so is your own risk." << std::endl; + if(ignoreMaxBufferSize == 0) + return false; } int WorkGroupSizeLimitX, WorkGroupSizeLimitY, WorkGroupSizeLimitZ; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,&WorkGroupSizeLimitX); @@ -332,7 +334,8 @@ namespace Helpers {"--globalWorkgroupSizeZ", &globalWorkGroupSizeZ}, {"--imageGamma",&pngGamma}, {"--imageColorScale",&pngColorScale}, - {"--output", &pngFilename} + {"--output", &pngFilename}, + {"--ignoreMaxBufferSize", &ignoreMaxBufferSize} }; for(int i=1; i < argc;++i) @@ -358,7 +361,8 @@ namespace Helpers "\tNOTE: There's also a limit on the product of the three local workgroup sizes, for which a number smaller or equal to 1024 is guaranteed to work. Higher numbers might work and run faster. Feel free to experiment." << std::endl << "--globalWorkgroupSizeX [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1024." << std::endl << "--globalWorkgroupSizeY [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << - "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl; + "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << + "--ignoreMaxBufferSize [0,1] : If set to 1, a failed maximum buffer size check is not treated as error. Some graphics drivers report lower values than their absolute limit. Do this on your own risk, though." << std::endl; return false; } } -- cgit v1.2.3