diff options
author | Andreas Grois <andi@grois.info> | 2018-03-11 21:27:11 +0100 |
---|---|---|
committer | Andreas Grois <andi@grois.info> | 2018-03-11 21:27:11 +0100 |
commit | 627aa610fc93398ac51129096ce8fb30731af341 (patch) | |
tree | 54c38179129a32bbe85753af63fc8b8e49b4c89c | |
parent | bc666e3edcbd53f3bbe5446f0c4bc23e83c044a0 (diff) |
Split the buffer into three, one per color (as OpenGL guarantees that at least 8 SSBOs are available), to allow images three times as large. Also make it possible to ignore the maximum buffer size reported by the driver.
The maximum buffer size limitation is rather strict on radeonsi, so I decided to split the buffer into three, effectively increasing the maximum image size by a factor of three.
While doing so I realized that, at least on radeonsi, the reported maximum buffer size seems to be off. For this reason I added a new command-line switch that allows ignoring the maximum buffer size check.
For those curious: radeonsi reports a maximum buffer size of 128 MB, but I had no problems when using three buffers of 1098 MB each.
-rw-r--r-- | BuddhaTest/Shaders/BuddhaCompute.glsl | 20 | ||||
-rw-r--r-- | BuddhaTest/Shaders/BuddhaFragment.glsl | 16 | ||||
-rw-r--r-- | BuddhaTest/include/Helpers.h | 2 | ||||
-rw-r--r-- | BuddhaTest/src/BuddhaTest.cpp | 37 | ||||
-rw-r--r-- | BuddhaTest/src/Helpers.cpp | 14 |
5 files changed, 61 insertions, 28 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index 9d8301a..ddcbace 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -2,9 +2,17 @@ //#version 430
//layout (local_size_x = 1024) in; //to be safe, we limit our local work group size to 1024. That's the minimum a GL 4.3 capable driver must support.
-layout(std430, binding=2) buffer renderedData
+layout(std430, binding=2) buffer renderedDataRed
{
- uint counts_SSBO[];
+ uint counts_SSBORed[];
+};
+layout(std430, binding=3) buffer renderedDataGreen
+{
+ uint counts_SSBOGreen[];
+};
+layout(std430, binding=4) buffer renderedDataBlue
+{
+ uint counts_SSBOBlue[];
};
uniform uint width;
@@ -15,10 +23,10 @@ uniform uvec3 orbitLength; void addToColorOfCell(uvec2 cell, uvec3 toAdd)
{
- uint firstIndex = 3*(cell.x + cell.y * width);
- atomicAdd(counts_SSBO[firstIndex],toAdd.x);
- atomicAdd(counts_SSBO[firstIndex+1],toAdd.y);
- atomicAdd(counts_SSBO[firstIndex+2],toAdd.z);
+ uint firstIndex = (cell.x + cell.y * width);
+ atomicAdd(counts_SSBORed[firstIndex],toAdd.x);
+ atomicAdd(counts_SSBOGreen[firstIndex],toAdd.y);
+ atomicAdd(counts_SSBOBlue[firstIndex],toAdd.z);
}
uvec2 getCell(vec2 complex)
diff --git a/BuddhaTest/Shaders/BuddhaFragment.glsl b/BuddhaTest/Shaders/BuddhaFragment.glsl index 1556065..24fb6a7 100644 --- a/BuddhaTest/Shaders/BuddhaFragment.glsl +++ b/BuddhaTest/Shaders/BuddhaFragment.glsl @@ -4,9 +4,17 @@ in vec2 uv; out vec3 color;
-layout(std430, binding=2) buffer renderedData
+layout(std430, binding=2) buffer renderedDataRed
{
- uint counts_SSBO[];
+ uint counts_SSBORed[];
+};
+layout(std430, binding=3) buffer renderedDataGreen
+{
+ uint counts_SSBOGreen[];
+};
+layout(std430, binding=4) buffer renderedDataBlue
+{
+ uint counts_SSBOBlue[];
};
uniform uint width;
@@ -16,8 +24,8 @@ uvec3 getColorAt(vec2 fragCoord) {
uint xIndex = uint(max(0.0,(fragCoord.x+1.0)*0.5*width));
uint yIndex = uint(max(0.0,abs(fragCoord.y)*height));
- uint firstIndex = 3*(xIndex + yIndex * width);
- return uvec3(counts_SSBO[firstIndex],counts_SSBO[firstIndex+1],counts_SSBO[firstIndex+2]);
+ uint firstIndex = (xIndex + yIndex * width);
+ return uvec3(counts_SSBORed[firstIndex],counts_SSBOGreen[firstIndex],counts_SSBOBlue[firstIndex]);
}
void main(){
diff --git a/BuddhaTest/include/Helpers.h b/BuddhaTest/include/Helpers.h index 76a9a57..6358066 100644 --- a/BuddhaTest/include/Helpers.h +++ b/BuddhaTest/include/Helpers.h @@ -50,6 +50,8 @@ namespace Helpers double pngGamma = 1.0;
double pngColorScale = 2.0;
+ unsigned int ignoreMaxBufferSize = 0;
+
bool CheckValidity();
bool ParseCommandLine(int argc, char * argv[]);
};
diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 1709853..5d03ad4 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -58,7 +58,7 @@ int main(int argc, char * argv[]) //we have a context. Let's check if input is sane.
//calcualte buffer size, and make sure it's allowed by the driver.
- const unsigned int pixelCount{(settings.imageWidth * bufferHeight)*3}; //*3 -> RGB
+ const unsigned int pixelCount{(settings.imageWidth * bufferHeight)};
if(!settings.CheckValidity())
{
glfwTerminate();
@@ -109,15 +109,18 @@ int main(int argc, char * argv[]) glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW);
- GLuint drawBuffer;
- glGenBuffers(1, &drawBuffer);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer);
+ GLuint drawBuffer[3];
+ glGenBuffers(3, drawBuffer);
+ for(int i=0; i < 3; ++i)
{
- glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY);
- glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr);
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]);
+ {
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * pixelCount, nullptr, GL_DYNAMIC_COPY);
+ glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr);
+ }
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2+i, drawBuffer[i]);
}
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, drawBuffer);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
uint32_t iterationCount{0};
glUseProgram(ComputeShader);
@@ -188,17 +191,25 @@ int main(int argc, char * argv[]) if(!settings.pngFilename.empty())
{
glMemoryBarrier(GL_ALL_BARRIER_BITS);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer);
+ std::vector<std::vector<uint32_t>> readBackBuffers(3,std::vector<uint32_t>(pixelCount));
+ for(int i = 0; i < 3; ++i)
+ {
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, drawBuffer[i]);
+ glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffers[i].data());
+ }
+
+ //too lazy to change WriteOutputPng...
+ std::vector<uint32_t> combinedBuffer(3*pixelCount);
+ for(int i=0;i<3*pixelCount;++i)
{
- std::vector<uint32_t> readBackBuffer(pixelCount);
- glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4 * pixelCount,readBackBuffer.data());
- Helpers::WriteOutputPNG(settings.pngFilename,readBackBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale);
+ combinedBuffer[i] = readBackBuffers[i%3][i/3];
}
+ Helpers::WriteOutputPNG(settings.pngFilename,combinedBuffer,settings.imageWidth,bufferHeight, settings.pngGamma, settings.pngColorScale);
}
//a bit of cleanup
glDeleteBuffers(1,&vertexbuffer);
- glDeleteBuffers(1,&drawBuffer);
+ glDeleteBuffers(3,drawBuffer);
glfwTerminate();
return 0;
diff --git a/BuddhaTest/src/Helpers.cpp b/BuddhaTest/src/Helpers.cpp index e1a4115..3cb786c 100644 --- a/BuddhaTest/src/Helpers.cpp +++ b/BuddhaTest/src/Helpers.cpp @@ -264,10 +264,12 @@ namespace Helpers }
int maxSSBOSize;
glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE,&maxSSBOSize);
- if((static_cast<unsigned long>(imageWidth) * static_cast<unsigned long>(imageHeight)) > static_cast<unsigned long>(maxSSBOSize)/6)
+ if((static_cast<unsigned long>(imageWidth) * static_cast<unsigned long>(imageHeight)) > static_cast<unsigned long>(maxSSBOSize)/2) //divided by 2, as we have 4 bytes per int, but only half the image height
{
- std::cerr << "Requested buffer size larger than maximum allowed by graphics driver. Max pixel number: " << maxSSBOSize/6 << std::endl;
- return false;
+ std::cerr << "Requested buffer size is larger than maximum allowed by graphics driver. Max pixel number: " << maxSSBOSize/2 << std::endl;
+ std::cerr << "You can override this limit check using the --ignoreMaxBufferSize 1 command line parameter, but doing so is your own risk." << std::endl;
+ if(ignoreMaxBufferSize == 0)
+ return false;
}
int WorkGroupSizeLimitX, WorkGroupSizeLimitY, WorkGroupSizeLimitZ;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,&WorkGroupSizeLimitX);
@@ -332,7 +334,8 @@ namespace Helpers {"--globalWorkgroupSizeZ", &globalWorkGroupSizeZ},
{"--imageGamma",&pngGamma},
{"--imageColorScale",&pngColorScale},
- {"--output", &pngFilename}
+ {"--output", &pngFilename},
+ {"--ignoreMaxBufferSize", &ignoreMaxBufferSize}
};
for(int i=1; i < argc;++i)
@@ -358,7 +361,8 @@ namespace Helpers "\tNOTE: There's also a limit on the product of the three local workgroup sizes, for which a number smaller or equal to 1024 is guaranteed to work. Higher numbers might work and run faster. Feel free to experiment." << std::endl <<
"--globalWorkgroupSizeX [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1024." << std::endl <<
"--globalWorkgroupSizeY [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl <<
- "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl;
+ "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl <<
+ "--ignoreMaxBufferSize [0,1] : If set to 1, a failed maximum buffer size check is not treated as error. Some graphics drivers report lower values than their absolute limit. Do this on your own risk, though." << std::endl;
return false;
}
}
|