From a350181f2c15feb072c2edf5d0c41965936f1948 Mon Sep 17 00:00:00 2001 From: Andreas Grois Date: Wed, 14 Mar 2018 22:39:06 +0100 Subject: First commit with compute pausability --- BuddhaTest/Shaders/BuddhaCompute.glsl | 121 +++++++++++++++++++++++++++------- BuddhaTest/include/Helpers.h | 2 + BuddhaTest/src/BuddhaTest.cpp | 45 +++++++++---- BuddhaTest/src/Helpers.cpp | 2 + 4 files changed, 133 insertions(+), 37 deletions(-) (limited to 'BuddhaTest') diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index ddcbace..ba93ae9 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -4,15 +4,20 @@ layout(std430, binding=2) buffer renderedDataRed { - uint counts_SSBORed[]; + uint counts_SSBORed[]; }; layout(std430, binding=3) buffer renderedDataGreen { - uint counts_SSBOGreen[]; + uint counts_SSBOGreen[]; }; layout(std430, binding=4) buffer renderedDataBlue { - uint counts_SSBOBlue[]; + uint counts_SSBOBlue[]; +}; +layout(std430, binding=5) buffer statusBuffer +{ + uint accumulatedState; + uint individualState[]; }; uniform uint width; @@ -21,6 +26,28 @@ uniform uint height; uniform uint iterationCount; uniform uvec3 orbitLength; +uniform uint iterationChanged; +uniform uint iterationsPerDispatch; + +void getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations) +{ + uint x = individualState[4*CellID]; + uint y = individualState[4*CellID+1]; + phase = individualState[4*CellID+2]; + remainingIterations = individualState[4*CellID+3]; + coordinates = vec2(uintBitsToFloat(x),uintBitsToFloat(y)); +} + +void setIndividualState(in uint CellID, in vec2 coordinates, in uint phase, in uint remainingIterations) +{ + uint x=floatBitsToUint(coordinates.x); + uint y=floatBitsToUint(coordinates.y); + atomicExchange(individualState[4*CellID],x); + atomicExchange(individualState[4*CellID+1],y); + atomicExchange(individualState[4*CellID+2],phase); + atomicExchange(individualState[4*CellID+3],remainingIterations); +} + void addToColorOfCell(uvec2 cell, uvec3 toAdd) { uint firstIndex = (cell.x + cell.y * width); @@ -152,39 +179,40 @@ vec2 getStartValue(uint seed, uint yDecoupler) return retval; } -bool isGoingToBeDrawn(vec2 offset) +bool isGoingToBeDrawn(in vec2 offset, inout vec2 lastVal, inout uint remainingIterations, out bool result) { - vec2 val = vec2(0); - uint limit = orbitLength.x > orbitLength.y ? orbitLength.x : orbitLength.y; - limit = limit > orbitLength.z ? limit : orbitLength.z; - for(uint i = 0; i < limit;++i) + uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0; + for(uint i = startCount; i < remainingIterations;++i) { - val = compSqr(val) + offset; - if(dot(val,val) > 4.0) + lastVal = compSqr(lastVal) + offset; + if(dot(lastVal,lastVal) > 4.0) { + result = true; return true; } } - return false; + remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow. + result = false; + return startCount == 0; } -void drawOrbit(vec2 offset) +bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint remainingIterations) { - vec2 val = vec2(0); - uint limit = orbitLength.x > orbitLength.y ? orbitLength.x : orbitLength.y; - limit = limit > orbitLength.z ? limit : orbitLength.z; - for(uint i = 0; i < limit;++i) + uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0; + for(uint i = totalIterations - remainingIterations; i < totalIterations - startCount;++i) { - val = compSqr(val) + offset; - if(dot(val,val) > 20.0) + lastVal = compSqr(lastVal) + offset; + if(dot(lastVal,lastVal) > 20.0) { - return; + return true; //done. } - if(val.x > -2.5 && val.x < 1.0 && val.y > -1.0 && val.y < 1.0) + if(lastVal.x > -2.5 && lastVal.x < 1.0 && lastVal.y > -1.0 && lastVal.y < 1.0) { - addToColorAt(val,uvec3(i < orbitLength.r,i < orbitLength.g,i < orbitLength.b)); + addToColorAt(lastVal,uvec3(i < orbitLength.r,i < orbitLength.g,i < orbitLength.b)); } } + remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow. + return startCount == 0; } void main() { @@ -194,12 +222,55 @@ void main() { uint totalWorkers = totalWorkersPerDimension.x*totalWorkersPerDimension.y*totalWorkersPerDimension.z; //TODO: Check this once I've had some sleep. Anyhow, I'm using 1D, so y and z components globalInfocationID should be zero anyhow. - uint seed = iterationCount * totalWorkers + gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x + totalWorkersPerDimension.y); + uint uniqueWorkerID = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x + totalWorkersPerDimension.y); + uint seed = iterationCount * totalWorkers + uniqueWorkerID; uint yDecoupler = iterationCount; vec2 offset = getStartValue(seed, yDecoupler); - if(!isGoingToBeDrawn(offset)) - return; + uint totalIterations = orbitLength.x > orbitLength.y ? orbitLength.x : orbitLength.y; + totalIterations = totalIterations > orbitLength.z ? totalIterations : orbitLength.z; + + //getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations) + vec2 lastPosition = vec2(0); + uint phase = 0; + uint remainingIterations; + if(iterationChanged == 0) //same iteration as last time, reuse old state. + { + getIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations); + } + else + { + remainingIterations = totalIterations; + } + + if(phase == 0) + { + //check if this orbit is going to be drawn + bool result; + if(isGoingToBeDrawn(offset, lastPosition, remainingIterations, result)) + { + //done, proceed to phase 1 or 2, based on result. + phase = result ? 1 : 2; + lastPosition = vec2(0); + remainingIterations = totalIterations; + } + } + else if(phase == 1) //else if. We allow less than the user set iterations per dispatch, but never more. + { + //draw orbit + if(drawOrbit(offset, totalIterations, lastPosition, remainingIterations)) + { + //done. + phase = 2; + } + } + if(phase == 2) + { + //done. + remainingIterations = 0; + } + - drawOrbit(offset); + setIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations); + atomicOr(accumulatedState, uint(phase != 2)); } diff --git a/BuddhaTest/include/Helpers.h b/BuddhaTest/include/Helpers.h index 6358066..ee5b07f 100644 --- a/BuddhaTest/include/Helpers.h +++ b/BuddhaTest/include/Helpers.h @@ -46,6 +46,8 @@ namespace Helpers unsigned int globalWorkGroupSizeY = 1; unsigned int globalWorkGroupSizeZ = 1; + unsigned int iterationsPerFrame = 1000; + std::string pngFilename = ""; double pngGamma = 1.0; double pngColorScale = 2.0; diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 5d03ad4..fcc76a3 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -122,15 +122,27 @@ int main(int argc, char * argv[]) } glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + const uint32_t workersPerFrame = settings.globalWorkGroupSizeX*settings.globalWorkGroupSizeY*settings.globalWorkGroupSizeZ*settings.localWorkgroupSizeX*settings.localWorkgroupSizeY*settings.localWorkgroupSizeZ; + GLuint stateBuffer; + glGenBuffers(1,&stateBuffer); + glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(4*workersPerFrame+1),nullptr,GL_DYNAMIC_COPY); + glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, stateBuffer); + uint32_t iterationCount{0}; glUseProgram(ComputeShader); GLint iterationCountUniformHandle = glGetUniformLocation(ComputeShader, "iterationCount"); GLint orbitLengthUniformHandle = glGetUniformLocation(ComputeShader, "orbitLength"); GLint widthUniformComputeHandle = glGetUniformLocation(ComputeShader, "width"); GLint heightUniformComputeHandle = glGetUniformLocation(ComputeShader, "height"); + GLint iterationChangedHandle = glGetUniformLocation(ComputeShader, "iterationChanged"); + GLint iterationsPerDispatchHandle = glGetUniformLocation(ComputeShader, "iterationsPerDispatch"); glUniform3ui(orbitLengthUniformHandle,settings.orbitLengthRed,settings.orbitLengthGreen,settings.orbitLengthBlue); glUniform1ui(widthUniformComputeHandle, settings.imageWidth); glUniform1ui(heightUniformComputeHandle, bufferHeight); + glUniform1ui(iterationChangedHandle,1); + glUniform1ui(iterationsPerDispatchHandle, settings.iterationsPerFrame); glUseProgram(VertexAndFragmentShaders); GLint widthUniformFragmentHandle = glGetUniformLocation(VertexAndFragmentShaders, "width"); @@ -140,29 +152,37 @@ int main(int argc, char * argv[]) glClearColor(0.0f, 0.0f, 0.4f, 0.0f); - //uint32_t iterationsPerFrame = settings.globalWorkGroupSizeX*settings.globalWorkGroupSizeY*settings.globalWorkGroupSizeZ*settings.localWorkgroupSizeX*settings.localWorkgroupSizeY*settings.localWorkgroupSizeZ; - //bool bWarningShown{false}; - /* Loop until the user closes the window */ while (!glfwWindowShouldClose(window)) { - //commented out, as with the new y-decoupling we will get more points in 2D even after we had all integer values for x. - //check if we are "done" - //if(iterationsPerFrame * iterationCount <= iterationsPerFrame && iterationCount > 1 && !bWarningShown) - //{ - // std::cout << "The program covered all possible integer values for starting point computation. Leaving it running will not improve the image any more." << std::endl; - // bWarningShown = true; - //} + //clear first integer in state buffer. That's the "boolean" we use to determine if we should increment iterationCount. + glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); + glClearBufferSubData(GL_SHADER_STORAGE_BUFFER,GL_R8,0,4,GL_RED,GL_UNSIGNED_INT,nullptr); //let the compute shader do something glUseProgram(ComputeShader); - //increase iterationCount, which is used for pseudo random generation - glUniform1ui(iterationCountUniformHandle,++iterationCount); + //set iterationCount, which is used for pseudo random generation + glUniform1ui(iterationCountUniformHandle,iterationCount); glDispatchCompute(settings.globalWorkGroupSizeX, settings.globalWorkGroupSizeY, settings.globalWorkGroupSizeZ); //before reading the values in the ssbo, we need a memory barrier: glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //I hope this is the correct (and only required) bit + //read back first bit of state buffer. If it's zero, increment iterationcount and set glUniform1ui(iterationChangedHandle,1); + //if it's nonzero set glUniform1ui(iterationChangedHandle,0); + glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); + uint accumulatedState; + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4,&accumulatedState); + if(accumulatedState) + { + glUniform1ui(iterationChangedHandle,0); + } + else + { + glUniform1ui(iterationChangedHandle,1); + ++iterationCount; + } + /* Render here */ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glUseProgram(VertexAndFragmentShaders); @@ -210,6 +230,7 @@ int main(int argc, char * argv[]) //a bit of cleanup glDeleteBuffers(1,&vertexbuffer); glDeleteBuffers(3,drawBuffer); + glDeleteBuffers(1,&stateBuffer); glfwTerminate(); return 0; diff --git a/BuddhaTest/src/Helpers.cpp b/BuddhaTest/src/Helpers.cpp index 3cb786c..5197421 100644 --- a/BuddhaTest/src/Helpers.cpp +++ b/BuddhaTest/src/Helpers.cpp @@ -332,6 +332,7 @@ namespace Helpers {"--globalWorkgroupSizeX", &globalWorkGroupSizeX}, {"--globalWorkgroupSizeY", &globalWorkGroupSizeY}, {"--globalWorkgroupSizeZ", &globalWorkGroupSizeZ}, + {"--iterationsPerFrame", &iterationsPerFrame}, {"--imageGamma",&pngGamma}, {"--imageColorScale",&pngColorScale}, {"--output", &pngFilename}, @@ -362,6 +363,7 @@ namespace Helpers "--globalWorkgroupSizeX [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1024." << std::endl << "--globalWorkgroupSizeY [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << "--globalWorkgroupSizeZ [integer] : How often the local work group should be invoked per frame. Values up to 65535 are guaranteed to work. Default is 1." << std::endl << + "--iterationsPerFrame [integer] : Limit how many iteration steps the shader may make per frame. Use this to keep the desktop responsive while rendering high iteration count images. Default: 1000." << std::endl << "--ignoreMaxBufferSize [0,1] : If set to 1, a failed maximum buffer size check is not treated as error. Some graphics drivers report lower values than their absolute limit. Do this on your own risk, though." << std::endl; return false; } -- cgit v1.2.3