From 598e425e2bb50586c9aec400308727aa1075f69b Mon Sep 17 00:00:00 2001 From: Andreas Grois Date: Thu, 15 Mar 2018 08:15:27 +0100 Subject: Speed up computation tremendously, by having each worker (nearly) completely store its state and work (nearly) fully independently of the others. --- BuddhaTest/Shaders/BuddhaCompute.glsl | 122 +++++++++++++++++++--------------- BuddhaTest/src/BuddhaTest.cpp | 29 +------- 2 files changed, 70 insertions(+), 81 deletions(-) diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index ba93ae9..8eb925b 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -16,36 +16,44 @@ layout(std430, binding=4) buffer renderedDataBlue }; layout(std430, binding=5) buffer statusBuffer { - uint accumulatedState; uint individualState[]; }; uniform uint width; uniform uint height; -uniform uint iterationCount; uniform uvec3 orbitLength; -uniform uint iterationChanged; uniform uint iterationsPerDispatch; -void getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations) +void getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations) { - uint x = individualState[4*CellID]; - uint y = individualState[4*CellID+1]; - phase = individualState[4*CellID+2]; - remainingIterations = individualState[4*CellID+3]; + uint startIndex = 7*CellID; + uint x = individualState[startIndex]; + uint y = individualState[startIndex+1]; + phase = individualState[startIndex+2]; + orbitNumber = individualState[startIndex+3]; + doneIterations = individualState[startIndex+4]; + uint offx = individualState[startIndex+5]; + uint offy = individualState[startIndex+6]; coordinates = vec2(uintBitsToFloat(x),uintBitsToFloat(y)); + offset = vec2(uintBitsToFloat(offx),uintBitsToFloat(offy)); } -void setIndividualState(in uint CellID, in vec2 coordinates, in uint phase, in uint 
remainingIterations) +void setIndividualState(in uint CellID, in vec2 offset, in vec2 coordinates, in uint phase, in uint orbitNumber, in uint doneIterations) { + uint startIndex = 7*CellID; uint x=floatBitsToUint(coordinates.x); uint y=floatBitsToUint(coordinates.y); - atomicExchange(individualState[4*CellID],x); - atomicExchange(individualState[4*CellID+1],y); - atomicExchange(individualState[4*CellID+2],phase); - atomicExchange(individualState[4*CellID+3],remainingIterations); + uint offx = floatBitsToUint(offset.x); + uint offy = floatBitsToUint(offset.y); + atomicExchange(individualState[startIndex],x); + atomicExchange(individualState[startIndex+1],y); + atomicExchange(individualState[startIndex+2],phase); + atomicExchange(individualState[startIndex+3],orbitNumber); + atomicExchange(individualState[startIndex+4],doneIterations); + atomicExchange(individualState[startIndex+5],offx); + atomicExchange(individualState[startIndex+6],offy); } void addToColorOfCell(uvec2 cell, uvec3 toAdd) @@ -179,31 +187,33 @@ vec2 getStartValue(uint seed, uint yDecoupler) return retval; } -bool isGoingToBeDrawn(in vec2 offset, inout vec2 lastVal, inout uint remainingIterations, out bool result) +bool isGoingToBeDrawn(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint doneIterations, out bool result) { - uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0; - for(uint i = startCount; i < remainingIterations;++i) + uint endCount = doneIterations + iterationsPerDispatch > totalIterations ? totalIterations : doneIterations + iterationsPerDispatch; + for(uint i = doneIterations; i < endCount;++i) { lastVal = compSqr(lastVal) + offset; if(dot(lastVal,lastVal) > 4.0) { result = true; + doneIterations = i+1; return true; } } - remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow. 
+ doneIterations = endCount; result = false; - return startCount == 0; + return endCount == totalIterations; } -bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint remainingIterations) +bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint doneIterations) { - uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0; - for(uint i = totalIterations - remainingIterations; i < totalIterations - startCount;++i) + uint endCount = doneIterations + iterationsPerDispatch > totalIterations ? totalIterations : doneIterations + iterationsPerDispatch; + for(uint i = doneIterations; i < endCount;++i) { lastVal = compSqr(lastVal) + offset; if(dot(lastVal,lastVal) > 20.0) { + doneIterations = i+1; return true; //done. } if(lastVal.x > -2.5 && lastVal.x < 1.0 && lastVal.y > -1.0 && lastVal.y < 1.0) @@ -211,8 +221,8 @@ bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inou addToColorAt(lastVal,uvec3(i < orbitLength.r,i < orbitLength.g,i < orbitLength.b)); } } - remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow. - return startCount == 0; + doneIterations = endCount; + return endCount == totalIterations; } void main() { @@ -223,54 +233,58 @@ void main() { //TODO: Check this once I've had some sleep. Anyhow, I'm using 1D, so y and z components globalInfocationID should be zero anyhow. uint uniqueWorkerID = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x + totalWorkersPerDimension.y); - uint seed = iterationCount * totalWorkers + uniqueWorkerID; - uint yDecoupler = iterationCount; - vec2 offset = getStartValue(seed, yDecoupler); uint totalIterations = orbitLength.x > orbitLength.y ? orbitLength.x : orbitLength.y; totalIterations = totalIterations > orbitLength.z ? 
totalIterations : orbitLength.z; //getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations) - vec2 lastPosition = vec2(0); - uint phase = 0; - uint remainingIterations; - if(iterationChanged == 0) //same iteration as last time, reuse old state. + vec2 lastPosition; + uint phase; + uint doneIterations; + uint orbitNumber; + vec2 offset; + //getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations) + getIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations); + if(phase == 0) { - getIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations); + //new orbit: + uint seed = orbitNumber * totalWorkers + uniqueWorkerID; + uint yDecoupler = orbitNumber; + offset = getStartValue(seed, yDecoupler); + lastPosition = vec2(0); + phase = 1; + doneIterations = 0; } - else - { - remainingIterations = totalIterations; - } - - if(phase == 0) + if(phase == 1) { //check if this orbit is going to be drawn bool result; - if(isGoingToBeDrawn(offset, lastPosition, remainingIterations, result)) + if(isGoingToBeDrawn(offset,totalIterations, lastPosition, doneIterations , result)) { - //done, proceed to phase 1 or 2, based on result. - phase = result ? 1 : 2; - lastPosition = vec2(0); - remainingIterations = totalIterations; + if(result) + { + //on to step 2: drawing + phase = 2; + lastPosition = vec2(0); + doneIterations = 0; + } + else + { + //back to step 0 + ++orbitNumber; + phase = 0; + } } } - else if(phase == 1) //else if. We allow less than the user set iterations per dispatch, but never more. + else if(phase == 2) { - //draw orbit - if(drawOrbit(offset, totalIterations, lastPosition, remainingIterations)) + if(drawOrbit(offset, totalIterations, lastPosition, doneIterations)) { - //done. - phase = 2; + ++orbitNumber; + phase = 0; } } - if(phase == 2) - { - //done. 
- remainingIterations = 0; - } - setIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations); - atomicOr(accumulatedState, uint(phase != 2)); + setIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations); } diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index fcc76a3..67971ea 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -126,22 +126,18 @@ int main(int argc, char * argv[]) GLuint stateBuffer; glGenBuffers(1,&stateBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(4*workersPerFrame+1),nullptr,GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(7*workersPerFrame),nullptr,GL_DYNAMIC_COPY); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, stateBuffer); - uint32_t iterationCount{0}; glUseProgram(ComputeShader); - GLint iterationCountUniformHandle = glGetUniformLocation(ComputeShader, "iterationCount"); GLint orbitLengthUniformHandle = glGetUniformLocation(ComputeShader, "orbitLength"); GLint widthUniformComputeHandle = glGetUniformLocation(ComputeShader, "width"); GLint heightUniformComputeHandle = glGetUniformLocation(ComputeShader, "height"); - GLint iterationChangedHandle = glGetUniformLocation(ComputeShader, "iterationChanged"); GLint iterationsPerDispatchHandle = glGetUniformLocation(ComputeShader, "iterationsPerDispatch"); glUniform3ui(orbitLengthUniformHandle,settings.orbitLengthRed,settings.orbitLengthGreen,settings.orbitLengthBlue); glUniform1ui(widthUniformComputeHandle, settings.imageWidth); glUniform1ui(heightUniformComputeHandle, bufferHeight); - glUniform1ui(iterationChangedHandle,1); glUniform1ui(iterationsPerDispatchHandle, settings.iterationsPerFrame); glUseProgram(VertexAndFragmentShaders); @@ -155,34 +151,13 @@ int main(int argc, char * argv[]) /* Loop until the user closes the window */ while 
(!glfwWindowShouldClose(window)) { - //clear first integer in state buffer. That's the "boolean" we use to determine if we should increment iterationCount. - glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); - glClearBufferSubData(GL_SHADER_STORAGE_BUFFER,GL_R8,0,4,GL_RED,GL_UNSIGNED_INT,nullptr); - //let the compute shader do something - glUseProgram(ComputeShader); - //set iterationCount, which is used for pseudo random generation - glUniform1ui(iterationCountUniformHandle,iterationCount); + glUseProgram(ComputeShader); glDispatchCompute(settings.globalWorkGroupSizeX, settings.globalWorkGroupSizeY, settings.globalWorkGroupSizeZ); //before reading the values in the ssbo, we need a memory barrier: glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //I hope this is the correct (and only required) bit - //read back first bit of state buffer. If it's zero, increment iterationcount and set glUniform1ui(iterationChangedHandle,1); - //if it's nonzero set glUniform1ui(iterationChangedHandle,0); - glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer); - uint accumulatedState; - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4,&accumulatedState); - if(accumulatedState) - { - glUniform1ui(iterationChangedHandle,0); - } - else - { - glUniform1ui(iterationChangedHandle,1); - ++iterationCount; - } - /* Render here */ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glUseProgram(VertexAndFragmentShaders); -- cgit v1.2.3