aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Grois <andi@grois.info>2018-03-15 08:15:27 +0100
committerAndreas Grois <andi@grois.info>2018-03-15 08:15:27 +0100
commit598e425e2bb50586c9aec400308727aa1075f69b (patch)
tree2ef7de2094cb5b2658e55f0aad287b79c1910e08
parenta350181f2c15feb072c2edf5d0c41965936f1948 (diff)
Speed up computation tremendously by having each worker store (nearly)
all of its own state and work (nearly) fully independently of the other workers.
-rw-r--r--BuddhaTest/Shaders/BuddhaCompute.glsl122
-rw-r--r--BuddhaTest/src/BuddhaTest.cpp29
2 files changed, 70 insertions, 81 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl
index ba93ae9..8eb925b 100644
--- a/BuddhaTest/Shaders/BuddhaCompute.glsl
+++ b/BuddhaTest/Shaders/BuddhaCompute.glsl
@@ -16,36 +16,44 @@ layout(std430, binding=4) buffer renderedDataBlue
};
layout(std430, binding=5) buffer statusBuffer
{
- uint accumulatedState;
uint individualState[];
};
uniform uint width;
uniform uint height;
-uniform uint iterationCount;
uniform uvec3 orbitLength;
-uniform uint iterationChanged;
uniform uint iterationsPerDispatch;
-void getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations)
+void getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
{
- uint x = individualState[4*CellID];
- uint y = individualState[4*CellID+1];
- phase = individualState[4*CellID+2];
- remainingIterations = individualState[4*CellID+3];
+ uint startIndex = 7*CellID;
+ uint x = individualState[startIndex];
+ uint y = individualState[startIndex+1];
+ phase = individualState[startIndex+2];
+ orbitNumber = individualState[startIndex+3];
+ doneIterations = individualState[startIndex+4];
+ uint offx = individualState[startIndex+5];
+ uint offy = individualState[startIndex+6];
coordinates = vec2(uintBitsToFloat(x),uintBitsToFloat(y));
+ offset = vec2(uintBitsToFloat(offx),uintBitsToFloat(offy));
}
-void setIndividualState(in uint CellID, in vec2 coordinates, in uint phase, in uint remainingIterations)
+void setIndividualState(in uint CellID, in vec2 offset, in vec2 coordinates, in uint phase, in uint orbitNumber, in uint doneIterations)
{
+ uint startIndex = 7*CellID;
uint x=floatBitsToUint(coordinates.x);
uint y=floatBitsToUint(coordinates.y);
- atomicExchange(individualState[4*CellID],x);
- atomicExchange(individualState[4*CellID+1],y);
- atomicExchange(individualState[4*CellID+2],phase);
- atomicExchange(individualState[4*CellID+3],remainingIterations);
+ uint offx = floatBitsToUint(offset.x);
+ uint offy = floatBitsToUint(offset.y);
+ atomicExchange(individualState[startIndex],x);
+ atomicExchange(individualState[startIndex+1],y);
+ atomicExchange(individualState[startIndex+2],phase);
+ atomicExchange(individualState[startIndex+3],orbitNumber);
+ atomicExchange(individualState[startIndex+4],doneIterations);
+ atomicExchange(individualState[startIndex+5],offx);
+ atomicExchange(individualState[startIndex+6],offy);
}
void addToColorOfCell(uvec2 cell, uvec3 toAdd)
@@ -179,31 +187,33 @@ vec2 getStartValue(uint seed, uint yDecoupler)
return retval;
}
-bool isGoingToBeDrawn(in vec2 offset, inout vec2 lastVal, inout uint remainingIterations, out bool result)
+bool isGoingToBeDrawn(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint doneIterations, out bool result)
{
- uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0;
- for(uint i = startCount; i < remainingIterations;++i)
+ uint endCount = doneIterations + iterationsPerDispatch > totalIterations ? totalIterations : doneIterations + iterationsPerDispatch;
+ for(uint i = doneIterations; i < endCount;++i)
{
lastVal = compSqr(lastVal) + offset;
if(dot(lastVal,lastVal) > 4.0)
{
result = true;
+ doneIterations = i+1;
return true;
}
}
- remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow.
+ doneIterations = endCount;
result = false;
- return startCount == 0;
+ return endCount == totalIterations;
}
-bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint remainingIterations)
+bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inout uint doneIterations)
{
- uint startCount = remainingIterations > iterationsPerDispatch ? remainingIterations - iterationsPerDispatch : 0;
- for(uint i = totalIterations - remainingIterations; i < totalIterations - startCount;++i)
+ uint endCount = doneIterations + iterationsPerDispatch > totalIterations ? totalIterations : doneIterations + iterationsPerDispatch;
+ for(uint i = doneIterations; i < endCount;++i)
{
lastVal = compSqr(lastVal) + offset;
if(dot(lastVal,lastVal) > 20.0)
{
+ doneIterations = i+1;
return true; //done.
}
if(lastVal.x > -2.5 && lastVal.x < 1.0 && lastVal.y > -1.0 && lastVal.y < 1.0)
@@ -211,8 +221,8 @@ bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inou
addToColorAt(lastVal,uvec3(i < orbitLength.r,i < orbitLength.g,i < orbitLength.b));
}
}
- remainingIterations -= iterationsPerDispatch; //can underflow, we don't care, as if that happens, we return true and discard the value anyhow.
- return startCount == 0;
+ doneIterations = endCount;
+ return endCount == totalIterations;
}
void main() {
@@ -223,54 +233,58 @@ void main() {
//TODO: Check this once I've had some sleep. Anyhow, I'm using 1D, so y and z components globalInfocationID should be zero anyhow.
uint uniqueWorkerID = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x + totalWorkersPerDimension.y);
- uint seed = iterationCount * totalWorkers + uniqueWorkerID;
- uint yDecoupler = iterationCount;
- vec2 offset = getStartValue(seed, yDecoupler);
uint totalIterations = orbitLength.x > orbitLength.y ? orbitLength.x : orbitLength.y;
totalIterations = totalIterations > orbitLength.z ? totalIterations : orbitLength.z;
//getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint remainingIterations)
- vec2 lastPosition = vec2(0);
- uint phase = 0;
- uint remainingIterations;
- if(iterationChanged == 0) //same iteration as last time, reuse old state.
+ vec2 lastPosition;
+ uint phase;
+ uint doneIterations;
+ uint orbitNumber;
+ vec2 offset;
+ //getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
+ getIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations);
+ if(phase == 0)
{
- getIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations);
+ //new orbit:
+ uint seed = orbitNumber * totalWorkers + uniqueWorkerID;
+ uint yDecoupler = orbitNumber;
+ offset = getStartValue(seed, yDecoupler);
+ lastPosition = vec2(0);
+ phase = 1;
+ doneIterations = 0;
}
- else
- {
- remainingIterations = totalIterations;
- }
-
- if(phase == 0)
+ if(phase == 1)
{
//check if this orbit is going to be drawn
bool result;
- if(isGoingToBeDrawn(offset, lastPosition, remainingIterations, result))
+ if(isGoingToBeDrawn(offset,totalIterations, lastPosition, doneIterations , result))
{
- //done, proceed to phase 1 or 2, based on result.
- phase = result ? 1 : 2;
- lastPosition = vec2(0);
- remainingIterations = totalIterations;
+ if(result)
+ {
+ //on to step 2: drawing
+ phase = 2;
+ lastPosition = vec2(0);
+ doneIterations = 0;
+ }
+ else
+ {
+ //back to step 0
+ ++orbitNumber;
+ phase = 0;
+ }
}
}
- else if(phase == 1) //else if. We allow less than the user set iterations per dispatch, but never more.
+ else if(phase == 2)
{
- //draw orbit
- if(drawOrbit(offset, totalIterations, lastPosition, remainingIterations))
+ if(drawOrbit(offset, totalIterations, lastPosition, doneIterations))
{
- //done.
- phase = 2;
+ ++orbitNumber;
+ phase = 0;
}
}
- if(phase == 2)
- {
- //done.
- remainingIterations = 0;
- }
- setIndividualState(uniqueWorkerID, lastPosition, phase, remainingIterations);
- atomicOr(accumulatedState, uint(phase != 2));
+ setIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations);
}
diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp
index fcc76a3..67971ea 100644
--- a/BuddhaTest/src/BuddhaTest.cpp
+++ b/BuddhaTest/src/BuddhaTest.cpp
@@ -126,22 +126,18 @@ int main(int argc, char * argv[])
GLuint stateBuffer;
glGenBuffers(1,&stateBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer);
- glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(4*workersPerFrame+1),nullptr,GL_DYNAMIC_COPY);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(7*workersPerFrame),nullptr,GL_DYNAMIC_COPY);
glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, stateBuffer);
- uint32_t iterationCount{0};
glUseProgram(ComputeShader);
- GLint iterationCountUniformHandle = glGetUniformLocation(ComputeShader, "iterationCount");
GLint orbitLengthUniformHandle = glGetUniformLocation(ComputeShader, "orbitLength");
GLint widthUniformComputeHandle = glGetUniformLocation(ComputeShader, "width");
GLint heightUniformComputeHandle = glGetUniformLocation(ComputeShader, "height");
- GLint iterationChangedHandle = glGetUniformLocation(ComputeShader, "iterationChanged");
GLint iterationsPerDispatchHandle = glGetUniformLocation(ComputeShader, "iterationsPerDispatch");
glUniform3ui(orbitLengthUniformHandle,settings.orbitLengthRed,settings.orbitLengthGreen,settings.orbitLengthBlue);
glUniform1ui(widthUniformComputeHandle, settings.imageWidth);
glUniform1ui(heightUniformComputeHandle, bufferHeight);
- glUniform1ui(iterationChangedHandle,1);
glUniform1ui(iterationsPerDispatchHandle, settings.iterationsPerFrame);
glUseProgram(VertexAndFragmentShaders);
@@ -155,34 +151,13 @@ int main(int argc, char * argv[])
/* Loop until the user closes the window */
while (!glfwWindowShouldClose(window))
{
- //clear first integer in state buffer. That's the "boolean" we use to determine if we should increment iterationCount.
- glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer);
- glClearBufferSubData(GL_SHADER_STORAGE_BUFFER,GL_R8,0,4,GL_RED,GL_UNSIGNED_INT,nullptr);
-
//let the compute shader do something
- glUseProgram(ComputeShader);
- //set iterationCount, which is used for pseudo random generation
- glUniform1ui(iterationCountUniformHandle,iterationCount);
+ glUseProgram(ComputeShader);
glDispatchCompute(settings.globalWorkGroupSizeX, settings.globalWorkGroupSizeY, settings.globalWorkGroupSizeZ);
//before reading the values in the ssbo, we need a memory barrier:
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); //I hope this is the correct (and only required) bit
- //read back first bit of state buffer. If it's zero, increment iterationcount and set glUniform1ui(iterationChangedHandle,1);
- //if it's nonzero set glUniform1ui(iterationChangedHandle,0);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer);
- uint accumulatedState;
- glGetBufferSubData(GL_SHADER_STORAGE_BUFFER,0,4,&accumulatedState);
- if(accumulatedState)
- {
- glUniform1ui(iterationChangedHandle,0);
- }
- else
- {
- glUniform1ui(iterationChangedHandle,1);
- ++iterationCount;
- }
-
/* Render here */
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glUseProgram(VertexAndFragmentShaders);