about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Grois <andi@grois.info> 2018-03-22 22:52:44 +0100
committer: Andreas Grois <andi@grois.info> 2018-03-22 22:52:44 +0100
commit: e45b39dc2ebb76bc079677b1d186b6a5109d1a5b (patch)
tree: 17919228413393625b94fbfd3435f966159e33dc
parent: d06a01040cd7614a6951d6e6e350135a05ddb9c2 (diff)
Get occupancy back up good by exploiting shared memory.
-rw-r--r-- BuddhaTest/Shaders/BuddhaCompute.glsl 84
-rw-r--r-- BuddhaTest/Shaders/BuddhaFragment.glsl 6
-rw-r--r-- BuddhaTest/src/BuddhaTest.cpp 2
3 files changed, 51 insertions, 41 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl
index 0ce19f7..dfc3828 100644
--- a/BuddhaTest/Shaders/BuddhaCompute.glsl
+++ b/BuddhaTest/Shaders/BuddhaCompute.glsl
@@ -9,7 +9,7 @@ layout(std430, binding=2) restrict buffer renderedDataRed
layout(std430, binding=3) restrict buffer brightnessData
{
- restrict uvec3 brightness;
+ restrict uint brightness;
};
struct individualData
@@ -33,7 +33,16 @@ uniform uvec4 orbitLength;
uniform uint iterationsPerDispatch;
uniform uint totalIterations;
-shared uvec3[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z] brightnesses;
+struct workerState
+{
+ uint phase;
+ uint orbitNumber;
+ uint doneIterations;
+
+ uint brightness;
+};
+
+shared workerState[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z] localStore;
void uintMaxIP(inout uint modified, const uint constant)
{
@@ -55,8 +64,8 @@ void addToColorOfCell(uvec2 cell, uvec3 toAdd)
b.z = atomicAdd(counts_SSBO[firstIndex+2],toAdd.z);
for(int i = 0; i < 3;++i)
{
- if(brightnesses[gl_LocalInvocationIndex][i] < b[i])
- brightnesses[gl_LocalInvocationIndex][i] = b[i];
+ if(localStore[gl_LocalInvocationIndex].brightness < b[i])
+ localStore[gl_LocalInvocationIndex].brightness = b[i];
}
}
@@ -218,10 +227,7 @@ vec2 getCurrentOrbitOffset(const uint orbitNumber, const uint totalWorkers, cons
}
void main() {
- for(int i = 0; i < 3;++i)
- {
- brightnesses[gl_LocalInvocationIndex][i] = 0;
- }
+ localStore[gl_LocalInvocationIndex].brightness = 0;
//we need to know how many total work groups are running this iteration
const uvec3 totalWorkersPerDimension = gl_WorkGroupSize * gl_NumWorkGroups;
@@ -229,89 +235,93 @@ void main() {
const uint uniqueWorkerID = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x * totalWorkersPerDimension.y);
- individualData state = stateArray[uniqueWorkerID];
+ localStore[gl_LocalInvocationIndex].phase = stateArray[uniqueWorkerID].phase;
+ localStore[gl_LocalInvocationIndex].orbitNumber = stateArray[uniqueWorkerID].orbitNumber;
+ localStore[gl_LocalInvocationIndex].doneIterations = stateArray[uniqueWorkerID].doneIterations;
+ vec2 lastPosition = stateArray[uniqueWorkerID].lastPosition;
//getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
uint iterationsLeftToDo = iterationsPerDispatch;
- vec2 offset = getCurrentOrbitOffset(state.orbitNumber, totalWorkers, uniqueWorkerID);
+ vec2 offset = getCurrentOrbitOffset(localStore[gl_LocalInvocationIndex].orbitNumber, totalWorkers, uniqueWorkerID);
while(iterationsLeftToDo != 0)
{
- if(state.phase == 0)
+ if(localStore[gl_LocalInvocationIndex].phase == 0)
{
//new orbit:
//we know that iterationsLeftToDo is at least 1 by the while condition.
--iterationsLeftToDo; //count this as 1 iteration.
- offset = getCurrentOrbitOffset(state.orbitNumber, totalWorkers, uniqueWorkerID);
+ offset = getCurrentOrbitOffset(localStore[gl_LocalInvocationIndex].orbitNumber, totalWorkers, uniqueWorkerID);
if(isInMainBulb(offset) || isInMainCardioid(offset))
{
// do not waste time drawing this orbit
- ++state.orbitNumber;
+ ++localStore[gl_LocalInvocationIndex].orbitNumber;
}
else
{
//cool orbit!
- state.lastPosition = vec2(0);
- state.phase = 1;
- state.doneIterations = 0;
+ lastPosition = vec2(0);
+ localStore[gl_LocalInvocationIndex].phase = 1;
+ localStore[gl_LocalInvocationIndex].doneIterations = 0;
}
}
- if(state.phase == 1)
+ if(localStore[gl_LocalInvocationIndex].phase == 1)
{
//check if this orbit is going to be drawn
bool result;
- if(isGoingToBeDrawn(offset,totalIterations, state.lastPosition, iterationsLeftToDo, state.doneIterations , result))
+ if(isGoingToBeDrawn(offset,totalIterations, lastPosition, iterationsLeftToDo, localStore[gl_LocalInvocationIndex].doneIterations , result))
{
if(result)
{
//on to step 2: drawing
- state.phase = 2;
- state.lastPosition = vec2(0);
- state.doneIterations = 0;
+ localStore[gl_LocalInvocationIndex].phase = 2;
+ lastPosition = vec2(0);
+ localStore[gl_LocalInvocationIndex].doneIterations = 0;
}
else
{
//back to step 0
- ++state.orbitNumber;
- state.phase = 0;
+ ++localStore[gl_LocalInvocationIndex].orbitNumber;
+ localStore[gl_LocalInvocationIndex].phase = 0;
}
}
}
- if(state.phase == 2)
+ if(localStore[gl_LocalInvocationIndex].phase == 2)
{
- if(drawOrbit(offset, totalIterations, state.lastPosition, iterationsLeftToDo, state.doneIterations))
+ if(drawOrbit(offset, totalIterations, lastPosition, iterationsLeftToDo, localStore[gl_LocalInvocationIndex].doneIterations))
{
- ++state.orbitNumber;
- state.phase = 0;
+ ++localStore[gl_LocalInvocationIndex].orbitNumber;
+ localStore[gl_LocalInvocationIndex].phase = 0;
}
}
}
- stateArray[uniqueWorkerID] = state;
+ stateArray[uniqueWorkerID].orbitNumber = localStore[gl_LocalInvocationIndex].orbitNumber;
+ stateArray[uniqueWorkerID].phase = localStore[gl_LocalInvocationIndex].phase;
+ stateArray[uniqueWorkerID].doneIterations = localStore[gl_LocalInvocationIndex].doneIterations;
+ stateArray[uniqueWorkerID].lastPosition = lastPosition;
+
//use divide et impera to get the real maximum brightness of this local group
barrier();
- if(bool(brightnesses.length() & 1) && gl_LocalInvocationIndex == 0)
+ if(bool(localStore.length() & 1) && gl_LocalInvocationIndex == 0)
{
- uintMaxIP(brightnesses[0], brightnesses[brightnesses.length()-1]);
+ uintMaxIP(localStore[0].brightness, localStore[localStore.length()-1].brightness);
}
- for(int step = brightnesses.length()/2;step >= 1;step = step/2)
+ for(int step = localStore.length()/2;step >= 1;step = step/2)
{
barrier();
if(gl_LocalInvocationIndex < step)
{
- uintMaxIP(brightnesses[gl_LocalInvocationIndex],brightnesses[gl_LocalInvocationIndex+step]);
+ uintMaxIP(localStore[gl_LocalInvocationIndex].brightness,localStore[gl_LocalInvocationIndex+step].brightness);
if(bool(step & 1) && gl_LocalInvocationIndex == 0)
{
- uintMaxIP(brightnesses[0], brightnesses[step-1]);
+ uintMaxIP(localStore[0].brightness, localStore[step-1].brightness);
}
}
}
barrier();
if(gl_LocalInvocationIndex == 0)
{
- for(uint i = 0; i < 3; ++i)
- {
- atomicMax(brightness[i], brightnesses[0][i]);
- }
+ atomicMax(brightness, localStore[0].brightness);
}
}
diff --git a/BuddhaTest/Shaders/BuddhaFragment.glsl b/BuddhaTest/Shaders/BuddhaFragment.glsl
index 16d25f2..73cb53e 100644
--- a/BuddhaTest/Shaders/BuddhaFragment.glsl
+++ b/BuddhaTest/Shaders/BuddhaFragment.glsl
@@ -6,11 +6,11 @@ out vec3 color;
layout(std430, binding=2) restrict readonly buffer renderedDataRed
{
- restrict readonly uint counts_SSBO[];
+ restrict readonly uint counts_SSBO[];
};
layout(std430, binding=3) restrict readonly buffer brightnessData
{
- restrict readonly uvec3 brightness;
+ restrict readonly uint brightness;
};
uniform uint width;
@@ -27,6 +27,6 @@ uvec3 getColorAt(vec2 fragCoord)
void main(){
uvec3 totalCount = getColorAt(uv);
- vec3 scaled = vec3(totalCount)/max(length(vec3(brightness)),1.0);
+ vec3 scaled = vec3(totalCount)/max(float(brightness),1.0);
color = scaled;
}
diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp
index 4de252d..4d7ab9f 100644
--- a/BuddhaTest/src/BuddhaTest.cpp
+++ b/BuddhaTest/src/BuddhaTest.cpp
@@ -122,7 +122,7 @@ int main(int argc, char * argv[])
GLuint brightnessBuffer;
glGenBuffers(1,&brightnessBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, brightnessBuffer);
- glBufferData(GL_SHADER_STORAGE_BUFFER, 16,nullptr, GL_DYNAMIC_COPY);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 4,nullptr, GL_DYNAMIC_COPY);
glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, brightnessBuffer);