diff options
author | Andreas Grois <andi@grois.info> | 2018-03-22 22:52:44 +0100 |
---|---|---|
committer | Andreas Grois <andi@grois.info> | 2018-03-22 22:52:44 +0100 |
commit | e45b39dc2ebb76bc079677b1d186b6a5109d1a5b (patch) | |
tree | 17919228413393625b94fbfd3435f966159e33dc | |
parent | d06a01040cd7614a6951d6e6e350135a05ddb9c2 (diff) |
Restore good occupancy by exploiting shared memory.
-rw-r--r-- | BuddhaTest/Shaders/BuddhaCompute.glsl | 84 | ||||
-rw-r--r-- | BuddhaTest/Shaders/BuddhaFragment.glsl | 6 | ||||
-rw-r--r-- | BuddhaTest/src/BuddhaTest.cpp | 2 |
3 files changed, 51 insertions, 41 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index 0ce19f7..dfc3828 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -9,7 +9,7 @@ layout(std430, binding=2) restrict buffer renderedDataRed layout(std430, binding=3) restrict buffer brightnessData { - restrict uvec3 brightness; + restrict uint brightness; }; struct individualData @@ -33,7 +33,16 @@ uniform uvec4 orbitLength; uniform uint iterationsPerDispatch; uniform uint totalIterations; -shared uvec3[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z] brightnesses; +struct workerState +{ + uint phase; + uint orbitNumber; + uint doneIterations; + + uint brightness; +}; + +shared workerState[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z] localStore; void uintMaxIP(inout uint modified, const uint constant) { @@ -55,8 +64,8 @@ void addToColorOfCell(uvec2 cell, uvec3 toAdd) b.z = atomicAdd(counts_SSBO[firstIndex+2],toAdd.z); for(int i = 0; i < 3;++i) { - if(brightnesses[gl_LocalInvocationIndex][i] < b[i]) - brightnesses[gl_LocalInvocationIndex][i] = b[i]; + if(localStore[gl_LocalInvocationIndex].brightness < b[i]) + localStore[gl_LocalInvocationIndex].brightness = b[i]; } } @@ -218,10 +227,7 @@ vec2 getCurrentOrbitOffset(const uint orbitNumber, const uint totalWorkers, cons } void main() { - for(int i = 0; i < 3;++i) - { - brightnesses[gl_LocalInvocationIndex][i] = 0; - } + localStore[gl_LocalInvocationIndex].brightness = 0; //we need to know how many total work groups are running this iteration const uvec3 totalWorkersPerDimension = gl_WorkGroupSize * gl_NumWorkGroups; @@ -229,89 +235,93 @@ void main() { const uint uniqueWorkerID = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y*totalWorkersPerDimension.x + gl_GlobalInvocationID.z*(totalWorkersPerDimension.x * totalWorkersPerDimension.y); - individualData state = stateArray[uniqueWorkerID]; + localStore[gl_LocalInvocationIndex].phase = 
stateArray[uniqueWorkerID].phase; + localStore[gl_LocalInvocationIndex].orbitNumber = stateArray[uniqueWorkerID].orbitNumber; + localStore[gl_LocalInvocationIndex].doneIterations = stateArray[uniqueWorkerID].doneIterations; + vec2 lastPosition = stateArray[uniqueWorkerID].lastPosition; //getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations) uint iterationsLeftToDo = iterationsPerDispatch; - vec2 offset = getCurrentOrbitOffset(state.orbitNumber, totalWorkers, uniqueWorkerID); + vec2 offset = getCurrentOrbitOffset(localStore[gl_LocalInvocationIndex].orbitNumber, totalWorkers, uniqueWorkerID); while(iterationsLeftToDo != 0) { - if(state.phase == 0) + if(localStore[gl_LocalInvocationIndex].phase == 0) { //new orbit: //we know that iterationsLeftToDo is at least 1 by the while condition. --iterationsLeftToDo; //count this as 1 iteration. - offset = getCurrentOrbitOffset(state.orbitNumber, totalWorkers, uniqueWorkerID); + offset = getCurrentOrbitOffset(localStore[gl_LocalInvocationIndex].orbitNumber, totalWorkers, uniqueWorkerID); if(isInMainBulb(offset) || isInMainCardioid(offset)) { // do not waste time drawing this orbit - ++state.orbitNumber; + ++localStore[gl_LocalInvocationIndex].orbitNumber; } else { //cool orbit! 
- state.lastPosition = vec2(0); - state.phase = 1; - state.doneIterations = 0; + lastPosition = vec2(0); + localStore[gl_LocalInvocationIndex].phase = 1; + localStore[gl_LocalInvocationIndex].doneIterations = 0; } } - if(state.phase == 1) + if(localStore[gl_LocalInvocationIndex].phase == 1) { //check if this orbit is going to be drawn bool result; - if(isGoingToBeDrawn(offset,totalIterations, state.lastPosition, iterationsLeftToDo, state.doneIterations , result)) + if(isGoingToBeDrawn(offset,totalIterations, lastPosition, iterationsLeftToDo, localStore[gl_LocalInvocationIndex].doneIterations , result)) { if(result) { //on to step 2: drawing - state.phase = 2; - state.lastPosition = vec2(0); - state.doneIterations = 0; + localStore[gl_LocalInvocationIndex].phase = 2; + lastPosition = vec2(0); + localStore[gl_LocalInvocationIndex].doneIterations = 0; } else { //back to step 0 - ++state.orbitNumber; - state.phase = 0; + ++localStore[gl_LocalInvocationIndex].orbitNumber; + localStore[gl_LocalInvocationIndex].phase = 0; } } } - if(state.phase == 2) + if(localStore[gl_LocalInvocationIndex].phase == 2) { - if(drawOrbit(offset, totalIterations, state.lastPosition, iterationsLeftToDo, state.doneIterations)) + if(drawOrbit(offset, totalIterations, lastPosition, iterationsLeftToDo, localStore[gl_LocalInvocationIndex].doneIterations)) { - ++state.orbitNumber; - state.phase = 0; + ++localStore[gl_LocalInvocationIndex].orbitNumber; + localStore[gl_LocalInvocationIndex].phase = 0; } } } - stateArray[uniqueWorkerID] = state; + stateArray[uniqueWorkerID].orbitNumber = localStore[gl_LocalInvocationIndex].orbitNumber; + stateArray[uniqueWorkerID].phase = localStore[gl_LocalInvocationIndex].phase; + stateArray[uniqueWorkerID].doneIterations = localStore[gl_LocalInvocationIndex].doneIterations; + stateArray[uniqueWorkerID].lastPosition = lastPosition; + //use divide et impera to get the real maximum brightness of this local group barrier(); - if(bool(brightnesses.length() & 1) && 
gl_LocalInvocationIndex == 0) + if(bool(localStore.length() & 1) && gl_LocalInvocationIndex == 0) { - uintMaxIP(brightnesses[0], brightnesses[brightnesses.length()-1]); + uintMaxIP(localStore[0].brightness, localStore[localStore.length()-1].brightness); } - for(int step = brightnesses.length()/2;step >= 1;step = step/2) + for(int step = localStore.length()/2;step >= 1;step = step/2) { barrier(); if(gl_LocalInvocationIndex < step) { - uintMaxIP(brightnesses[gl_LocalInvocationIndex],brightnesses[gl_LocalInvocationIndex+step]); + uintMaxIP(localStore[gl_LocalInvocationIndex].brightness,localStore[gl_LocalInvocationIndex+step].brightness); if(bool(step & 1) && gl_LocalInvocationIndex == 0) { - uintMaxIP(brightnesses[0], brightnesses[step-1]); + uintMaxIP(localStore[0].brightness, localStore[step-1].brightness); } } } barrier(); if(gl_LocalInvocationIndex == 0) { - for(uint i = 0; i < 3; ++i) - { - atomicMax(brightness[i], brightnesses[0][i]); - } + atomicMax(brightness, localStore[0].brightness); } } diff --git a/BuddhaTest/Shaders/BuddhaFragment.glsl b/BuddhaTest/Shaders/BuddhaFragment.glsl index 16d25f2..73cb53e 100644 --- a/BuddhaTest/Shaders/BuddhaFragment.glsl +++ b/BuddhaTest/Shaders/BuddhaFragment.glsl @@ -6,11 +6,11 @@ out vec3 color; layout(std430, binding=2) restrict readonly buffer renderedDataRed { - restrict readonly uint counts_SSBO[]; + restrict readonly uint counts_SSBO[]; }; layout(std430, binding=3) restrict readonly buffer brightnessData { - restrict readonly uvec3 brightness; + restrict readonly uint brightness; }; uniform uint width; @@ -27,6 +27,6 @@ uvec3 getColorAt(vec2 fragCoord) void main(){ uvec3 totalCount = getColorAt(uv); - vec3 scaled = vec3(totalCount)/max(length(vec3(brightness)),1.0); + vec3 scaled = vec3(totalCount)/max(float(brightness),1.0); color = scaled; } diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 4de252d..4d7ab9f 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp 
@@ -122,7 +122,7 @@ int main(int argc, char * argv[]) GLuint brightnessBuffer; glGenBuffers(1,&brightnessBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER, brightnessBuffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, 16,nullptr, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, 4,nullptr, GL_DYNAMIC_COPY); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, brightnessBuffer); |