about | summary | refs | log | tree | commit | diff
path: root/BuddhaTest/Shaders/BuddhaCompute.glsl
diff options
context:
space:
mode:
Diffstat (limited to 'BuddhaTest/Shaders/BuddhaCompute.glsl')
-rw-r--r-- BuddhaTest/Shaders/BuddhaCompute.glsl 66
1 files changed, 63 insertions, 3 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl
index 1903505..ebfdb29 100644
--- a/BuddhaTest/Shaders/BuddhaCompute.glsl
+++ b/BuddhaTest/Shaders/BuddhaCompute.glsl
@@ -7,6 +7,12 @@ layout(std430, binding=2) restrict buffer renderedDataRed
restrict uint counts_SSBO[];
};
+/** Buffer at binding 3 holding a single counter. At the end of main(), invocation 0 of every work group merges its group's maximum of the per-invocation localStore brightness values into it with atomicMax. */
+layout(std430, binding=3) restrict buffer brightnessData
+{
+ restrict uint brightness;
+};
+
+/** Data stored in the state buffer. */
struct individualData
{
uint phase;
@@ -28,12 +34,38 @@ uniform uvec4 orbitLength;
uniform uint iterationsPerDispatch;
uniform uint totalIterations;
+/** Data stored in shared memory. Used to reduce register pressure. Read at beginning from buffer (if needed), written back at end. */
+struct workerState
+{
+ // Largest value this invocation has recorded so far: reset to 0 at the start of main() and raised in addToColorOfCell().
+ uint brightness;
+};
+
+/** Storage in shared memory. Used to reduce register pressure. Holds one workerState slot per invocation of the work group; each invocation uses the slot at gl_LocalInvocationIndex. */
+shared workerState[gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z] localStore;
+
+/** In-place maximum: raises `modified` to `constant` when it is smaller, otherwise leaves it untouched. */
+void uintMaxIP(inout uint modified, const uint constant)
+{
+    if(modified < constant)
+        modified = constant;
+}
+
+/** Component-wise in-place maximum: every component of `modified` ends up as the larger of itself and the matching component of `constant`. */
+void uintMaxIP(inout uvec3 modified, const uvec3 constant)
+{
+    // The built-in max() on uvec3 operates per component, which is exactly the element-by-element fold.
+    modified = max(modified, constant);
+}
+
+/**
+ * Atomically adds `toAdd` to the three consecutive counters of `cell` in counts_SSBO
+ * and records the largest resulting counter value in this invocation's localStore slot.
+ *
+ * cell  - cell coordinates; the first counter index is 3*(cell.x + cell.y*width).
+ * toAdd - increments for the three counters of that cell.
+ */
void addToColorOfCell(uvec2 cell, uvec3 toAdd)
{
uint firstIndex = 3*(cell.x + cell.y * width);
- atomicAdd(counts_SSBO[firstIndex],toAdd.x);
- atomicAdd(counts_SSBO[firstIndex+1],toAdd.y);
- atomicAdd(counts_SSBO[firstIndex+2],toAdd.z);
+ // atomicAdd returns the counter value from BEFORE the addition, so toAdd is added
+ // again to obtain the value this invocation actually left in the counter.
+ uvec3 updated;
+ updated.x = atomicAdd(counts_SSBO[firstIndex],toAdd.x) + toAdd.x;
+ updated.y = atomicAdd(counts_SSBO[firstIndex+1],toAdd.y) + toAdd.y;
+ updated.z = atomicAdd(counts_SSBO[firstIndex+2],toAdd.z) + toAdd.z;
+ // Keep the largest of the three counters in this invocation's slot.
+ uintMaxIP(localStore[gl_LocalInvocationIndex].brightness, max(updated.x, max(updated.y, updated.z)));
}
uvec2 getCell(vec2 complex)
@@ -225,6 +257,8 @@ vec2 getCurrentOrbitOffset(const uint orbitNumber, const uint totalWorkers, cons
}
void main() {
+ localStore[gl_LocalInvocationIndex].brightness = 0;
+
//we need to know how many total work groups are running this iteration
const uvec3 totalWorkersPerDimension = gl_WorkGroupSize * gl_NumWorkGroups;
const uint totalWorkers = totalWorkersPerDimension.x*totalWorkersPerDimension.y*totalWorkersPerDimension.z;
@@ -288,5 +322,31 @@ void main() {
}
}
}
+
stateArray[uniqueWorkerID] = state;
+
+
+ //use divide et impera to get the real maximum brightness of this local group
+ barrier();
+ if(bool(localStore.length() & 1) && gl_LocalInvocationIndex == 0)
+ {
+ uintMaxIP(localStore[0].brightness, localStore[localStore.length()-1].brightness);
+ }
+ for(int step = localStore.length()/2;step >= 1;step = step/2)
+ {
+ barrier();
+ if(gl_LocalInvocationIndex < step)
+ {
+ uintMaxIP(localStore[gl_LocalInvocationIndex].brightness,localStore[gl_LocalInvocationIndex+step].brightness);
+ if(bool(step & 1) && gl_LocalInvocationIndex == 0)
+ {
+ uintMaxIP(localStore[0].brightness, localStore[step-1].brightness);
+ }
+ }
+ }
+ barrier();
+ if(gl_LocalInvocationIndex == 0)
+ {
+ atomicMax(brightness, localStore[0].brightness);
+ }
}