diff options
author | Andreas Grois <andi@grois.info> | 2018-03-17 08:56:53 +0100 |
---|---|---|
committer | Andreas Grois <andi@grois.info> | 2018-03-17 08:56:53 +0100 |
commit | 5d0467629625f8a092ac2ef8ba5f30c629cc8b62 (patch) | |
tree | 6dbc1693c164ba442ae0b3bd92643c4194b17e47 | |
parent | 48fe1f1aad00f31d44b6814213ae89462bd39176 (diff) |
Improve parallelization by re-calculating the offset on every dispatch instead of storing it.
This brings the VGPR usage down from 40 to 36.
-rw-r--r-- | BuddhaTest/Shaders/BuddhaCompute.glsl | 42 | ||||
-rw-r--r-- | BuddhaTest/src/BuddhaTest.cpp | 2 |
2 files changed, 22 insertions, 22 deletions
diff --git a/BuddhaTest/Shaders/BuddhaCompute.glsl b/BuddhaTest/Shaders/BuddhaCompute.glsl index 4e8e795..c53596b 100644 --- a/BuddhaTest/Shaders/BuddhaCompute.glsl +++ b/BuddhaTest/Shaders/BuddhaCompute.glsl @@ -26,34 +26,27 @@ uniform uvec3 orbitLength; uniform uint iterationsPerDispatch;
-void getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
+void getIndividualState(in uint CellID, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
{
- uint startIndex = 7*CellID;
+ uint startIndex = 5*CellID;
uint x = individualState[startIndex];
uint y = individualState[startIndex+1];
phase = individualState[startIndex+2];
orbitNumber = individualState[startIndex+3];
doneIterations = individualState[startIndex+4];
- uint offx = individualState[startIndex+5];
- uint offy = individualState[startIndex+6];
coordinates = vec2(uintBitsToFloat(x),uintBitsToFloat(y));
- offset = vec2(uintBitsToFloat(offx),uintBitsToFloat(offy));
}
-void setIndividualState(in uint CellID, in vec2 offset, in vec2 coordinates, in uint phase, in uint orbitNumber, in uint doneIterations)
+void setIndividualState(in uint CellID, in vec2 coordinates, in uint phase, in uint orbitNumber, in uint doneIterations)
{
- uint startIndex = 7*CellID;
+ uint startIndex = 5*CellID;
uint x=floatBitsToUint(coordinates.x);
uint y=floatBitsToUint(coordinates.y);
- uint offx = floatBitsToUint(offset.x);
- uint offy = floatBitsToUint(offset.y);
atomicExchange(individualState[startIndex],x);
atomicExchange(individualState[startIndex+1],y);
atomicExchange(individualState[startIndex+2],phase);
atomicExchange(individualState[startIndex+3],orbitNumber);
atomicExchange(individualState[startIndex+4],doneIterations);
- atomicExchange(individualState[startIndex+5],offx);
- atomicExchange(individualState[startIndex+6],offy);
}
void addToColorOfCell(uvec2 cell, uvec3 toAdd)
@@ -211,6 +204,17 @@ bool drawOrbit(in vec2 offset, in uint totalIterations, inout vec2 lastVal, inou return endCount == totalIterations;
}
+vec2 getCurrentOrbitOffset(uint orbitNumber, uint totalWorkers, uint uniqueWorkerID)
+{
+ uint seed = orbitNumber * totalWorkers + uniqueWorkerID;
+ uint yDecoupler = orbitNumber;
+ float x = hash1(seed,seed);
+ seed = (seed ^ intHash(orbitNumber));
+ float y = hash1(seed,seed);
+ vec2 random = vec2(x,y);
+ return vec2(random.x * 3.5-2.5,random.y*1.55);
+}
+
void main() {
//we need to know how many total work groups are running this iteration
@@ -227,10 +231,12 @@ void main() { uint phase;
uint doneIterations;
uint orbitNumber;
- vec2 offset;
+
//getIndividualState(in uint CellID, out vec2 offset, out vec2 coordinates, out uint phase, out uint orbitNumber, out uint doneIterations)
- getIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations);
+ getIndividualState(uniqueWorkerID, lastPosition, phase, orbitNumber, doneIterations);
uint iterationsLeftToDo = iterationsPerDispatch;
+ vec2 offset = getCurrentOrbitOffset(orbitNumber, totalWorkers, uniqueWorkerID);
+
while(iterationsLeftToDo != 0)
{
if(phase == 0)
@@ -238,13 +244,7 @@ void main() { //new orbit:
//we know that iterationsLeftToDo is at least 1 by the while condition.
--iterationsLeftToDo; //count this as 1 iteration.
- uint seed = orbitNumber * totalWorkers + uniqueWorkerID;
- uint yDecoupler = orbitNumber;
- float x = hash1(seed,seed);
- seed = (seed ^ intHash(orbitNumber));
- float y = hash1(seed,seed);
- vec2 random = vec2(x,y);
- offset = vec2(random.x * 3.5-2.5,random.y*1.55);
+ offset = getCurrentOrbitOffset(orbitNumber, totalWorkers, uniqueWorkerID);
if(isInMainBulb(offset) || isInMainCardioid(offset))
{
// do not waste time drawing this orbit
@@ -290,5 +290,5 @@ void main() { }
- setIndividualState(uniqueWorkerID, offset, lastPosition, phase, orbitNumber, doneIterations);
+ setIndividualState(uniqueWorkerID, lastPosition, phase, orbitNumber, doneIterations);
}
diff --git a/BuddhaTest/src/BuddhaTest.cpp b/BuddhaTest/src/BuddhaTest.cpp index 24f5893..14f4dd3 100644 --- a/BuddhaTest/src/BuddhaTest.cpp +++ b/BuddhaTest/src/BuddhaTest.cpp @@ -127,7 +127,7 @@ int main(int argc, char * argv[]) GLuint stateBuffer;
glGenBuffers(1,&stateBuffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER,stateBuffer);
- glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(7*workersPerFrame),nullptr,GL_DYNAMIC_COPY);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 4*(5*workersPerFrame),nullptr,GL_DYNAMIC_COPY);
glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R8,GL_RED,GL_UNSIGNED_INT,nullptr);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, stateBuffer);
|