#version 450

// SSAO box blur (compute).
//
// Each work group stages a (GROUP_SIZE + BLUR_WIDTH - 1)^2 tile of ssaoImage
// in shared memory, then every invocation averages the BLUR_WIDTH x BLUR_WIDTH
// window centred on its own pixel and writes the result to ssaoBlurImage.

// Macro bodies are parenthesised so they expand safely inside any expression
// (e.g. `x * SHM_WIDTH` or `a - BLUR_HALF_WIDTH`).
#define BLUR_WIDTH      5
#define BLUR_HALF_WIDTH (BLUR_WIDTH / 2)
#define GROUP_SIZE      16
#define SHM_WIDTH       (GROUP_SIZE + BLUR_WIDTH - 1)

layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE) in;

// Input is only loaded from and output is only stored to, so say so.
layout(set = 3, binding = 0, r8) uniform readonly  image2D ssaoImage;
layout(set = 3, binding = 1, r8) uniform writeonly image2D ssaoBlurImage;

// Tile of input texels shared by the work group, indexed [x][y] and including
// a BLUR_HALF_WIDTH apron on every side.
shared float sharedPixels[SHM_WIDTH][SHM_WIDTH];

// Returns the red channel of ssaoImage at `uv`, or 0.0 when `uv` lies outside
// [0, size). `size` is the extent of ssaoImage in texels.
// NOTE(review): zero padding still counts toward the blur divisor, so pixels
// within BLUR_HALF_WIDTH of the image border are biased toward 0 - confirm
// this is intended rather than clamp-to-edge.
float GetSSAOValue(ivec2 uv, ivec2 size)
{
    bool inside = uv.x >= 0 && uv.y >= 0 && uv.x < size.x && uv.y < size.y;
    if (!inside)
    {
        return 0.0;
    }
    return imageLoad(ssaoImage, uv).r;
}

void main()
{
    ivec2 globalThread   = ivec2(gl_GlobalInvocationID.xy);
    ivec2 localThread    = ivec2(gl_LocalInvocationID.xy);
    ivec2 inputImageSize = imageSize(ssaoImage);

    // Texel that maps to sharedPixels[0][0]: the work group's first pixel
    // shifted back by the blur apron.
    ivec2 start = ivec2(gl_WorkGroupID.xy) * ivec2(GROUP_SIZE) - ivec2(BLUR_HALF_WIDTH);

    // Cooperative load: the tile is larger than the work group, so each
    // invocation strides across it in steps of GROUP_SIZE.
    for (int i = localThread.x; i < SHM_WIDTH; i += GROUP_SIZE)
    {
        for (int j = localThread.y; j < SHM_WIDTH; j += GROUP_SIZE)
        {
            sharedPixels[i][j] = GetSSAOValue(start + ivec2(i, j), inputImageSize);
        }
    }

    // Every invocation must reach this barrier (no early return above), so
    // that the whole tile is populated before anyone reads it.
    barrier();

    // Centre of this invocation's window inside the shared tile.
    ivec2 shmStart = localThread + ivec2(BLUR_HALF_WIDTH);

    float sum = 0.0;
    for (int i = -BLUR_HALF_WIDTH; i <= BLUR_HALF_WIDTH; ++i)
    {
        for (int j = -BLUR_HALF_WIDTH; j <= BLUR_HALF_WIDTH; ++j)
        {
            sum += sharedPixels[shmStart.x + i][shmStart.y + j];
        }
    }
    sum /= float(BLUR_WIDTH * BLUR_WIDTH);

    // When the image size is not a multiple of GROUP_SIZE, some invocations
    // lie past the image edge; skip their store instead of writing out of
    // bounds. The guard sits after the barrier so control flow stays uniform
    // up to that point.
    ivec2 outputImageSize = imageSize(ssaoBlurImage);
    if (globalThread.x < outputImageSize.x && globalThread.y < outputImageSize.y)
    {
        imageStore(ssaoBlurImage, globalThread, vec4(vec3(sum), 1.0));
    }
}