#version 450

// Box blur of `shadowMap` into `shadowMapBlurred`.
//
// Each work group of TILE_SIZE x TILE_SIZE invocations first copies the
// texels it needs (its own tile plus a BLUR_HALF_WIDTH border on every side)
// into shared memory, then every invocation averages the
// BLUR_WIDTH x BLUR_WIDTH neighbourhood around its own texel.

// Edge length of one work group; also drives the shared-memory tile size.
#define TILE_SIZE 16
// Full width of the box filter (odd, so the filter is centred on the texel).
#define BLUR_WIDTH 7
// Expression macros are parenthesised so they expand safely inside larger
// expressions (e.g. `-BLUR_HALF_WIDTH`, `2 * SHM_WIDTH`).
#define BLUR_HALF_WIDTH (BLUR_WIDTH / 2)
#define SHM_WIDTH (TILE_SIZE + BLUR_WIDTH - 1)

layout(local_size_x = TILE_SIZE, local_size_y = TILE_SIZE) in;

// Source image: only read via imageLoad / imageSize.
layout(set = 3, binding = 0, r8) uniform readonly image2D shadowMap;
// Destination image: only written via imageStore.
layout(set = 3, binding = 1, r8) uniform writeonly image2D shadowMapBlurred;

// Shared tile: the work group's texels plus the surrounding blur border.
shared vec4 sharedPixels[SHM_WIDTH][SHM_WIDTH];

// Returns the texel of `shadowMap` at `uv`, or vec4(0) when `uv` lies outside
// the rectangle [0, size). Out-of-range texels therefore contribute zero to
// the blur.
vec4 GetShadowMapValue(ivec2 uv, ivec2 size)
{
    bool inside = all(greaterThanEqual(uv, ivec2(0))) && all(lessThan(uv, size));
    if (inside)
    {
        return imageLoad(shadowMap, uv);
    }
    return vec4(0.0f);
}

void main()
{
    ivec2 globalThread = ivec2(gl_GlobalInvocationID.xy);
    ivec2 localThread = ivec2(gl_LocalInvocationID.xy);
    ivec2 inputImageSize = imageSize(shadowMap);

    // Image coordinate that maps to sharedPixels[0][0]: the work group's
    // first texel shifted back by the blur border.
    ivec2 tileOrigin = ivec2(gl_WorkGroupID.xy) * ivec2(gl_WorkGroupSize.xy)
                     - ivec2(BLUR_HALF_WIDTH);

    // Cooperative load: SHM_WIDTH exceeds the work group size, so each
    // invocation strides across the tile by the work group size and may
    // load more than one texel.
    for (int x = localThread.x; x < SHM_WIDTH; x += int(gl_WorkGroupSize.x))
    {
        for (int y = localThread.y; y < SHM_WIDTH; y += int(gl_WorkGroupSize.y))
        {
            sharedPixels[x][y] = GetShadowMapValue(tileOrigin + ivec2(x, y), inputImageSize);
        }
    }

    // Wait until the whole tile has been written. Every invocation must reach
    // this barrier, so no invocation may return before this point.
    barrier();

    // Position of this invocation's own texel inside the shared tile.
    ivec2 center = localThread + ivec2(BLUR_HALF_WIDTH);

    vec4 sum = vec4(0.0f);
    for (int dx = -BLUR_HALF_WIDTH; dx <= BLUR_HALF_WIDTH; ++dx)
    {
        for (int dy = -BLUR_HALF_WIDTH; dy <= BLUR_HALF_WIDTH; ++dy)
        {
            sum += sharedPixels[center.x + dx][center.y + dy];
        }
    }

    // Uniform box filter: divide by the number of taps.
    sum /= float(BLUR_WIDTH * BLUR_WIDTH);

    // Invocations that fall outside the image (when its size is not a multiple
    // of TILE_SIZE) took part in the tile load above but have nothing to write.
    if (all(lessThan(globalThread, inputImageSize)))
    {
        imageStore(shadowMapBlurred, globalThread, sum);
    }
}