stk-code_catmod/data/shaders/gaussianv.comp

// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html

// Input image. Declared read-only; main() only uses the .x component of each
// texel it loads.
uniform layout(size1x16) restrict readonly image2D source;
// Output image. Declared write-only; main() stores one blurred value per
// invocation, replicated across all four components.
uniform layout(size1x16) volatile restrict writeonly image2D dest;
// Standard deviation of the Gaussian, in texels. Defaults to 5 when the
// application does not set the uniform.
uniform float sigma = 5.;

// Work groups are 8x8 invocations; each invocation produces one output texel.
layout (local_size_x = 8, local_size_y = 8) in;

// Per-work-group staging tile, indexed [local x][local y + offset].
// The second dimension holds 24 entries per column: 8 texels preceding the
// group along y, the group's own 8 texels, and 8 texels following it.
shared float local_src[8][8 + 2 * 8];

// Vertical pass of a separable Gaussian blur.
//
// Each invocation first copies three texels of `source` (its own, and the ones
// 8 texels before and after it along y) into the shared tile `local_src`, so
// that the work group as a whole stages an 8x24 column strip. After a barrier,
// every invocation accumulates a 15-tap kernel (centre +/- 7 texels) from the
// tile and writes a single texel of `dest`.
//
// The Gaussian weights are produced incrementally (GPU Gems 3, chapter 40):
// starting from the centre weight, each step multiplies the running weight by
// a running ratio, avoiding one exp() per tap.
void main()
{
    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
    ivec2 uv = ivec2(gl_GlobalInvocationID.xy);

    // Stage the column strip: [y] = texel 8 before, [y + 8] = own texel,
    // [y + 16] = texel 8 after.
    local_src[x][y] = imageLoad(source, uv - ivec2(0, 8)).x;
    local_src[x][y + 8] = imageLoad(source, uv).x;
    local_src[x][y + 16] = imageLoad(source, uv + ivec2(0, 8)).x;

    // Every invocation reads tile entries written by its neighbours, so the
    // shared writes must be complete and visible before anyone continues.
    // The explicit shared-memory fence covers GLSL versions in which
    // barrier() is only specified as an execution barrier.
    memoryBarrierShared();
    barrier();

    // Incremental Gaussian coefficients:
    //   g0 = weight of the current tap (starts at the centre weight),
    //   g1 = ratio between consecutive weights,
    //   g2 = constant factor by which that ratio shrinks every step.
    float g0, g1, g2;
    g0 = 1.0 / (sqrt(2.0 * 3.14159265) * sigma);
    g1 = exp(-0.5 / (sigma * sigma));
    g2 = g1 * g1;

    float sum = local_src[x][y + 8] * g0;
    // The kernel is truncated at +/- 7 texels, so its weights do not sum to 1.
    // Track the weight actually applied and normalise by it at the end,
    // otherwise the blurred image is darker than the input.
    float total_weight = g0;
    g0 *= g1;
    g1 *= g2;
    for (int j = 1; j < 8; j++) {
        // The kernel is symmetric: the same weight applies on both sides.
        sum += (local_src[x][y + 8 + j] + local_src[x][y + 8 - j]) * g0;
        total_weight += 2.0 * g0;
        g0 *= g1;
        g1 *= g2;
    }
    imageStore(dest, uv, vec4(sum / total_weight));
}