From 6cc721eaf9e217730299a9d9c4325a77bcb60d06 Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Fri, 14 Nov 2014 23:52:03 +0100
Subject: [PATCH] Use CS for gaussian6

---
Review notes (ignored by git am, not part of the commit message):
 - The compute shaders now sample at texel centres ((iuv + 0.5) * pixel).
 - Work group counts are rounded up instead of always adding one group.
 - The barrier after the horizontal pass also covers texture fetches and
   framebuffer accesses, not only image load/store.
 - NOTE(review): this copy of the patch had its line breaks collapsed; the
   indentation of context and removed lines is a reconstruction - confirm
   against the tree (or apply with --ignore-whitespace).
 - NOTE(review): text in angle brackets (author e-mail, presumably template
   argument lists after ShaderHelperSingleton, TextureRead and
   DrawFullScreenEffect) looks stripped from this copy - restore before
   applying.
 - NOTE(review): the "index" lines are unchanged, so post-image blob ids are
   stale; the last shaders.hpp hunk is cut off after its "{" context line
   while its header still counts three trailing context lines.

 data/shaders/gaussian6h.comp     | 50 ++++++++++++++++++++++++++
 data/shaders/gaussian6v.comp     | 50 ++++++++++++++++++++++++++
 src/graphics/post_processing.cpp | 52 +++++++++++++++++++++++----
 src/graphics/shaders.cpp         | 24 ++++++++++++
 src/graphics/shaders.hpp         | 16 ++++++++
 5 files changed, 186 insertions(+), 6 deletions(-)
 create mode 100644 data/shaders/gaussian6h.comp
 create mode 100644 data/shaders/gaussian6v.comp

diff --git a/data/shaders/gaussian6h.comp b/data/shaders/gaussian6h.comp
new file mode 100644
index 000000000..83963c7b5
--- /dev/null
+++ b/data/shaders/gaussian6h.comp
@@ -0,0 +1,50 @@
+uniform sampler2D source;
+uniform layout(rgba16f) restrict writeonly image2D dest;
+uniform vec2 pixel;
+uniform float sigma;
+
+// Gaussian separated blur with radius 6, horizontal pass (compute version).
+// The host sets pixel to (1 / width, 1 / height) of the blurred buffer.
+
+layout (local_size_x = 8, local_size_y = 8) in;
+
+// The 8x8 tile of this work group plus 8 extra columns on each side.
+shared vec3 local_src[8 + 2 * 8][8];
+
+void main()
+{
+    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
+    ivec2 iuv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
+    // Address texel centres: iuv * pixel alone lands on the corner shared by
+    // four texels instead of the centre of texel iuv.
+    vec2 center = vec2(iuv) + vec2(0.5);
+    vec2 uv_m = (center - vec2(8., 0.)) * pixel;
+    vec2 uv = center * pixel;
+    vec2 uv_p = (center + vec2(8., 0.)) * pixel;
+
+    // Load this texel and the ones 8 columns before and after it.
+    local_src[x][y] = texture(source, uv_m).rgb;
+    local_src[x + 8][y] = texture(source, uv).rgb;
+    local_src[x + 16][y] = texture(source, uv_p).rgb;
+
+    barrier();
+
+    // Incremental Gaussian weights: after k updates g0 is the centre weight
+    // times exp(-k * k / (2 * sigma * sigma)).
+    float g0, g1, g2;
+    g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
+    g1 = exp(-0.5 / (sigma * sigma));
+    g2 = g1 * g1;
+    vec3 sum = local_src[x + 8][y] * g0;
+    g0 *= g1;
+    g1 *= g2;
+    // Offsets 1 to 5 on both sides of the centre texel.
+    for (int i = 1; i < 6; i++) {
+        sum += local_src[8 + x - i][y] * g0;
+        sum += local_src[8 + x + i][y] * g0;
+        g0 *= g1;
+        g1 *= g2;
+    }
+
+    imageStore(dest, iuv, vec4(sum, 0.));
+}
diff --git a/data/shaders/gaussian6v.comp b/data/shaders/gaussian6v.comp
new file mode 100644
index 000000000..b66763c21
--- /dev/null
+++ b/data/shaders/gaussian6v.comp
@@ -0,0 +1,50 @@
+uniform sampler2D source;
+uniform layout(rgba16f) restrict writeonly image2D dest;
+uniform vec2 pixel;
+uniform float sigma;
+
+// Gaussian separated blur with radius 6, vertical pass (compute version).
+// The host sets pixel to (1 / width, 1 / height) of the blurred buffer.
+
+layout (local_size_x = 8, local_size_y = 8) in;
+
+// The 8x8 tile of this work group plus 8 extra rows on each side.
+shared vec3 local_src[8][8 + 2 * 8];
+
+void main()
+{
+    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
+    ivec2 iuv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
+    // Address texel centres: iuv * pixel alone lands on the corner shared by
+    // four texels instead of the centre of texel iuv.
+    vec2 center = vec2(iuv) + vec2(0.5);
+    vec2 uv_m = (center - vec2(0., 8.)) * pixel;
+    vec2 uv = center * pixel;
+    vec2 uv_p = (center + vec2(0., 8.)) * pixel;
+
+    // Load this texel and the ones 8 rows before and after it.
+    local_src[x][y] = texture(source, uv_m).rgb;
+    local_src[x][y + 8] = texture(source, uv).rgb;
+    local_src[x][y + 16] = texture(source, uv_p).rgb;
+
+    barrier();
+
+    // Incremental Gaussian weights: after k updates g0 is the centre weight
+    // times exp(-k * k / (2 * sigma * sigma)).
+    float g0, g1, g2;
+    g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
+    g1 = exp(-0.5 / (sigma * sigma));
+    g2 = g1 * g1;
+    vec3 sum = local_src[x][y + 8] * g0;
+    g0 *= g1;
+    g1 *= g2;
+    // Offsets 1 to 5 on both sides of the centre texel.
+    for (int i = 1; i < 6; i++) {
+        sum += local_src[x][8 + y - i] * g0;
+        sum += local_src[x][8 + y + i] * g0;
+        g0 *= g1;
+        g1 *= g2;
+    }
+
+    imageStore(dest, iuv, vec4(sum, 0.));
+}
diff --git a/src/graphics/post_processing.cpp b/src/graphics/post_processing.cpp
index e13482959..1488ba56c 100644
--- a/src/graphics/post_processing.cpp
+++ b/src/graphics/post_processing.cpp
@@ -340,16 +340,56 @@ void PostProcessing::renderGaussian6Blur(FrameBuffer &in_fbo, FrameBuffer &auxil
     assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
     float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
+    const bool use_compute = irr_driver->hasARBComputeShaders();
+    // The compute shaders use 8x8 work groups: round up so that partial tiles
+    // at the right and bottom edges are still covered.
+    const int group_count_x = ((int)in_fbo.getWidth() + 7) / 8;
+    const int group_count_y = ((int)in_fbo.getHeight() + 7) / 8;
     {
-        auxiliary.Bind();
+        if (!use_compute)
+        {
+            auxiliary.Bind();
 
-        FullScreenShader::Gaussian6VBlurShader::getInstance()->SetTextureUnits(in_fbo.getRTT()[0]);
-        DrawFullScreenEffect(core::vector2df(inv_width, inv_height), sigmaV);
+            FullScreenShader::Gaussian6VBlurShader::getInstance()->SetTextureUnits(in_fbo.getRTT()[0]);
+            DrawFullScreenEffect(core::vector2df(inv_width, inv_height), sigmaV);
+        }
+        else
+        {
+            FullScreenShader::ComputeGaussian6VBlurShader *shader =
+                FullScreenShader::ComputeGaussian6VBlurShader::getInstance();
+            glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+            glUseProgram(shader->Program);
+            shader->SetTextureUnits(in_fbo.getRTT()[0]);
+            glBindSampler(shader->TU_dest, 0);
+            glBindImageTexture(shader->TU_dest, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_RGBA16F);
+            shader->setUniforms(core::vector2df(inv_width, inv_height), sigmaV);
+            glDispatchCompute(group_count_x, group_count_y, 1);
+        }
     }
     {
-        in_fbo.Bind();
+        if (!use_compute)
+        {
+            in_fbo.Bind();
 
-        FullScreenShader::Gaussian6HBlurShader::getInstance()->SetTextureUnits(auxiliary.getRTT()[0]);
-        DrawFullScreenEffect(core::vector2df(inv_width, inv_height), sigmaH);
+            FullScreenShader::Gaussian6HBlurShader::getInstance()->SetTextureUnits(auxiliary.getRTT()[0]);
+            DrawFullScreenEffect(core::vector2df(inv_width, inv_height), sigmaH);
+        }
+        else
+        {
+            FullScreenShader::ComputeGaussian6HBlurShader *shader =
+                FullScreenShader::ComputeGaussian6HBlurShader::getInstance();
+            // auxiliary was written with image stores by the vertical pass and is sampled here.
+            glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+            glUseProgram(shader->Program);
+            shader->SetTextureUnits(auxiliary.getRTT()[0]);
+            glBindSampler(shader->TU_dest, 0);
+            glBindImageTexture(shader->TU_dest, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_RGBA16F);
+            shader->setUniforms(core::vector2df(inv_width, inv_height), sigmaH);
+            glDispatchCompute(group_count_x, group_count_y, 1);
+            // glMemoryBarrier bits describe the accesses issued after the
+            // barrier: besides image load/store, cover texture fetches from
+            // in_fbo and rendering into it.
+            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+        }
     }
 }
 
diff --git a/src/graphics/shaders.cpp b/src/graphics/shaders.cpp
index 9b3f4d5af..b953b776f 100644
--- a/src/graphics/shaders.cpp
+++ b/src/graphics/shaders.cpp
@@ -1742,6 +1742,18 @@ namespace FullScreenShader
         AssignTextureUnit(Program, TexUnit(TU_dest, "dest"));
     }
 
+    // Compute version of the horizontal gaussian6 blur: "source" is registered
+    // as sampler 0 and the "dest" image is assigned to unit TU_dest (1).
+    ComputeGaussian6HBlurShader::ComputeGaussian6HBlurShader()
+    {
+        Program = LoadProgram(OBJECT,
+            GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian6h.comp").c_str());
+        TU_dest = 1;
+        AssignUniforms("pixel", "sigma");
+        AssignSamplerNames(Program, 0, "source");
+        AssignTextureUnit(Program, TexUnit(TU_dest, "dest"));
+    }
+
     Gaussian6HBlurShader::Gaussian6HBlurShader()
     {
         Program = LoadProgram(OBJECT,
@@ -1792,6 +1804,18 @@ namespace FullScreenShader
         AssignTextureUnit(Program, TexUnit(TU_dest, "dest"));
     }
 
+    // Compute version of the vertical gaussian6 blur: "source" is registered
+    // as sampler 0 and the "dest" image is assigned to unit TU_dest (1).
+    ComputeGaussian6VBlurShader::ComputeGaussian6VBlurShader()
+    {
+        Program = LoadProgram(OBJECT,
+            GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian6v.comp").c_str());
+        TU_dest = 1;
+        AssignUniforms("pixel", "sigma");
+        AssignSamplerNames(Program, 0, "source");
+        AssignTextureUnit(Program, TexUnit(TU_dest, "dest"));
+    }
+
     Gaussian6VBlurShader::Gaussian6VBlurShader()
     {
         Program = LoadProgram(OBJECT,
diff --git a/src/graphics/shaders.hpp b/src/graphics/shaders.hpp
index 3088e6b2c..8e7b1d53a 100644
--- a/src/graphics/shaders.hpp
+++ b/src/graphics/shaders.hpp
@@ -464,6 +464,14 @@ public:
     ComputeGaussian17TapHShader();
 };
 
+// Compute shader version of Gaussian6HBlurShader; TU_dest is the unit of the output image.
+class ComputeGaussian6HBlurShader : public ShaderHelperSingleton, public TextureRead
+{
+public:
+    GLuint TU_dest;
+    ComputeGaussian6HBlurShader();
+};
+
 class Gaussian6HBlurShader : public ShaderHelperSingleton, public TextureRead
 {
 public:
@@ -496,6 +504,14 @@ public:
     ComputeGaussian17TapVShader();
 };
 
+// Compute shader version of Gaussian6VBlurShader; TU_dest is the unit of the output image.
+class ComputeGaussian6VBlurShader : public ShaderHelperSingleton, public TextureRead
+{
+public:
+    GLuint TU_dest;
+    ComputeGaussian6VBlurShader();
+};
+
 class Gaussian6VBlurShader : public ShaderHelperSingleton, public TextureRead
 {