diff --git a/data/shaders/gaussian.comp b/data/shaders/gaussian.comp
new file mode 100644
index 000000000..a5c1a3d22
--- /dev/null
+++ b/data/shaders/gaussian.comp
@@ -0,0 +1,43 @@
+// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
+// Horizontal pass of a separable gaussian blur; each invocation writes one texel.
+
+// r16f matches the GL_R16F format the images are bound with.
+uniform layout(r16f) restrict readonly image2D source;
+uniform layout(r16f) volatile restrict writeonly image2D dest;
+uniform float sigma = 5.;
+
+layout (local_size_x = 8, local_size_y = 8) in;
+
+// Texels of the work group plus 8 extra columns on each side.
+shared float local_src[8 + 2 * 8][8];
+
+void main()
+{
+    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
+    ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
+    // Stage the texel of this invocation and its neighbours 8 texels left and right.
+    local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(8, 0)).x;
+    local_src[x + 8][y] = imageLoad(source, ivec2(uv)).x;
+    local_src[x + 16][y] = imageLoad(source, ivec2(uv) + ivec2(8, 0)).x;
+
+    // All loads must be done before any invocation reads its neighbours.
+    barrier();
+
+    // Incremental computation of the gaussian weights: g0 is the current weight.
+    float g0, g1, g2;
+    g0 = 1.0 / (sqrt(2.0 * 3.14159265) * sigma);
+    g1 = exp(-0.5 / (sigma * sigma));
+    g2 = g1 * g1;
+    float sum = local_src[x + 8][y] * g0;
+    g0 *= g1;
+    g1 *= g2;
+    // Accumulate the 7 taps on each side of the centre.
+    for (int j = 1; j < 8; j++) {
+        sum += local_src[8 + x - j][y] * g0;
+        sum += local_src[8 + x + j][y] * g0;
+        g0 *= g1;
+        g1 *= g2;
+    }
+    imageStore(dest, ivec2(uv), vec4(sum));
+}
+
diff --git a/data/shaders/gaussianv.comp b/data/shaders/gaussianv.comp
new file mode 100644
index 000000000..38674a61d
--- /dev/null
+++ b/data/shaders/gaussianv.comp
@@ -0,0 +1,43 @@
+// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
+// Vertical pass of a separable gaussian blur; each invocation writes one texel.
+
+// r16f matches the GL_R16F format the images are bound with.
+uniform layout(r16f) restrict readonly image2D source;
+uniform layout(r16f) volatile restrict writeonly image2D dest;
+uniform float sigma = 5.;
+
+layout (local_size_x = 8, local_size_y = 8) in;
+
+// Texels of the work group plus 8 extra rows above and below.
+shared float local_src[8][8 + 2 * 8];
+
+void main()
+{
+    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
+    ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
+    // Stage the texel of this invocation and its neighbours 8 texels above and below.
+    local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(0, 8)).x;
+    local_src[x][y + 8] = imageLoad(source, ivec2(uv)).x;
+    local_src[x][y + 16] = imageLoad(source, ivec2(uv) + ivec2(0, 8)).x;
+
+    // All loads must be done before any invocation reads its neighbours.
+    barrier();
+
+    // Incremental computation of the gaussian weights: g0 is the current weight.
+    float g0, g1, g2;
+    g0 = 1.0 / (sqrt(2.0 * 3.14159265) * sigma);
+    g1 = exp(-0.5 / (sigma * sigma));
+    g2 = g1 * g1;
+    float sum = local_src[x][y + 8] * g0;
+    g0 *= g1;
+    g1 *= g2;
+    // Accumulate the 7 taps on each side of the centre.
+    for (int j = 1; j < 8; j++) {
+        sum += local_src[x][y + 8 + j] * g0;
+        sum += local_src[x][y + 8 - j] * g0;
+        g0 *= g1;
+        g1 *= g2;
+    }
+    imageStore(dest, ivec2(uv), vec4(sum));
+}
+
diff --git a/src/graphics/glwrap.cpp b/src/graphics/glwrap.cpp
index 8f200a98f..b206bb7d2 100644
--- a/src/graphics/glwrap.cpp
+++ b/src/graphics/glwrap.cpp
@@ -74,6 +74,8 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
 PFNGLTEXSTORAGE1DPROC glTexStorage1D;
 PFNGLTEXSTORAGE2DPROC glTexStorage2D;
 PFNGLTEXSTORAGE3DPROC glTexStorage3D;
+PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
+PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
 #endif
 
 static bool is_gl_init = false;
@@ -226,6 +228,8 @@ void initGL()
     glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
     glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
     glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
+    glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)IRR_OGL_LOAD_EXTENSION("glBindImageTexture");
+    glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)IRR_OGL_LOAD_EXTENSION("glDispatchCompute");
 #ifdef DEBUG
     glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
 #endif
diff --git a/src/graphics/glwrap.hpp b/src/graphics/glwrap.hpp
index bc9a94b6e..4d2ea5f1a 100644
--- a/src/graphics/glwrap.hpp
+++ b/src/graphics/glwrap.hpp
@@ -97,6 +97,8 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
 extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
 extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
 extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
+extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
+extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
 #ifdef DEBUG
 extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
 #endif
diff --git a/src/graphics/post_processing.cpp b/src/graphics/post_processing.cpp
index aa626b162..3f0505dcf 100644
--- a/src/graphics/post_processing.cpp
+++ b/src/graphics/post_processing.cpp
@@ -397,32 +397,62 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
     assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
     float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
     {
-        auxiliary.Bind();
-        glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
-        glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
+        if (irr_driver->getGLSLVersion() < 430)
+        {
+            auxiliary.Bind();
+            glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
+            glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
 
-        glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
+            glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
 
-        setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
+            setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+            glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
 
-        glDrawArrays(GL_TRIANGLES, 0, 3);
+            glDrawArrays(GL_TRIANGLES, 0, 3);
+        }
+        else
+        {
+            // Horizontal pass as a compute shader, reading in_fbo and writing auxiliary.
+            // The group count is rounded up so sizes that are not multiples of 8 are covered.
+            glUseProgram(FullScreenShader::ComputeGaussian17TapHShader::Program);
+            glBindImageTexture(0, in_fbo.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
+            glBindImageTexture(1, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
+            glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_source, 0);
+            glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_dest, 1);
+            glDispatchCompute((in_fbo.getWidth() + 7) / 8, (in_fbo.getHeight() + 7) / 8, 1);
+        }
     }
     {
-        in_fbo.Bind();
-        glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
-        glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
+        if (irr_driver->getGLSLVersion() < 430)
+        {
+            in_fbo.Bind();
+            glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
+            glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
 
-        glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
+            glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
 
-        setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
+            setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+            glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
 
-        glDrawArrays(GL_TRIANGLES, 0, 3);
+            glDrawArrays(GL_TRIANGLES, 0, 3);
+        }
+        else
+        {
+            // Vertical pass as a compute shader, reading auxiliary and writing in_fbo.
+            // NOTE(review): this reads the image stored by the previous dispatch; a
+            // glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) is probably needed first - confirm.
+            // The group count is rounded up so sizes that are not multiples of 8 are covered.
+            glUseProgram(FullScreenShader::ComputeGaussian17TapVShader::Program);
+            glBindImageTexture(0, auxiliary.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
+            glBindImageTexture(1, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
+            glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_source, 0);
+            glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_dest, 1);
+            glDispatchCompute((in_fbo.getWidth() + 7) / 8, (in_fbo.getHeight() + 7) / 8, 1);
+        }
     }
 }
 
diff --git a/src/graphics/shaders.cpp b/src/graphics/shaders.cpp
index 8fbb3c88a..59cc101fb 100644
--- a/src/graphics/shaders.cpp
+++ b/src/graphics/shaders.cpp
@@ -277,9 +277,11 @@ void Shaders::loadShaders()
     FullScreenShader::DepthOfFieldShader::init();
     FullScreenShader::FogShader::init();
     FullScreenShader::Gaussian17TapHShader::init();
+    FullScreenShader::ComputeGaussian17TapHShader::init();
     FullScreenShader::Gaussian3HBlurShader::init();
     FullScreenShader::Gaussian3VBlurShader::init();
     FullScreenShader::Gaussian17TapVShader::init();
+    FullScreenShader::ComputeGaussian17TapVShader::init();
     FullScreenShader::Gaussian6HBlurShader::init();
     FullScreenShader::Gaussian6VBlurShader::init();
     FullScreenShader::GlowShader::init();
@@ -2439,6 +2441,18 @@ namespace FullScreenShader
         vao = createFullScreenVAO(Program);
     }
 
+    // Compute shader program built from shaders/gaussian.comp (horizontal pass).
+    GLuint ComputeGaussian17TapHShader::Program;
+    GLuint ComputeGaussian17TapHShader::uniform_source;
+    GLuint ComputeGaussian17TapHShader::uniform_dest;
+    void ComputeGaussian17TapHShader::init()
+    {
+        Program = LoadProgram(
+            GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian.comp").c_str());
+        uniform_source = glGetUniformLocation(Program, "source");
+        uniform_dest = glGetUniformLocation(Program, "dest");
+    }
+
     GLuint Gaussian6HBlurShader::Program;
     GLuint Gaussian6HBlurShader::uniform_tex;
     GLuint Gaussian6HBlurShader::uniform_pixel;
@@ -2481,6 +2495,18 @@ namespace FullScreenShader
         vao = createFullScreenVAO(Program);
     }
 
+    // Compute shader program built from shaders/gaussianv.comp (vertical pass).
+    GLuint ComputeGaussian17TapVShader::Program;
+    GLuint ComputeGaussian17TapVShader::uniform_source;
+    GLuint ComputeGaussian17TapVShader::uniform_dest;
+    void ComputeGaussian17TapVShader::init()
+    {
+        Program = LoadProgram(
+            GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussianv.comp").c_str());
+        uniform_source = glGetUniformLocation(Program, "source");
+        uniform_dest = glGetUniformLocation(Program, "dest");
+    }
+
     GLuint Gaussian6VBlurShader::Program;
     GLuint Gaussian6VBlurShader::uniform_tex;
     GLuint Gaussian6VBlurShader::uniform_pixel;
diff --git a/src/graphics/shaders.hpp b/src/graphics/shaders.hpp
index d724942cd..1c7548174 100644
--- a/src/graphics/shaders.hpp
+++ b/src/graphics/shaders.hpp
@@ -641,6 +641,16 @@ public:
     static void init();
 };
 
+// Program and image uniform locations of the horizontal gaussian compute shader.
+class ComputeGaussian17TapHShader
+{
+public:
+    static GLuint Program;
+    static GLuint uniform_source, uniform_dest;
+
+    static void init();
+};
+
 class Gaussian6HBlurShader
 {
 public:
@@ -671,6 +681,17 @@ public:
     static void init();
 };
 
+// Program and image uniform locations of the vertical gaussian compute shader.
+class ComputeGaussian17TapVShader
+{
+public:
+    static GLuint Program;
+    static GLuint uniform_source, uniform_dest;
+
+    static void init();
+};
+
+
 class Gaussian6VBlurShader
 {
 public: