Implement Gaussian blur using Compute Shader
The filters are ~3.5 time faster with CS.
This commit is contained in:
parent
8cc1df3dd1
commit
eb45954684
36
data/shaders/gaussian.comp
Normal file
36
data/shaders/gaussian.comp
Normal file
@ -0,0 +1,36 @@
|
||||
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
|
||||
|
||||
uniform layout(size1x16) restrict readonly image2D source;
|
||||
uniform layout(size1x16) volatile restrict writeonly image2D dest;
|
||||
uniform float sigma = 5.;
|
||||
|
||||
layout (local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
shared float local_src[8 + 2 * 8][8];
|
||||
|
||||
void main()
|
||||
{
|
||||
int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
|
||||
ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
|
||||
local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(8, 0)).x;
|
||||
local_src[x + 8][y] = imageLoad(source, ivec2(uv)).x;
|
||||
local_src[x + 16][y] = imageLoad(source, ivec2(uv) + ivec2(8, 0)).x;
|
||||
|
||||
barrier();
|
||||
|
||||
float g0, g1, g2;
|
||||
g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
|
||||
g1 = exp(-0.5 / (sigma * sigma));
|
||||
g2 = g1 * g1;
|
||||
float sum = local_src[x + 8][y] * g0;
|
||||
g0 *= g1;
|
||||
g1 *= g2;
|
||||
for (int j = 1; j < 8; j++) {
|
||||
sum += local_src[8 + x - j][y] * g0;
|
||||
sum += local_src[8 + x + j][y] * g0;
|
||||
g0 *= g1;
|
||||
g1 *= g2;
|
||||
}
|
||||
imageStore(dest, ivec2(uv), vec4(sum));
|
||||
}
|
||||
|
36
data/shaders/gaussianv.comp
Normal file
36
data/shaders/gaussianv.comp
Normal file
@ -0,0 +1,36 @@
|
||||
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
|
||||
|
||||
uniform layout(size1x16) restrict readonly image2D source;
|
||||
uniform layout(size1x16) volatile restrict writeonly image2D dest;
|
||||
uniform float sigma = 5.;
|
||||
|
||||
layout (local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
shared float local_src[8][8 + 2 * 8];
|
||||
|
||||
void main()
|
||||
{
|
||||
int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
|
||||
ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
|
||||
local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(0, 8)).x;
|
||||
local_src[x][y + 8] = imageLoad(source, ivec2(uv)).x;
|
||||
local_src[x][y + 16] = imageLoad(source, ivec2(uv) + ivec2(0, 8)).x;
|
||||
|
||||
barrier();
|
||||
|
||||
float g0, g1, g2;
|
||||
g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
|
||||
g1 = exp(-0.5 / (sigma * sigma));
|
||||
g2 = g1 * g1;
|
||||
float sum = local_src[x][y + 8] * g0;
|
||||
g0 *= g1;
|
||||
g1 *= g2;
|
||||
for (int j = 1; j < 8; j++) {
|
||||
sum += local_src[x][y + 8 + j] * g0;
|
||||
sum += local_src[x][y + 8 - j] * g0;
|
||||
g0 *= g1;
|
||||
g1 *= g2;
|
||||
}
|
||||
imageStore(dest, ivec2(uv), vec4(sum));
|
||||
}
|
||||
|
@ -74,6 +74,8 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
|
||||
PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
||||
PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
||||
PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
||||
PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
|
||||
PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
|
||||
#endif
|
||||
|
||||
static bool is_gl_init = false;
|
||||
@ -226,6 +228,8 @@ void initGL()
|
||||
glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
|
||||
glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
|
||||
glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
|
||||
glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)IRR_OGL_LOAD_EXTENSION("glBindImageTexture");
|
||||
glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)IRR_OGL_LOAD_EXTENSION("glDispatchCompute");
|
||||
#ifdef DEBUG
|
||||
glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
|
||||
#endif
|
||||
|
@ -97,6 +97,8 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
|
||||
extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
||||
extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
||||
extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
||||
extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
|
||||
extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
|
||||
#ifdef DEBUG
|
||||
extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
|
||||
#endif
|
||||
|
@ -397,32 +397,56 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
|
||||
assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
|
||||
float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
|
||||
{
|
||||
auxiliary.Bind();
|
||||
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
|
||||
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
|
||||
if (irr_driver->getGLSLVersion() < 430)
|
||||
{
|
||||
auxiliary.Bind();
|
||||
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
|
||||
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
|
||||
|
||||
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
|
||||
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
|
||||
|
||||
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
|
||||
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
glUseProgram(FullScreenShader::ComputeGaussian17TapHShader::Program);
|
||||
glBindImageTexture(0, in_fbo.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
|
||||
glBindImageTexture(1, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
|
||||
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_source, 0);
|
||||
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_dest, 1);
|
||||
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
|
||||
}
|
||||
}
|
||||
{
|
||||
in_fbo.Bind();
|
||||
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
|
||||
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
|
||||
if (irr_driver->getGLSLVersion() < 430)
|
||||
{
|
||||
in_fbo.Bind();
|
||||
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
|
||||
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
|
||||
|
||||
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
|
||||
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
|
||||
|
||||
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
|
||||
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
glUseProgram(FullScreenShader::ComputeGaussian17TapVShader::Program);
|
||||
glBindImageTexture(0, auxiliary.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
|
||||
glBindImageTexture(1, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
|
||||
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_source, 0);
|
||||
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_dest, 1);
|
||||
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -277,9 +277,11 @@ void Shaders::loadShaders()
|
||||
FullScreenShader::DepthOfFieldShader::init();
|
||||
FullScreenShader::FogShader::init();
|
||||
FullScreenShader::Gaussian17TapHShader::init();
|
||||
FullScreenShader::ComputeGaussian17TapHShader::init();
|
||||
FullScreenShader::Gaussian3HBlurShader::init();
|
||||
FullScreenShader::Gaussian3VBlurShader::init();
|
||||
FullScreenShader::Gaussian17TapVShader::init();
|
||||
FullScreenShader::ComputeGaussian17TapVShader::init();
|
||||
FullScreenShader::Gaussian6HBlurShader::init();
|
||||
FullScreenShader::Gaussian6VBlurShader::init();
|
||||
FullScreenShader::GlowShader::init();
|
||||
@ -2439,6 +2441,17 @@ namespace FullScreenShader
|
||||
vao = createFullScreenVAO(Program);
|
||||
}
|
||||
|
||||
GLuint ComputeGaussian17TapHShader::Program;
|
||||
GLuint ComputeGaussian17TapHShader::uniform_source;
|
||||
GLuint ComputeGaussian17TapHShader::uniform_dest;
|
||||
void ComputeGaussian17TapHShader::init()
|
||||
{
|
||||
Program = LoadProgram(
|
||||
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian.comp").c_str());
|
||||
uniform_source = glGetUniformLocation(Program, "source");
|
||||
uniform_dest = glGetUniformLocation(Program, "dest");
|
||||
}
|
||||
|
||||
GLuint Gaussian6HBlurShader::Program;
|
||||
GLuint Gaussian6HBlurShader::uniform_tex;
|
||||
GLuint Gaussian6HBlurShader::uniform_pixel;
|
||||
@ -2481,6 +2494,17 @@ namespace FullScreenShader
|
||||
vao = createFullScreenVAO(Program);
|
||||
}
|
||||
|
||||
GLuint ComputeGaussian17TapVShader::Program;
|
||||
GLuint ComputeGaussian17TapVShader::uniform_source;
|
||||
GLuint ComputeGaussian17TapVShader::uniform_dest;
|
||||
void ComputeGaussian17TapVShader::init()
|
||||
{
|
||||
Program = LoadProgram(
|
||||
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussianv.comp").c_str());
|
||||
uniform_source = glGetUniformLocation(Program, "source");
|
||||
uniform_dest = glGetUniformLocation(Program, "dest");
|
||||
}
|
||||
|
||||
GLuint Gaussian6VBlurShader::Program;
|
||||
GLuint Gaussian6VBlurShader::uniform_tex;
|
||||
GLuint Gaussian6VBlurShader::uniform_pixel;
|
||||
|
@ -641,6 +641,15 @@ public:
|
||||
static void init();
|
||||
};
|
||||
|
||||
class ComputeGaussian17TapHShader
|
||||
{
|
||||
public:
|
||||
static GLuint Program;
|
||||
static GLuint uniform_source, uniform_dest;
|
||||
|
||||
static void init();
|
||||
};
|
||||
|
||||
class Gaussian6HBlurShader
|
||||
{
|
||||
public:
|
||||
@ -671,6 +680,16 @@ public:
|
||||
static void init();
|
||||
};
|
||||
|
||||
class ComputeGaussian17TapVShader
|
||||
{
|
||||
public:
|
||||
static GLuint Program;
|
||||
static GLuint uniform_source, uniform_dest;
|
||||
|
||||
static void init();
|
||||
};
|
||||
|
||||
|
||||
class Gaussian6VBlurShader
|
||||
{
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user