Implement Gaussian blur using Compute Shader

The filters are ~3.5 time faster with CS.
This commit is contained in:
vlj 2014-06-04 00:52:35 +02:00
parent 8cc1df3dd1
commit eb45954684
7 changed files with 163 additions and 18 deletions

View File

@ -0,0 +1,36 @@
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
uniform layout(size1x16) restrict readonly image2D source;
uniform layout(size1x16) volatile restrict writeonly image2D dest;
uniform float sigma = 5.;
layout (local_size_x = 8, local_size_y = 8) in;
shared float local_src[8 + 2 * 8][8];
void main()
{
int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(8, 0)).x;
local_src[x + 8][y] = imageLoad(source, ivec2(uv)).x;
local_src[x + 16][y] = imageLoad(source, ivec2(uv) + ivec2(8, 0)).x;
barrier();
float g0, g1, g2;
g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
g1 = exp(-0.5 / (sigma * sigma));
g2 = g1 * g1;
float sum = local_src[x + 8][y] * g0;
g0 *= g1;
g1 *= g2;
for (int j = 1; j < 8; j++) {
sum += local_src[8 + x - j][y] * g0;
sum += local_src[8 + x + j][y] * g0;
g0 *= g1;
g1 *= g2;
}
imageStore(dest, ivec2(uv), vec4(sum));
}

View File

@ -0,0 +1,36 @@
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
uniform layout(size1x16) restrict readonly image2D source;
uniform layout(size1x16) volatile restrict writeonly image2D dest;
uniform float sigma = 5.;
layout (local_size_x = 8, local_size_y = 8) in;
shared float local_src[8][8 + 2 * 8];
void main()
{
int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);
local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(0, 8)).x;
local_src[x][y + 8] = imageLoad(source, ivec2(uv)).x;
local_src[x][y + 16] = imageLoad(source, ivec2(uv) + ivec2(0, 8)).x;
barrier();
float g0, g1, g2;
g0 = 1.0 / (sqrt(2.0 * 3.14) * sigma);
g1 = exp(-0.5 / (sigma * sigma));
g2 = g1 * g1;
float sum = local_src[x][y + 8] * g0;
g0 *= g1;
g1 *= g2;
for (int j = 1; j < 8; j++) {
sum += local_src[x][y + 8 + j] * g0;
sum += local_src[x][y + 8 - j] * g0;
g0 *= g1;
g1 *= g2;
}
imageStore(dest, ivec2(uv), vec4(sum));
}

View File

@ -74,6 +74,8 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
PFNGLTEXSTORAGE1DPROC glTexStorage1D;
PFNGLTEXSTORAGE2DPROC glTexStorage2D;
PFNGLTEXSTORAGE3DPROC glTexStorage3D;
PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
#endif
static bool is_gl_init = false;
@ -226,6 +228,8 @@ void initGL()
glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)IRR_OGL_LOAD_EXTENSION("glBindImageTexture");
glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)IRR_OGL_LOAD_EXTENSION("glDispatchCompute");
#ifdef DEBUG
glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
#endif

View File

@ -97,6 +97,8 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
#ifdef DEBUG
extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
#endif

View File

@ -397,32 +397,56 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
{
auxiliary.Bind();
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
if (irr_driver->getGLSLVersion() < 430)
{
auxiliary.Bind();
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
glDrawArrays(GL_TRIANGLES, 0, 3);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
else
{
glUseProgram(FullScreenShader::ComputeGaussian17TapHShader::Program);
glBindImageTexture(0, in_fbo.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
glBindImageTexture(1, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_source, 0);
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_dest, 1);
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
}
}
{
in_fbo.Bind();
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
if (irr_driver->getGLSLVersion() < 430)
{
in_fbo.Bind();
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
glDrawArrays(GL_TRIANGLES, 0, 3);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
else
{
glUseProgram(FullScreenShader::ComputeGaussian17TapVShader::Program);
glBindImageTexture(0, auxiliary.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
glBindImageTexture(1, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_source, 0);
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_dest, 1);
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
}
}
}

View File

@ -277,9 +277,11 @@ void Shaders::loadShaders()
FullScreenShader::DepthOfFieldShader::init();
FullScreenShader::FogShader::init();
FullScreenShader::Gaussian17TapHShader::init();
FullScreenShader::ComputeGaussian17TapHShader::init();
FullScreenShader::Gaussian3HBlurShader::init();
FullScreenShader::Gaussian3VBlurShader::init();
FullScreenShader::Gaussian17TapVShader::init();
FullScreenShader::ComputeGaussian17TapVShader::init();
FullScreenShader::Gaussian6HBlurShader::init();
FullScreenShader::Gaussian6VBlurShader::init();
FullScreenShader::GlowShader::init();
@ -2439,6 +2441,17 @@ namespace FullScreenShader
vao = createFullScreenVAO(Program);
}
GLuint ComputeGaussian17TapHShader::Program;
GLuint ComputeGaussian17TapHShader::uniform_source;
GLuint ComputeGaussian17TapHShader::uniform_dest;
void ComputeGaussian17TapHShader::init()
{
Program = LoadProgram(
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian.comp").c_str());
uniform_source = glGetUniformLocation(Program, "source");
uniform_dest = glGetUniformLocation(Program, "dest");
}
GLuint Gaussian6HBlurShader::Program;
GLuint Gaussian6HBlurShader::uniform_tex;
GLuint Gaussian6HBlurShader::uniform_pixel;
@ -2481,6 +2494,17 @@ namespace FullScreenShader
vao = createFullScreenVAO(Program);
}
GLuint ComputeGaussian17TapVShader::Program;
GLuint ComputeGaussian17TapVShader::uniform_source;
GLuint ComputeGaussian17TapVShader::uniform_dest;
void ComputeGaussian17TapVShader::init()
{
Program = LoadProgram(
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussianv.comp").c_str());
uniform_source = glGetUniformLocation(Program, "source");
uniform_dest = glGetUniformLocation(Program, "dest");
}
GLuint Gaussian6VBlurShader::Program;
GLuint Gaussian6VBlurShader::uniform_tex;
GLuint Gaussian6VBlurShader::uniform_pixel;

View File

@ -641,6 +641,15 @@ public:
static void init();
};
class ComputeGaussian17TapHShader
{
public:
static GLuint Program;
static GLuint uniform_source, uniform_dest;
static void init();
};
class Gaussian6HBlurShader
{
public:
@ -671,6 +680,16 @@ public:
static void init();
};
class ComputeGaussian17TapVShader
{
public:
static GLuint Program;
static GLuint uniform_source, uniform_dest;
static void init();
};
class Gaussian6VBlurShader
{
public: