Implement Gaussian blur using Compute Shader
The filters are ~3.5 times faster with CS.
This commit is contained in:
parent
8cc1df3dd1
commit
eb45954684
36
data/shaders/gaussian.comp
Normal file
36
data/shaders/gaussian.comp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
// Horizontal pass of a separable Gaussian blur, run as a compute shader.
// The weights are generated incrementally, following
// http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html

// r16f is the standard GLSL image format qualifier for a single 16 bit float
// channel; the previous size1x16 spelling only exists in
// EXT_shader_image_load_store and is rejected by strict compilers.
uniform layout(r16f) restrict readonly image2D source;
uniform layout(r16f) volatile restrict writeonly image2D dest;
// Standard deviation of the Gaussian, in texels.
uniform float sigma = 5.;

layout (local_size_x = 8, local_size_y = 8) in;

// Tile of source texels shared by the work group: the 8x8 group itself plus
// an 8 texel wide apron on each side along x.
shared float local_src[8 + 2 * 8][8];

void main()
{
    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
    ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);

    // Each invocation fetches three texels, 8 apart along x, so that the
    // group fills the whole 24x8 tile.
    local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(8, 0)).x;
    local_src[x + 8][y] = imageLoad(source, ivec2(uv)).x;
    local_src[x + 16][y] = imageLoad(source, ivec2(uv) + ivec2(8, 0)).x;

    // Wait until every invocation of the group has filled its part of the tile.
    barrier();

    // Incremental Gaussian: g0 is the weight of the current tap, g1 the ratio
    // to the next weight and g2 the (constant) ratio between successive g1.
    // The usual 1 / (sqrt(2 * pi) * sigma) factor is omitted because the
    // result is divided by the sum of the weights below, which cancels it.
    float g0 = 1.0;
    float g1 = exp(-0.5 / (sigma * sigma));
    float g2 = g1 * g1;

    float sum = local_src[x + 8][y] * g0;
    float total_weight = g0;
    g0 *= g1;
    g1 *= g2;

    // NOTE(review): only 7 texels per side are read although the apron is 8
    // texels wide; tap count left unchanged.
    for (int j = 1; j < 8; j++) {
        sum += (local_src[8 + x - j][y] + local_src[8 + x + j][y]) * g0;
        total_weight += 2.0 * g0;
        g0 *= g1;
        g1 *= g2;
    }

    // The kernel is truncated, so its raw weights sum to less than one (about
    // 0.87 for sigma = 5). Normalising keeps the blur from darkening the image.
    imageStore(dest, ivec2(uv), vec4(sum / total_weight));
}
|
||||||
|
|
36
data/shaders/gaussianv.comp
Normal file
36
data/shaders/gaussianv.comp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
// Vertical pass of a separable Gaussian blur, run as a compute shader.
// The weights are generated incrementally, following
// http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html

// r16f is the standard GLSL image format qualifier for a single 16 bit float
// channel; the previous size1x16 spelling only exists in
// EXT_shader_image_load_store and is rejected by strict compilers.
uniform layout(r16f) restrict readonly image2D source;
uniform layout(r16f) volatile restrict writeonly image2D dest;
// Standard deviation of the Gaussian, in texels.
uniform float sigma = 5.;

layout (local_size_x = 8, local_size_y = 8) in;

// Tile of source texels shared by the work group: the 8x8 group itself plus
// an 8 texel wide apron on each side along y.
shared float local_src[8][8 + 2 * 8];

void main()
{
    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
    ivec2 uv = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y);

    // Each invocation fetches three texels, 8 apart along y, so that the
    // group fills the whole 8x24 tile.
    local_src[x][y] = imageLoad(source, ivec2(uv) - ivec2(0, 8)).x;
    local_src[x][y + 8] = imageLoad(source, ivec2(uv)).x;
    local_src[x][y + 16] = imageLoad(source, ivec2(uv) + ivec2(0, 8)).x;

    // Wait until every invocation of the group has filled its part of the tile.
    barrier();

    // Incremental Gaussian: g0 is the weight of the current tap, g1 the ratio
    // to the next weight and g2 the (constant) ratio between successive g1.
    // The usual 1 / (sqrt(2 * pi) * sigma) factor is omitted because the
    // result is divided by the sum of the weights below, which cancels it.
    float g0 = 1.0;
    float g1 = exp(-0.5 / (sigma * sigma));
    float g2 = g1 * g1;

    float sum = local_src[x][y + 8] * g0;
    float total_weight = g0;
    g0 *= g1;
    g1 *= g2;

    // NOTE(review): only 7 texels per side are read although the apron is 8
    // texels wide; tap count left unchanged.
    for (int j = 1; j < 8; j++) {
        sum += (local_src[x][y + 8 + j] + local_src[x][y + 8 - j]) * g0;
        total_weight += 2.0 * g0;
        g0 *= g1;
        g1 *= g2;
    }

    // The kernel is truncated, so its raw weights sum to less than one (about
    // 0.87 for sigma = 5). Normalising keeps the blur from darkening the image.
    imageStore(dest, ivec2(uv), vec4(sum / total_weight));
}
|
||||||
|
|
@ -74,6 +74,8 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
|
|||||||
PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
||||||
PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
||||||
PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
||||||
|
PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
|
||||||
|
PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static bool is_gl_init = false;
|
static bool is_gl_init = false;
|
||||||
@ -226,6 +228,8 @@ void initGL()
|
|||||||
glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
|
glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
|
||||||
glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
|
glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
|
||||||
glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
|
glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
|
||||||
|
glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)IRR_OGL_LOAD_EXTENSION("glBindImageTexture");
|
||||||
|
glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)IRR_OGL_LOAD_EXTENSION("glDispatchCompute");
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
|
glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
|
||||||
#endif
|
#endif
|
||||||
|
@ -97,6 +97,8 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
|
|||||||
extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
|
||||||
extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
|
||||||
extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
|
||||||
|
extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
|
||||||
|
extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
|
extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
|
||||||
#endif
|
#endif
|
||||||
|
@ -396,6 +396,8 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
|
|||||||
{
|
{
|
||||||
assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
|
assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
|
||||||
float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
|
float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
|
||||||
|
{
|
||||||
|
if (irr_driver->getGLSLVersion() < 430)
|
||||||
{
|
{
|
||||||
auxiliary.Bind();
|
auxiliary.Bind();
|
||||||
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
|
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
|
||||||
@ -410,6 +412,18 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
|
|||||||
|
|
||||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
glUseProgram(FullScreenShader::ComputeGaussian17TapHShader::Program);
|
||||||
|
glBindImageTexture(0, in_fbo.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
|
||||||
|
glBindImageTexture(1, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
|
||||||
|
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_source, 0);
|
||||||
|
glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_dest, 1);
|
||||||
|
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
if (irr_driver->getGLSLVersion() < 430)
|
||||||
{
|
{
|
||||||
in_fbo.Bind();
|
in_fbo.Bind();
|
||||||
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
|
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
|
||||||
@ -424,6 +438,16 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
|
|||||||
|
|
||||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
glUseProgram(FullScreenShader::ComputeGaussian17TapVShader::Program);
|
||||||
|
glBindImageTexture(0, auxiliary.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
|
||||||
|
glBindImageTexture(1, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
|
||||||
|
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_source, 0);
|
||||||
|
glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_dest, 1);
|
||||||
|
glDispatchCompute(in_fbo.getWidth() / 8, in_fbo.getHeight() / 8, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PostProcessing::renderPassThrough(GLuint tex)
|
void PostProcessing::renderPassThrough(GLuint tex)
|
||||||
|
@ -277,9 +277,11 @@ void Shaders::loadShaders()
|
|||||||
FullScreenShader::DepthOfFieldShader::init();
|
FullScreenShader::DepthOfFieldShader::init();
|
||||||
FullScreenShader::FogShader::init();
|
FullScreenShader::FogShader::init();
|
||||||
FullScreenShader::Gaussian17TapHShader::init();
|
FullScreenShader::Gaussian17TapHShader::init();
|
||||||
|
FullScreenShader::ComputeGaussian17TapHShader::init();
|
||||||
FullScreenShader::Gaussian3HBlurShader::init();
|
FullScreenShader::Gaussian3HBlurShader::init();
|
||||||
FullScreenShader::Gaussian3VBlurShader::init();
|
FullScreenShader::Gaussian3VBlurShader::init();
|
||||||
FullScreenShader::Gaussian17TapVShader::init();
|
FullScreenShader::Gaussian17TapVShader::init();
|
||||||
|
FullScreenShader::ComputeGaussian17TapVShader::init();
|
||||||
FullScreenShader::Gaussian6HBlurShader::init();
|
FullScreenShader::Gaussian6HBlurShader::init();
|
||||||
FullScreenShader::Gaussian6VBlurShader::init();
|
FullScreenShader::Gaussian6VBlurShader::init();
|
||||||
FullScreenShader::GlowShader::init();
|
FullScreenShader::GlowShader::init();
|
||||||
@ -2439,6 +2441,17 @@ namespace FullScreenShader
|
|||||||
vao = createFullScreenVAO(Program);
|
vao = createFullScreenVAO(Program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLuint ComputeGaussian17TapHShader::Program;
|
||||||
|
GLuint ComputeGaussian17TapHShader::uniform_source;
|
||||||
|
GLuint ComputeGaussian17TapHShader::uniform_dest;
|
||||||
|
void ComputeGaussian17TapHShader::init()
|
||||||
|
{
|
||||||
|
Program = LoadProgram(
|
||||||
|
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussian.comp").c_str());
|
||||||
|
uniform_source = glGetUniformLocation(Program, "source");
|
||||||
|
uniform_dest = glGetUniformLocation(Program, "dest");
|
||||||
|
}
|
||||||
|
|
||||||
GLuint Gaussian6HBlurShader::Program;
|
GLuint Gaussian6HBlurShader::Program;
|
||||||
GLuint Gaussian6HBlurShader::uniform_tex;
|
GLuint Gaussian6HBlurShader::uniform_tex;
|
||||||
GLuint Gaussian6HBlurShader::uniform_pixel;
|
GLuint Gaussian6HBlurShader::uniform_pixel;
|
||||||
@ -2481,6 +2494,17 @@ namespace FullScreenShader
|
|||||||
vao = createFullScreenVAO(Program);
|
vao = createFullScreenVAO(Program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLuint ComputeGaussian17TapVShader::Program;
|
||||||
|
GLuint ComputeGaussian17TapVShader::uniform_source;
|
||||||
|
GLuint ComputeGaussian17TapVShader::uniform_dest;
|
||||||
|
void ComputeGaussian17TapVShader::init()
|
||||||
|
{
|
||||||
|
Program = LoadProgram(
|
||||||
|
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/gaussianv.comp").c_str());
|
||||||
|
uniform_source = glGetUniformLocation(Program, "source");
|
||||||
|
uniform_dest = glGetUniformLocation(Program, "dest");
|
||||||
|
}
|
||||||
|
|
||||||
GLuint Gaussian6VBlurShader::Program;
|
GLuint Gaussian6VBlurShader::Program;
|
||||||
GLuint Gaussian6VBlurShader::uniform_tex;
|
GLuint Gaussian6VBlurShader::uniform_tex;
|
||||||
GLuint Gaussian6VBlurShader::uniform_pixel;
|
GLuint Gaussian6VBlurShader::uniform_pixel;
|
||||||
|
@ -641,6 +641,15 @@ public:
|
|||||||
static void init();
|
static void init();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ComputeGaussian17TapHShader
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static GLuint Program;
|
||||||
|
static GLuint uniform_source, uniform_dest;
|
||||||
|
|
||||||
|
static void init();
|
||||||
|
};
|
||||||
|
|
||||||
class Gaussian6HBlurShader
|
class Gaussian6HBlurShader
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -671,6 +680,16 @@ public:
|
|||||||
static void init();
|
static void init();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ComputeGaussian17TapVShader
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static GLuint Program;
|
||||||
|
static GLuint uniform_source, uniform_dest;
|
||||||
|
|
||||||
|
static void init();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class Gaussian6VBlurShader
|
class Gaussian6VBlurShader
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
Loading…
Reference in New Issue
Block a user