Merge branch 'CS'

This commit is contained in:
vlj 2014-06-05 03:33:02 +02:00
commit 588ec08bc1
12 changed files with 9924 additions and 10094 deletions

View File

@ -0,0 +1,36 @@
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
//
// Horizontal pass of a separable 17-tap Gaussian blur: the centre texel plus 8 texels on
// each side, with the weights produced by the incremental Gaussian evaluation described in
// the chapter above.
//
// Every 8x8 work group first stages a 24x8 strip of `source` in shared memory (its own tile
// plus an 8-texel halo to the left and to the right) so that all taps are read from shared
// memory instead of from the image.

// The host side binds both images as GL_R16F; r16f is the core GLSL name of that format
// (size1x16 is only understood through EXT_shader_image_load_store).
uniform layout(r16f) restrict readonly image2D source;
uniform layout(r16f) volatile restrict writeonly image2D dest;
// Standard deviation of the Gaussian, in texels.
uniform float sigma = 5.;

layout (local_size_x = 8, local_size_y = 8) in;

// First index: column inside the staged strip (0..23), second index: row inside the tile.
shared float local_src[8 + 2 * 8][8];

void main()
{
    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
    ivec2 uv = ivec2(gl_GlobalInvocationID.xy);

    // Out-of-range image loads return 0, which would darken the borders. Clamp the
    // coordinates instead, matching the GL_CLAMP_TO_EDGE wrap mode used by the fragment
    // shader version of this blur.
    ivec2 last_texel = imageSize(source) - ivec2(1);
    local_src[x][y]      = imageLoad(source, clamp(uv - ivec2(8, 0), ivec2(0), last_texel)).x;
    local_src[x + 8][y]  = imageLoad(source, clamp(uv,               ivec2(0), last_texel)).x;
    local_src[x + 16][y] = imageLoad(source, clamp(uv + ivec2(8, 0), ivec2(0), last_texel)).x;

    // All invocations must reach this point: nothing above may return early.
    barrier();

    // Incremental Gaussian: g0 is the weight of the current tap, g1 the factor that turns it
    // into the weight of the next tap, g2 the constant factor that updates g1.
    float g0, g1, g2;
    g0 = 1.0 / (sqrt(2.0 * 3.14159265358979) * sigma);
    g1 = exp(-0.5 / (sigma * sigma));
    g2 = g1 * g1;

    float sum = local_src[x + 8][y] * g0;
    float weight_sum = g0;
    g0 *= g1;
    g1 *= g2;
    // j runs up to and including 8 so that the whole staged halo is used (17 taps in total).
    for (int j = 1; j <= 8; j++) {
        sum += local_src[8 + x - j][y] * g0;
        sum += local_src[8 + x + j][y] * g0;
        weight_sum += 2.0 * g0;
        g0 *= g1;
        g1 *= g2;
    }

    // The truncated kernel does not sum to 1; divide by the accumulated weight so the blur
    // keeps the overall intensity, as the reference implementation does.
    // The bounds test keeps invocations that lie outside the image from storing anything.
    if (all(lessThan(uv, imageSize(dest))))
        imageStore(dest, uv, vec4(sum / weight_sum));
}

View File

@ -0,0 +1,36 @@
// From http://http.developer.nvidia.com/GPUGems3/gpugems3_ch40.html
//
// Vertical pass of a separable 17-tap Gaussian blur: the centre texel plus 8 texels above
// and below it, with the weights produced by the incremental Gaussian evaluation described
// in the chapter above.
//
// Every 8x8 work group first stages an 8x24 strip of `source` in shared memory (its own tile
// plus an 8-texel halo above and below) so that all taps are read from shared memory instead
// of from the image.

// The host side binds both images as GL_R16F; r16f is the core GLSL name of that format
// (size1x16 is only understood through EXT_shader_image_load_store).
uniform layout(r16f) restrict readonly image2D source;
uniform layout(r16f) volatile restrict writeonly image2D dest;
// Standard deviation of the Gaussian, in texels.
uniform float sigma = 5.;

layout (local_size_x = 8, local_size_y = 8) in;

// First index: column inside the tile, second index: row inside the staged strip (0..23).
shared float local_src[8][8 + 2 * 8];

void main()
{
    int x = int(gl_LocalInvocationID.x), y = int(gl_LocalInvocationID.y);
    ivec2 uv = ivec2(gl_GlobalInvocationID.xy);

    // Out-of-range image loads return 0, which would darken the borders. Clamp the
    // coordinates instead, matching the GL_CLAMP_TO_EDGE wrap mode used by the fragment
    // shader version of this blur.
    ivec2 last_texel = imageSize(source) - ivec2(1);
    local_src[x][y]      = imageLoad(source, clamp(uv - ivec2(0, 8), ivec2(0), last_texel)).x;
    local_src[x][y + 8]  = imageLoad(source, clamp(uv,               ivec2(0), last_texel)).x;
    local_src[x][y + 16] = imageLoad(source, clamp(uv + ivec2(0, 8), ivec2(0), last_texel)).x;

    // All invocations must reach this point: nothing above may return early.
    barrier();

    // Incremental Gaussian: g0 is the weight of the current tap, g1 the factor that turns it
    // into the weight of the next tap, g2 the constant factor that updates g1.
    float g0, g1, g2;
    g0 = 1.0 / (sqrt(2.0 * 3.14159265358979) * sigma);
    g1 = exp(-0.5 / (sigma * sigma));
    g2 = g1 * g1;

    float sum = local_src[x][y + 8] * g0;
    float weight_sum = g0;
    g0 *= g1;
    g1 *= g2;
    // j runs up to and including 8 so that the whole staged halo is used (17 taps in total).
    for (int j = 1; j <= 8; j++) {
        sum += local_src[x][y + 8 + j] * g0;
        sum += local_src[x][y + 8 - j] * g0;
        weight_sum += 2.0 * g0;
        g0 *= g1;
        g1 *= g2;
    }

    // The truncated kernel does not sum to 1; divide by the accumulated weight so the blur
    // keeps the overall intensity, as the reference implementation does.
    // The bounds test keeps invocations that lie outside the image from storing anything.
    if (all(lessThan(uv, imageSize(dest))))
        imageStore(dest, uv, vec4(sum / weight_sum));
}

File diff suppressed because it is too large Load Diff

View File

@ -74,6 +74,8 @@ PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
// Texture storage allocation entry points (glTexStorage*), resolved at runtime in initGL().
PFNGLTEXSTORAGE1DPROC glTexStorage1D;
PFNGLTEXSTORAGE2DPROC glTexStorage2D;
PFNGLTEXSTORAGE3DPROC glTexStorage3D;
// Entry points needed by the compute-shader blur: binding a texture level to an image unit
// and launching compute work groups. Resolved at runtime in initGL().
PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
#endif
static bool is_gl_init = false;
@ -226,6 +228,8 @@ void initGL()
glTexStorage1D = (PFNGLTEXSTORAGE1DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage1D");
glTexStorage2D = (PFNGLTEXSTORAGE2DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage2D");
glTexStorage3D = (PFNGLTEXSTORAGE3DPROC)IRR_OGL_LOAD_EXTENSION("glTexStorage3D");
glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC)IRR_OGL_LOAD_EXTENSION("glBindImageTexture");
glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)IRR_OGL_LOAD_EXTENSION("glDispatchCompute");
#ifdef DEBUG
glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC)IRR_OGL_LOAD_EXTENSION("glDebugMessageCallbackARB");
#endif
@ -897,4 +901,4 @@ void GL32_draw2DRectangle(video::SColor color, const core::rect<s32>& position,
glUseProgram(0);
glGetError();
}
}

View File

@ -97,6 +97,8 @@ extern PFNGLGETCOMPRESSEDTEXIMAGEPROC glGetCompressedTexImage;
// Texture storage allocation entry points (glTexStorage*).
extern PFNGLTEXSTORAGE1DPROC glTexStorage1D;
extern PFNGLTEXSTORAGE2DPROC glTexStorage2D;
extern PFNGLTEXSTORAGE3DPROC glTexStorage3D;
// Image-unit binding and compute dispatch entry points, used by the compute-shader blur.
extern PFNGLBINDIMAGETEXTUREPROC glBindImageTexture;
extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
#ifdef DEBUG
extern PFNGLDEBUGMESSAGECALLBACKARBPROC glDebugMessageCallbackARB;
#endif

View File

@ -108,7 +108,11 @@ enum QueryPerf
{
Q_SOLID_PASS1,
Q_SHADOWS,
Q_LIGHT,
Q_RH,
Q_GI,
Q_ENVMAP,
Q_SUN,
Q_POINTLIGHTS,
Q_SSAO,
Q_SOLID_PASS2,
Q_TRANSPARENT,

View File

@ -400,32 +400,56 @@ void PostProcessing::renderGaussian17TapBlur(FrameBuffer &in_fbo, FrameBuffer &a
assert(in_fbo.getWidth() == auxiliary.getWidth() && in_fbo.getHeight() == auxiliary.getHeight());
float inv_width = 1.0f / in_fbo.getWidth(), inv_height = 1.0f / in_fbo.getHeight();
{
auxiliary.Bind();
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
if (irr_driver->getGLSLVersion() < 430)
{
auxiliary.Bind();
glUseProgram(FullScreenShader::Gaussian17TapHShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapHShader::vao);
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
glUniform2f(FullScreenShader::Gaussian17TapHShader::uniform_pixel, inv_width, inv_height);
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
setTexture(0, in_fbo.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapHShader::uniform_tex, 0);
glDrawArrays(GL_TRIANGLES, 0, 3);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
else
{
    // Compute path (GLSL >= 4.30): horizontal pass, reading in_fbo and writing auxiliary
    // through image units 0 and 1.
    glUseProgram(FullScreenShader::ComputeGaussian17TapHShader::Program);
    glBindImageTexture(0, in_fbo.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
    glBindImageTexture(1, auxiliary.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
    glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_source, 0);
    glUniform1i(FullScreenShader::ComputeGaussian17TapHShader::uniform_dest, 1);
    // The shader uses 8x8 work groups. Round the group count up: a plain division would
    // leave up to 7 columns/rows at the right and bottom edges unprocessed whenever the
    // size is not a multiple of 8. Invocations that fall outside the image are harmless,
    // since out-of-range image stores are discarded by GL.
    glDispatchCompute((in_fbo.getWidth() + 7) / 8, (in_fbo.getHeight() + 7) / 8, 1);
    // NOTE(review): the vertical pass reads the image written by this dispatch. GL only
    // guarantees visibility of image stores after
    // glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); no barrier is issued here and
    // glMemoryBarrier is not among the loaded entry points - confirm and add one.
}
}
{
in_fbo.Bind();
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
if (irr_driver->getGLSLVersion() < 430)
{
in_fbo.Bind();
glUseProgram(FullScreenShader::Gaussian17TapVShader::Program);
glBindVertexArray(FullScreenShader::Gaussian17TapVShader::vao);
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
glUniform2f(FullScreenShader::Gaussian17TapVShader::uniform_pixel, inv_width, inv_height);
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
setTexture(0, auxiliary.getRTT()[0], GL_LINEAR, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glUniform1i(FullScreenShader::Gaussian17TapVShader::uniform_tex, 0);
glDrawArrays(GL_TRIANGLES, 0, 3);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
else
{
    // Compute path (GLSL >= 4.30): vertical pass, reading auxiliary and writing the result
    // back into in_fbo through image units 0 and 1.
    glUseProgram(FullScreenShader::ComputeGaussian17TapVShader::Program);
    glBindImageTexture(0, auxiliary.getRTT()[0], 0, false, 0, GL_READ_ONLY, GL_R16F);
    glBindImageTexture(1, in_fbo.getRTT()[0], 0, false, 0, GL_WRITE_ONLY, GL_R16F);
    glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_source, 0);
    glUniform1i(FullScreenShader::ComputeGaussian17TapVShader::uniform_dest, 1);
    // The shader uses 8x8 work groups. Round the group count up: a plain division would
    // leave up to 7 columns/rows at the right and bottom edges unprocessed whenever the
    // size is not a multiple of 8. Invocations that fall outside the image are harmless,
    // since out-of-range image stores are discarded by GL.
    glDispatchCompute((in_fbo.getWidth() + 7) / 8, (in_fbo.getHeight() + 7) / 8, 1);
    // NOTE(review): whoever samples in_fbo next depends on the image stores made by this
    // dispatch. GL requires a glMemoryBarrier (GL_TEXTURE_FETCH_BARRIER_BIT for texture
    // fetches, GL_SHADER_IMAGE_ACCESS_BARRIER_BIT for image loads) before such reads; no
    // barrier is issued here - confirm and add one.
}
}
}

View File

@ -285,7 +285,6 @@ void IrrDriver::renderScene(scene::ICameraSceneNode * const camnode, unsigned po
// Lights
{
PROFILER_PUSH_CPU_MARKER("- Light", 0x00, 0xFF, 0x00);
ScopedGPUTimer Timer(getGPUTimer(Q_LIGHT));
renderLights(pointlightcount);
PROFILER_POP_CPU_MARKER();
}
@ -986,6 +985,7 @@ void IrrDriver::renderLights(unsigned pointlightcount)
//RH
if (UserConfigParams::m_gi)
{
ScopedGPUTimer timer(irr_driver->getGPUTimer(Q_RH));
glDisable(GL_BLEND);
m_rtts->getRH().Bind();
glUseProgram(FullScreenShader::RadianceHintsConstructionShader::Program);
@ -1008,11 +1008,17 @@ void IrrDriver::renderLights(unsigned pointlightcount)
m_rtts->getFBO(FBO_TMP1_WITH_DS).Bind();
if (UserConfigParams::m_gi)
{
ScopedGPUTimer timer(irr_driver->getGPUTimer(Q_GI));
m_post_processing->renderGI(rh_matrix, rh_extend, m_rtts->getRH().getRTT()[0], m_rtts->getRH().getRTT()[1], m_rtts->getRH().getRTT()[2]);
}
m_rtts->getFBO(FBO_COMBINED_TMP1_TMP2).Bind();
if (SkyboxCubeMap)
{
ScopedGPUTimer timer(irr_driver->getGPUTimer(Q_ENVMAP));
m_post_processing->renderDiffuseEnvMap(blueSHCoeff, greenSHCoeff, redSHCoeff);
}
m_rtts->getFBO(FBO_COMBINED_TMP1_TMP2).Bind();
if (World::getWorld() && World::getWorld()->getTrack()->hasShadows() && SkyboxCubeMap && UserConfigParams::m_gi)
irr_driver->getSceneManager()->setAmbientLight(SColor(0, 0, 0, 0));
@ -1020,13 +1026,16 @@ void IrrDriver::renderLights(unsigned pointlightcount)
// Render sunlight if and only if track supports shadow
if (!World::getWorld() || World::getWorld()->getTrack()->hasShadows())
{
ScopedGPUTimer timer(irr_driver->getGPUTimer(Q_SUN));
if (World::getWorld() && UserConfigParams::m_shadows)
m_post_processing->renderShadowedSunlight(sun_ortho_matrix, m_rtts->getShadowDepthTex());
else
m_post_processing->renderSunlight();
}
renderPointLights(MIN2(pointlightcount, MAXLIGHT));
{
ScopedGPUTimer timer(irr_driver->getGPUTimer(Q_POINTLIGHTS));
renderPointLights(MIN2(pointlightcount, MAXLIGHT));
}
}
void IrrDriver::renderSSAO()

View File

@ -22,6 +22,18 @@
#include "graphics/irr_driver.hpp"
#include "utils/log.hpp"
/** Creates a texture object, binds it to \p target and allocates one mip level of
 *  w x h x d texels for it.
 *  \param target         Texture target to bind to and allocate for.
 *  \param w, h, d        Width, height and depth (or layer count) of the texture.
 *  \param internalFormat Internal format of the storage.
 *  \param format, type   Pixel transfer format and type; only used by the glTexImage3D
 *                        path (no pixel data is uploaded).
 *  \return The name of the new texture, which is left bound to \p target.
 */
static GLuint generateRTT3D(GLenum target, size_t w, size_t h, size_t d, GLint internalFormat, GLint format, GLint type)
{
    GLuint texture_name;
    glGenTextures(1, &texture_name);
    glBindTexture(target, texture_name);

    // glTexStorage3D is only used from GLSL 4.20 on; older contexts fall back to
    // glTexImage3D with a null data pointer.
    const bool use_tex_storage = !(irr_driver->getGLSLVersion() < 420);
    if (use_tex_storage)
    {
        glTexStorage3D(target, 1, internalFormat, w, h, d);
    }
    else
    {
        glTexImage3D(target, 0, internalFormat, w, h, d, 0, format, type, 0);
    }

    return texture_name;
}
static GLuint generateRTT(const core::dimension2du &res, GLint internalFormat, GLint format, GLint type, unsigned mipmaplevel = 1)
{
GLuint result;
@ -80,9 +92,7 @@ RTT::RTT(size_t width, size_t height)
unsigned linear_depth_mip_levels = ceil(log2(max_(res.Width, res.Height)));
glGenTextures(1, &DepthStencilTexture);
glBindTexture(GL_TEXTURE_2D, DepthStencilTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, res.Width, res.Height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0);
DepthStencilTexture = generateRTT(res, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
// All RTTs are currently RGBA16F mostly with stencil. The four tmp RTTs are the same size
// as the screen, for use in post-processing.
@ -196,12 +206,8 @@ RTT::RTT(size_t width, size_t height)
if (UserConfigParams::m_shadows)
{
glGenTextures(1, &shadowColorTex);
glBindTexture(GL_TEXTURE_2D_ARRAY, shadowColorTex);
glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_R8, 1024, 1024, 4, 0, GL_RED, GL_UNSIGNED_BYTE, 0);
glGenTextures(1, &shadowDepthTex);
glBindTexture(GL_TEXTURE_2D_ARRAY, shadowDepthTex);
glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_DEPTH_STENCIL, 1024, 1024, 4, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0);
shadowColorTex = generateRTT3D(GL_TEXTURE_2D_ARRAY, 1024, 1024, 4, GL_R8, GL_RED, GL_UNSIGNED_BYTE);
shadowDepthTex = generateRTT3D(GL_TEXTURE_2D_ARRAY, 1024, 1024, 4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
somevector.clear();
somevector.push_back(shadowColorTex);
@ -211,30 +217,18 @@ RTT::RTT(size_t width, size_t height)
if (UserConfigParams::m_gi)
{
//Todo : use "normal" shadowtex
glGenTextures(1, &RSM_Color);
glBindTexture(GL_TEXTURE_2D, RSM_Color);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, 1024, 1024, 0, GL_RGB, GL_UNSIGNED_BYTE, 0);
glGenTextures(1, &RSM_Normal);
glBindTexture(GL_TEXTURE_2D, RSM_Normal);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, 1024, 1024, 0, GL_RGB, GL_FLOAT, 0);
glGenTextures(1, &RSM_Depth);
glBindTexture(GL_TEXTURE_2D, RSM_Depth);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, 1024, 1024, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0);
RSM_Color = generateRTT(shadowsize0, GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE);
RSM_Normal = generateRTT(shadowsize0, GL_RGB16F, GL_RGB, GL_FLOAT);
RSM_Depth = generateRTT(shadowsize0, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
somevector.clear();
somevector.push_back(RSM_Color);
somevector.push_back(RSM_Normal);
m_RSM = new FrameBuffer(somevector, RSM_Depth, 1024, 1024, true);
glGenTextures(1, &RH_Red);
glBindTexture(GL_TEXTURE_3D, RH_Red);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA16F, 32, 16, 32, 0, GL_RGBA, GL_FLOAT, 0);
glGenTextures(1, &RH_Green);
glBindTexture(GL_TEXTURE_3D, RH_Green);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA16F, 32, 16, 32, 0, GL_RGBA, GL_FLOAT, 0);
glGenTextures(1, &RH_Blue);
glBindTexture(GL_TEXTURE_3D, RH_Blue);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA16F, 32, 16, 32, 0, GL_RGBA, GL_FLOAT, 0);
RH_Red = generateRTT3D(GL_TEXTURE_3D, 32, 16, 32, GL_RGBA16F, GL_RGBA, GL_FLOAT);
RH_Green = generateRTT3D(GL_TEXTURE_3D, 32, 16, 32, GL_RGBA16F, GL_RGBA, GL_FLOAT);
RH_Blue = generateRTT3D(GL_TEXTURE_3D, 32, 16, 32, GL_RGBA16F, GL_RGBA, GL_FLOAT);
somevector.clear();
somevector.push_back(RH_Red);

View File

@ -277,9 +277,11 @@ void Shaders::loadShaders()
FullScreenShader::DepthOfFieldShader::init();
FullScreenShader::FogShader::init();
FullScreenShader::Gaussian17TapHShader::init();
FullScreenShader::ComputeGaussian17TapHShader::init();
FullScreenShader::Gaussian3HBlurShader::init();
FullScreenShader::Gaussian3VBlurShader::init();
FullScreenShader::Gaussian17TapVShader::init();
FullScreenShader::ComputeGaussian17TapVShader::init();
FullScreenShader::Gaussian6HBlurShader::init();
FullScreenShader::Gaussian6VBlurShader::init();
FullScreenShader::GlowShader::init();
@ -2439,6 +2441,17 @@ namespace FullScreenShader
vao = createFullScreenVAO(Program);
}
// Storage for the static members of the horizontal compute blur shader wrapper.
GLuint ComputeGaussian17TapHShader::Program;
GLuint ComputeGaussian17TapHShader::uniform_source;
GLuint ComputeGaussian17TapHShader::uniform_dest;

/** Loads shaders/gaussian.comp as a compute program and looks up the locations of its
 *  "source" and "dest" uniforms.
 */
void ComputeGaussian17TapHShader::init()
{
    Program = LoadProgram(GL_COMPUTE_SHADER,
                          file_manager->getAsset("shaders/gaussian.comp").c_str());

    // The two lookups are independent of each other; both need the program loaded above.
    uniform_dest   = glGetUniformLocation(Program, "dest");
    uniform_source = glGetUniformLocation(Program, "source");
}
GLuint Gaussian6HBlurShader::Program;
GLuint Gaussian6HBlurShader::uniform_tex;
GLuint Gaussian6HBlurShader::uniform_pixel;
@ -2481,6 +2494,17 @@ namespace FullScreenShader
vao = createFullScreenVAO(Program);
}
// Storage for the static members of the vertical compute blur shader wrapper.
GLuint ComputeGaussian17TapVShader::Program;
GLuint ComputeGaussian17TapVShader::uniform_source;
GLuint ComputeGaussian17TapVShader::uniform_dest;

/** Loads shaders/gaussianv.comp as a compute program and looks up the locations of its
 *  "source" and "dest" uniforms.
 */
void ComputeGaussian17TapVShader::init()
{
    Program = LoadProgram(GL_COMPUTE_SHADER,
                          file_manager->getAsset("shaders/gaussianv.comp").c_str());

    // The two lookups are independent of each other; both need the program loaded above.
    uniform_dest   = glGetUniformLocation(Program, "dest");
    uniform_source = glGetUniformLocation(Program, "source");
}
GLuint Gaussian6VBlurShader::Program;
GLuint Gaussian6VBlurShader::uniform_tex;
GLuint Gaussian6VBlurShader::uniform_pixel;

View File

@ -641,6 +641,15 @@ public:
static void init();
};
/** Static holder for the horizontal compute-shader Gaussian blur program. */
class ComputeGaussian17TapHShader
{
public:
    /** Program object; assigned by init(). */
    static GLuint Program;
    /** Location of the "source" uniform; assigned by init(). */
    static GLuint uniform_source;
    /** Location of the "dest" uniform; assigned by init(). */
    static GLuint uniform_dest;

    /** Loads the program and queries its uniform locations. */
    static void init();
};
class Gaussian6HBlurShader
{
public:
@ -671,6 +680,16 @@ public:
static void init();
};
/** Static holder for the vertical compute-shader Gaussian blur program. */
class ComputeGaussian17TapVShader
{
public:
    /** Program object; assigned by init(). */
    static GLuint Program;
    /** Location of the "source" uniform; assigned by init(). */
    static GLuint uniform_source;
    /** Location of the "dest" uniform; assigned by init(). */
    static GLuint uniform_dest;

    /** Loads the program and queries its uniform locations. */
    static void init();
};
class Gaussian6VBlurShader
{
public:

View File

@ -393,7 +393,11 @@ void Profiler::draw()
{
"Solid Pass 1",
"Shadows",
"Lights",
"RH",
"GI",
"Env Map",
"SunLight",
"PointLights",
"SSAO",
"Solid Pass 2",
"Transparent",