From cacb008ae6796a71e9767dd1fd8d8a9a2b508535 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 27 Oct 2014 01:43:33 +0100 Subject: [PATCH] Prepare compute shader to stretch lightcoord --- data/shaders/Lightspaceboundingbox.comp | 142 ++++++++++++++++++++++++ src/graphics/irr_driver.cpp | 5 + src/graphics/irr_driver.hpp | 7 ++ src/graphics/render.cpp | 104 ++++++++++++++--- src/graphics/shaders.cpp | 11 ++ src/graphics/shaders.hpp | 6 + 6 files changed, 261 insertions(+), 14 deletions(-) create mode 100644 data/shaders/Lightspaceboundingbox.comp diff --git a/data/shaders/Lightspaceboundingbox.comp b/data/shaders/Lightspaceboundingbox.comp new file mode 100644 index 000000000..e55e69c68 --- /dev/null +++ b/data/shaders/Lightspaceboundingbox.comp @@ -0,0 +1,142 @@ +uniform sampler2D depth; +uniform float split0; +uniform float split1; +uniform float split2; +uniform float splitmax; +uniform mat4 SunCamMatrix; + +layout (local_size_x = 8, local_size_y = 8) in; + +struct CascadeBoundingBox +{ + int xmin; + int xmax; + int ymin; + int ymax; + int zmin; + int zmax; +}; + +layout (std430) buffer BoundingBoxes +{ + CascadeBoundingBox BB[4]; +}; + +vec4 getPosFromUVDepth(vec3 uvDepth, mat4 InverseProjectionMatrix); + +shared int xmin[4]; +shared int xmax[4]; +shared int ymin[4]; +shared int ymax[4]; +shared int zmin[4]; +shared int zmax[4]; + +void main() +{ + if (gl_LocalInvocationIndex < 4) { + xmin[gl_LocalInvocationIndex] = ymin[gl_LocalInvocationIndex] = zmin[gl_LocalInvocationIndex] = 1000; + xmax[gl_LocalInvocationIndex] = ymax[gl_LocalInvocationIndex] = zmax[gl_LocalInvocationIndex] = -1000; + } + + barrier(); + + ivec3 lmax0 = ivec3(-1000); + ivec3 lmin0 = ivec3(1000); + ivec3 lmax1 = ivec3(-1000); + ivec3 lmin1 = ivec3(1000); + ivec3 lmax2 = ivec3(-1000); + ivec3 lmin2 = ivec3(1000); + ivec3 lmax3 = ivec3(-1000); + ivec3 lmin3 = ivec3(1000); + + vec2 start_xy = gl_LocalInvocationID.xy + gl_WorkGroupID.xy * gl_WorkGroupSize.xy * 8; + for (int i = 0; i < 8; 
i++) { + for (int j = 0; j < 8; j++) { + + + vec2 uv = (start_xy + vec2(i, j) * gl_WorkGroupID.xy) / screen; + float z = texture(depth, uv).x; + vec4 xpos = getPosFromUVDepth(vec3(uv, z), InverseProjectionMatrix); + vec4 lightcoord = InverseViewMatrix * xpos; + lightcoord /= lightcoord.w; + lightcoord = SunCamMatrix * lightcoord; + lightcoord /= lightcoord.w; + ivec3 lc = ivec3(lightcoord.xyz) * 4; + + if (xpos.z < split0) { + lmax0 = max(lmax0, lc); + lmin0 = min(lmin0, lc); + } else if (xpos.z < split1) { + lmax1 = max(lmax1, lc); + lmin1 = min(lmin1, lc); + } else if (xpos.z < split2) { + lmax2 = max(lmax2, lc); + lmin2 = min(lmin2, lc); + } else if (xpos.z < splitmax) { + lmax3 = max(lmax3, lc); + lmin3 = min(lmin3, lc); + } + } + } + + atomicMax(xmax[0], lmax0.x); + atomicMax(ymax[0], lmax0.y); + atomicMax(zmax[0], lmax0.z); + atomicMin(xmin[0], lmin0.x); + atomicMin(ymin[0], lmin0.y); + atomicMin(zmin[0], lmin0.z); + + atomicMax(xmax[1], lmax1.x); + atomicMax(ymax[1], lmax1.y); + atomicMax(zmax[1], lmax1.z); + atomicMin(xmin[1], lmin1.x); + atomicMin(ymin[1], lmin1.y); + atomicMin(zmin[1], lmin1.z); + + atomicMax(xmax[2], lmax2.x); + atomicMax(ymax[2], lmax2.y); + atomicMax(zmax[2], lmax2.z); + atomicMin(xmin[2], lmin2.x); + atomicMin(ymin[2], lmin2.y); + atomicMin(zmin[2], lmin2.z); + + atomicMax(xmax[3], lmax3.x); + atomicMax(ymax[3], lmax3.y); + atomicMax(zmax[3], lmax3.z); + atomicMin(xmin[3], lmin3.x); + atomicMin(ymin[3], lmin3.y); + atomicMin(zmin[3], lmin3.z); + + barrier(); + + if (gl_LocalInvocationIndex == 0) { + atomicMax(BB[0].xmax, xmax[0]); + atomicMax(BB[0].ymax, ymax[0]); + atomicMax(BB[0].zmax, zmax[0]); + atomicMin(BB[0].xmin, xmin[0]); + atomicMin(BB[0].ymin, ymin[0]); + atomicMin(BB[0].zmin, zmin[0]); + + atomicMax(BB[1].xmax, xmax[1]); + atomicMax(BB[1].ymax, ymax[1]); + atomicMax(BB[1].zmax, zmax[1]); + atomicMin(BB[1].xmin, xmin[1]); + atomicMin(BB[1].ymin, ymin[1]); + atomicMin(BB[1].zmin, zmin[1]); + + atomicMax(BB[2].xmax, xmax[2]); 
+ atomicMax(BB[2].ymax, ymax[2]); + atomicMax(BB[2].zmax, zmax[2]); + atomicMin(BB[2].xmin, xmin[2]); + atomicMin(BB[2].ymin, ymin[2]); + atomicMin(BB[2].zmin, zmin[2]); + + atomicMax(BB[3].xmax, xmax[3]); + atomicMax(BB[3].ymax, ymax[3]); + atomicMax(BB[3].zmax, zmax[3]); + atomicMin(BB[3].xmin, xmin[3]); + atomicMin(BB[3].ymin, ymin[3]); + atomicMin(BB[3].zmin, zmin[3]); + } +} + diff --git a/src/graphics/irr_driver.cpp b/src/graphics/irr_driver.cpp index 700481678..8e6b9d95b 100644 --- a/src/graphics/irr_driver.cpp +++ b/src/graphics/irr_driver.cpp @@ -485,6 +485,7 @@ void IrrDriver::initDevice() m_need_ubo_workaround = false; m_need_rh_workaround = false; m_need_srgb_workaround = false; + m_support_sdsm = false; #ifdef WIN32 // Fix for Intel Sandy Bridge on Windows which supports GL up to 3.1 only if (strstr((const char *)glGetString(GL_VENDOR), "Intel") != NULL && (m_gl_major_version == 3 && m_gl_minor_version == 1)) @@ -492,7 +493,10 @@ void IrrDriver::initDevice() #endif // Fix for Nvidia and instanced RH if (strstr((const char *)glGetString(GL_VENDOR), "NVIDIA") != NULL) + { m_need_rh_workaround = true; + m_support_sdsm = false; + } // Fix for AMD and bindless sRGB textures if (strstr((const char *)glGetString(GL_VENDOR), "ATI") != NULL) @@ -546,6 +550,7 @@ void IrrDriver::initDevice() hasTextureView = true; Log::info("GLDriver", "ARB Texture View enabled"); } + m_support_sdsm = m_support_sdsm && hasComputeShaders && hasBuffserStorage; } #endif diff --git a/src/graphics/irr_driver.hpp b/src/graphics/irr_driver.hpp index 1dc1f29e0..ca1747a20 100644 --- a/src/graphics/irr_driver.hpp +++ b/src/graphics/irr_driver.hpp @@ -181,6 +181,7 @@ private: bool hasComputeShaders; bool hasTextureStorage; bool hasTextureView; + bool m_support_sdsm; bool m_need_ubo_workaround; bool m_need_rh_workaround; bool m_need_srgb_workaround; @@ -267,6 +268,11 @@ public: return 120; } + bool supportsSDSM() const + { + return m_support_sdsm; + } + bool needUBOWorkaround() const { 
return m_need_ubo_workaround; @@ -773,6 +779,7 @@ public: void renderScene(scene::ICameraSceneNode * const camnode, unsigned pointlightcount, std::vector& glows, float dt, bool hasShadows, bool forceRTT); unsigned UpdateLightsInfo(scene::ICameraSceneNode * const camnode, float dt); + void UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height); void computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height); // --------------------- OLD RTT -------------------- diff --git a/src/graphics/render.cpp b/src/graphics/render.cpp index cfc19e857..46f2f1184 100644 --- a/src/graphics/render.cpp +++ b/src/graphics/render.cpp @@ -644,8 +644,77 @@ core::matrix4 getTighestFitOrthoProj(const core::matrix4 &transform, const std:: float shadowSplit[5] = {1., 5., 20., 50., 150 }; +struct CascadeBoundingBox +{ + int xmin; + int xmax; + int ymin; + int ymax; + int zmin; + int zmax; +}; + +static size_t currentCBB = 0; +static CascadeBoundingBox *CBB[2]; + +/** Update shadowSplit values and make Cascade Bounding Box pointer valid. +* The function launches two compute kernels that generate a histogram of the depth buffer values (between 0 and 250 with increment of 0.25) +* and get an axis aligned bounding box (from SunCamMatrix view) containing all depth buffer values. +* It also retrieves the result from the previous computations (in a Round Robin fashion) and updates the CBB pointer. +* \param width of the depth buffer +* \param height of the depth buffer +*/ +void IrrDriver::UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height) +{ + // Value that should be kept between multiple calls + static GLuint ssbo[2]; + static GLsync LightcoordBBFence = 0; + static size_t currentHist = 0; + static GLuint ssboSplit[2]; + static float tmpshadowSplit[5] = { 1., 5., 20., 50., 150. 
}; + + if (!LightcoordBBFence) + { + glGenBuffers(2, ssbo); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[0]); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); + CBB[0] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[1]); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); + CBB[1] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); + } + + // Use bounding boxes from last frame + if (LightcoordBBFence) + { + while (glClientWaitSync(LightcoordBBFence, GL_SYNC_FLUSH_COMMANDS_BIT, 0) != GL_ALREADY_SIGNALED); + glDeleteSync(LightcoordBBFence); + } + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ssbo[currentCBB]); + for (unsigned i = 0; i < 4; i++) + { + CBB[currentCBB][i].xmin = CBB[currentCBB][i].ymin = CBB[currentCBB][i].zmin = 1000; + CBB[currentCBB][i].xmax = CBB[currentCBB][i].ymax = CBB[currentCBB][i].zmax = -1000; + } + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glUseProgram(FullScreenShader::LightspaceBoundingBoxShader::getInstance()->Program); + FullScreenShader::LightspaceBoundingBoxShader::getInstance()->SetTextureUnits(getDepthStencilTexture()); + FullScreenShader::LightspaceBoundingBoxShader::getInstance()->setUniforms(m_suncam->getViewMatrix(), tmpshadowSplit[1], tmpshadowSplit[2], tmpshadowSplit[3], tmpshadowSplit[4]); + glDispatchCompute((int)width / 64, (int)height / 64, 1); + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + LightcoordBBFence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + + currentCBB = 
(currentCBB + 1) % 2; + +} + + void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height) { + if (irr_driver->supportsSDSM()) + UpdateSplitAndLightcoordRangeFromComputeShaders(width, height); static_cast(m_scene_manager)->OnAnimate(os::Timer::getTime()); camnode->render(); irr_driver->setProjMatrix(irr_driver->getVideoDriver()->getTransform(video::ETS_PROJECTION)); @@ -699,9 +768,12 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz // Build the 3 ortho projection (for the 3 shadow resolution levels) for (unsigned i = 0; i < 4; i++) { - camnode->setFarValue(FarValues[i]); - camnode->setNearValue(NearValues[i]); - camnode->render(); + if (!irr_driver->supportsSDSM()) + { + camnode->setFarValue(FarValues[i]); + camnode->setNearValue(NearValues[i]); + camnode->render(); + } const scene::SViewFrustum *frustrum = camnode->getViewFrustum(); float tmp[24] = { frustrum->getFarLeftDown().X, @@ -739,7 +811,6 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz core::aabbox3df box = smallcambox; box = box.intersect(trackbox); - std::vector vectors; vectors.push_back(frustrum->getFarLeftDown()); vectors.push_back(frustrum->getFarLeftUp()); @@ -750,19 +821,24 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz vectors.push_back(frustrum->getNearRightDown()); vectors.push_back(frustrum->getNearRightUp()); -/* SunCamViewMatrix.transformBoxEx(trackbox); - SunCamViewMatrix.transformBoxEx(box); + core::matrix4 tmp_matrix; - core::vector3df extent = box.getExtent(); - const float w = fabsf(extent.X); - const float h = fabsf(extent.Y); - float z = box.MaxEdge.Z; + if (irr_driver->supportsSDSM()){ + float left = CBB[currentCBB][i].xmin / 4 - 2; + float right = CBB[currentCBB][i].xmax / 4 + 2; + float up = CBB[currentCBB][i].ymin / 4 - 2; + float down = CBB[currentCBB][i].ymax / 4 + 2; - // Snap to texels - const float units_per_w = w / 1024; - 
const float units_per_h = h / 1024;*/ + // Prevent a matrix without extent + if (left != right && up != down) + tmp_matrix.buildProjectionMatrixOrthoLH(left, right, + down, up, + CBB[currentCBB][i].zmin / 4 - 100, CBB[currentCBB][i].zmax / 4 + 2); + } + else + tmp_matrix = getTighestFitOrthoProj(SunCamViewMatrix, vectors); - m_shadow_camnodes[i]->setProjectionMatrix(getTighestFitOrthoProj(SunCamViewMatrix, vectors) , true); + m_shadow_camnodes[i]->setProjectionMatrix(tmp_matrix , true); m_shadow_camnodes[i]->render(); sun_ortho_matrix.push_back(getVideoDriver()->getTransform(video::ETS_PROJECTION) * getVideoDriver()->getTransform(video::ETS_VIEW)); diff --git a/src/graphics/shaders.cpp b/src/graphics/shaders.cpp index 459f276d1..af57d6a9f 100644 --- a/src/graphics/shaders.cpp +++ b/src/graphics/shaders.cpp @@ -1789,6 +1789,17 @@ namespace FullScreenShader AssignSamplerNames(Program, 0, "texture"); } + LightspaceBoundingBoxShader::LightspaceBoundingBoxShader() + { + Program = LoadProgram(OBJECT, + GL_COMPUTE_SHADER, file_manager->getAsset("shaders/Lightspaceboundingbox.comp").c_str(), + GL_COMPUTE_SHADER, file_manager->getAsset("shaders/utils/getPosFromUVDepth.frag").c_str()); + AssignSamplerNames(Program, 0, "depth"); + AssignUniforms("SunCamMatrix", "split0", "split1", "split2", "splitmax"); + GLuint block_idx = glGetProgramResourceIndex(Program, GL_SHADER_STORAGE_BLOCK, "BoundingBoxes"); + glShaderStorageBlockBinding(Program, block_idx, 2); + } + GlowShader::GlowShader() { Program = LoadProgram(OBJECT, diff --git a/src/graphics/shaders.hpp b/src/graphics/shaders.hpp index b7ab47882..fb6e946cb 100644 --- a/src/graphics/shaders.hpp +++ b/src/graphics/shaders.hpp @@ -511,6 +511,12 @@ public: LinearizeDepthShader(); }; +class LightspaceBoundingBoxShader : public ShaderHelperSingleton, public TextureRead < Nearest_Filtered > +{ +public: + LightspaceBoundingBoxShader(); +}; + class GlowShader : public ShaderHelperSingleton, public TextureRead { public: