Prepare compute shader to stretch lightcoord

This commit is contained in:
Vincent Lejeune 2014-10-27 01:43:33 +01:00
parent 92ecd01e61
commit cacb008ae6
6 changed files with 261 additions and 14 deletions

View File

@ -0,0 +1,142 @@
uniform sampler2D depth;
uniform float split0;
uniform float split1;
uniform float split2;
uniform float splitmax;
uniform mat4 SunCamMatrix;
layout (local_size_x = 8, local_size_y = 8) in;
struct CascadeBoundingBox
{
int xmin;
int xmax;
int ymin;
int ymax;
int zmin;
int zmax;
};
layout (std430) buffer BoundingBoxes
{
CascadeBoundingBox BB[4];
};
vec4 getPosFromUVDepth(vec3 uvDepth, mat4 InverseProjectionMatrix);
shared int xmin[4];
shared int xmax[4];
shared int ymin[4];
shared int ymax[4];
shared int zmin[4];
shared int zmax[4];
void main()
{
if (gl_LocalInvocationIndex < 4) {
xmin[gl_LocalInvocationIndex] = ymin[gl_LocalInvocationIndex] = zmin[gl_LocalInvocationIndex] = 1000;
xmax[gl_LocalInvocationIndex] = ymax[gl_LocalInvocationIndex] = zmax[gl_LocalInvocationIndex] = -1000;
}
barrier();
ivec3 lmax0 = ivec3(-1000);
ivec3 lmin0 = ivec3(1000);
ivec3 lmax1 = ivec3(-1000);
ivec3 lmin1 = ivec3(1000);
ivec3 lmax2 = ivec3(-1000);
ivec3 lmin2 = ivec3(1000);
ivec3 lmax3 = ivec3(-1000);
ivec3 lmin3 = ivec3(1000);
vec2 start_xy = gl_LocalInvocationID.xy + gl_WorkGroupID.xy * gl_WorkGroupSize.xy * 8;
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
vec2 uv = (start_xy + vec2(i, j) * gl_WorkGroupID.xy) / screen;
float z = texture(depth, uv).x;
vec4 xpos = getPosFromUVDepth(vec3(uv, z), InverseProjectionMatrix);
vec4 lightcoord = InverseViewMatrix * xpos;
lightcoord /= lightcoord.w;
lightcoord = SunCamMatrix * lightcoord;
lightcoord /= lightcoord.w;
ivec3 lc = ivec3(lightcoord.xyz) * 4;
if (xpos.z < split0) {
lmax0 = max(lmax0, lc);
lmin0 = min(lmin0, lc);
} else if (xpos.z < split1) {
lmax1 = max(lmax1, lc);
lmin1 = min(lmin1, lc);
} else if (xpos.z < split2) {
lmax2 = max(lmax2, lc);
lmin2 = min(lmin2, lc);
} else if (xpos.z < splitmax) {
lmax3 = max(lmax3, lc);
lmin3 = min(lmin3, lc);
}
}
}
atomicMax(xmax[0], lmax0.x);
atomicMax(ymax[0], lmax0.y);
atomicMax(zmax[0], lmax0.z);
atomicMin(xmin[0], lmin0.x);
atomicMin(ymin[0], lmin0.y);
atomicMin(zmin[0], lmin0.z);
atomicMax(xmax[1], lmax1.x);
atomicMax(ymax[1], lmax1.y);
atomicMax(zmax[1], lmax1.z);
atomicMin(xmin[1], lmin1.x);
atomicMin(ymin[1], lmin1.y);
atomicMin(zmin[1], lmin1.z);
atomicMax(xmax[2], lmax2.x);
atomicMax(ymax[2], lmax2.y);
atomicMax(zmax[2], lmax2.z);
atomicMin(xmin[2], lmin2.x);
atomicMin(ymin[2], lmin2.y);
atomicMin(zmin[2], lmin2.z);
atomicMax(xmax[3], lmax3.x);
atomicMax(ymax[3], lmax3.y);
atomicMax(zmax[3], lmax3.z);
atomicMin(xmin[3], lmin3.x);
atomicMin(ymin[3], lmin3.y);
atomicMin(zmin[3], lmin3.z);
barrier();
if (gl_LocalInvocationIndex == 0) {
atomicMax(BB[0].xmax, xmax[0]);
atomicMax(BB[0].ymax, ymax[0]);
atomicMax(BB[0].zmax, zmax[0]);
atomicMin(BB[0].xmin, xmin[0]);
atomicMin(BB[0].ymin, ymin[0]);
atomicMin(BB[0].zmin, zmin[0]);
atomicMax(BB[1].xmax, xmax[1]);
atomicMax(BB[1].ymax, ymax[1]);
atomicMax(BB[1].zmax, zmax[1]);
atomicMin(BB[1].xmin, xmin[1]);
atomicMin(BB[1].ymin, ymin[1]);
atomicMin(BB[1].zmin, zmin[1]);
atomicMax(BB[2].xmax, xmax[2]);
atomicMax(BB[2].ymax, ymax[2]);
atomicMax(BB[2].zmax, zmax[2]);
atomicMin(BB[2].xmin, xmin[2]);
atomicMin(BB[2].ymin, ymin[2]);
atomicMin(BB[2].zmin, zmin[2]);
atomicMax(BB[3].xmax, xmax[3]);
atomicMax(BB[3].ymax, ymax[3]);
atomicMax(BB[3].zmax, zmax[3]);
atomicMin(BB[3].xmin, xmin[3]);
atomicMin(BB[3].ymin, ymin[3]);
atomicMin(BB[3].zmin, zmin[3]);
}
}

View File

@ -485,6 +485,7 @@ void IrrDriver::initDevice()
m_need_ubo_workaround = false;
m_need_rh_workaround = false;
m_need_srgb_workaround = false;
m_support_sdsm = false;
#ifdef WIN32
// Fix for Intel Sandy Bridge on Windows which supports GL up to 3.1 only
if (strstr((const char *)glGetString(GL_VENDOR), "Intel") != NULL && (m_gl_major_version == 3 && m_gl_minor_version == 1))
@ -492,7 +493,10 @@ void IrrDriver::initDevice()
#endif
// Fix for Nvidia and instanced RH
if (strstr((const char *)glGetString(GL_VENDOR), "NVIDIA") != NULL)
{
m_need_rh_workaround = true;
m_support_sdsm = false;
}
// Fix for AMD and bindless sRGB textures
if (strstr((const char *)glGetString(GL_VENDOR), "ATI") != NULL)
@ -546,6 +550,7 @@ void IrrDriver::initDevice()
hasTextureView = true;
Log::info("GLDriver", "ARB Texture View enabled");
}
m_support_sdsm = m_support_sdsm && hasComputeShaders && hasBuffserStorage;
}
#endif

View File

@ -181,6 +181,7 @@ private:
bool hasComputeShaders;
bool hasTextureStorage;
bool hasTextureView;
bool m_support_sdsm;
bool m_need_ubo_workaround;
bool m_need_rh_workaround;
bool m_need_srgb_workaround;
@ -267,6 +268,11 @@ public:
return 120;
}
bool supportsSDSM() const
{
return m_support_sdsm;
}
bool needUBOWorkaround() const
{
return m_need_ubo_workaround;
@ -773,6 +779,7 @@ public:
void renderScene(scene::ICameraSceneNode * const camnode, unsigned pointlightcount, std::vector<GlowData>& glows, float dt, bool hasShadows, bool forceRTT);
unsigned UpdateLightsInfo(scene::ICameraSceneNode * const camnode, float dt);
void UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height);
void computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height);
// --------------------- OLD RTT --------------------

View File

@ -644,8 +644,77 @@ core::matrix4 getTighestFitOrthoProj(const core::matrix4 &transform, const std::
float shadowSplit[5] = {1., 5., 20., 50., 150 };
struct CascadeBoundingBox
{
int xmin;
int xmax;
int ymin;
int ymax;
int zmin;
int zmax;
};
static size_t currentCBB = 0;
static CascadeBoundingBox *CBB[2];
/** Update shadowSplit values and make Cascade Bounding Box pointer valid.
* The function aunches two compute kernel that generates an histogram of the depth buffer value (between 0 and 250 with increment of 0.25)
* and get an axis aligned bounding box (from SunCamMatrix view) containing all depth buffer value.
* It also retrieves the result from the previous computations (in a Round Robin fashion) and update CBB pointer.
* \param width of the depth buffer
* \param height of the depth buffer
*/
void IrrDriver::UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height)
{
// Value that should be kept between multiple calls
static GLuint ssbo[2];
static GLsync LightcoordBBFence = 0;
static size_t currentHist = 0;
static GLuint ssboSplit[2];
static float tmpshadowSplit[5] = { 1., 5., 20., 50., 150. };
if (!LightcoordBBFence)
{
glGenBuffers(2, ssbo);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[0]);
glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
CBB[0] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[1]);
glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
CBB[1] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
}
// Use bounding boxes from last frame
if (LightcoordBBFence)
{
while (glClientWaitSync(LightcoordBBFence, GL_SYNC_FLUSH_COMMANDS_BIT, 0) != GL_ALREADY_SIGNALED);
glDeleteSync(LightcoordBBFence);
}
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ssbo[currentCBB]);
for (unsigned i = 0; i < 4; i++)
{
CBB[currentCBB][i].xmin = CBB[currentCBB][i].ymin = CBB[currentCBB][i].zmin = 1000;
CBB[currentCBB][i].xmax = CBB[currentCBB][i].ymax = CBB[currentCBB][i].zmax = -1000;
}
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glUseProgram(FullScreenShader::LightspaceBoundingBoxShader::getInstance()->Program);
FullScreenShader::LightspaceBoundingBoxShader::getInstance()->SetTextureUnits(getDepthStencilTexture());
FullScreenShader::LightspaceBoundingBoxShader::getInstance()->setUniforms(m_suncam->getViewMatrix(), tmpshadowSplit[1], tmpshadowSplit[2], tmpshadowSplit[3], tmpshadowSplit[4]);
glDispatchCompute((int)width / 64, (int)height / 64, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
LightcoordBBFence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
currentCBB = (currentCBB + 1) % 2;
}
void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height)
{
if (irr_driver->supportsSDSM())
UpdateSplitAndLightcoordRangeFromComputeShaders(width, height);
static_cast<scene::CSceneManager *>(m_scene_manager)->OnAnimate(os::Timer::getTime());
camnode->render();
irr_driver->setProjMatrix(irr_driver->getVideoDriver()->getTransform(video::ETS_PROJECTION));
@ -699,9 +768,12 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
// Build the 3 ortho projection (for the 3 shadow resolution levels)
for (unsigned i = 0; i < 4; i++)
{
camnode->setFarValue(FarValues[i]);
camnode->setNearValue(NearValues[i]);
camnode->render();
if (!irr_driver->supportsSDSM())
{
camnode->setFarValue(FarValues[i]);
camnode->setNearValue(NearValues[i]);
camnode->render();
}
const scene::SViewFrustum *frustrum = camnode->getViewFrustum();
float tmp[24] = {
frustrum->getFarLeftDown().X,
@ -739,7 +811,6 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
core::aabbox3df box = smallcambox;
box = box.intersect(trackbox);
std::vector<vector3df> vectors;
vectors.push_back(frustrum->getFarLeftDown());
vectors.push_back(frustrum->getFarLeftUp());
@ -750,19 +821,24 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
vectors.push_back(frustrum->getNearRightDown());
vectors.push_back(frustrum->getNearRightUp());
/* SunCamViewMatrix.transformBoxEx(trackbox);
SunCamViewMatrix.transformBoxEx(box);
core::matrix4 tmp_matrix;
core::vector3df extent = box.getExtent();
const float w = fabsf(extent.X);
const float h = fabsf(extent.Y);
float z = box.MaxEdge.Z;
if (irr_driver->supportsSDSM()){
float left = CBB[currentCBB][i].xmin / 4 - 2;
float right = CBB[currentCBB][i].xmax / 4 + 2;
float up = CBB[currentCBB][i].ymin / 4 - 2;
float down = CBB[currentCBB][i].ymax / 4 + 2;
// Snap to texels
const float units_per_w = w / 1024;
const float units_per_h = h / 1024;*/
// Prevent Matrix without extend
if (left != right && up != down)
tmp_matrix.buildProjectionMatrixOrthoLH(left, right,
down, up,
CBB[currentCBB][i].zmin / 4 - 100, CBB[currentCBB][i].zmax / 4 + 2);
}
else
tmp_matrix = getTighestFitOrthoProj(SunCamViewMatrix, vectors);
m_shadow_camnodes[i]->setProjectionMatrix(getTighestFitOrthoProj(SunCamViewMatrix, vectors) , true);
m_shadow_camnodes[i]->setProjectionMatrix(tmp_matrix , true);
m_shadow_camnodes[i]->render();
sun_ortho_matrix.push_back(getVideoDriver()->getTransform(video::ETS_PROJECTION) * getVideoDriver()->getTransform(video::ETS_VIEW));

View File

@ -1789,6 +1789,17 @@ namespace FullScreenShader
AssignSamplerNames(Program, 0, "texture");
}
LightspaceBoundingBoxShader::LightspaceBoundingBoxShader()
{
Program = LoadProgram(OBJECT,
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/Lightspaceboundingbox.comp").c_str(),
GL_COMPUTE_SHADER, file_manager->getAsset("shaders/utils/getPosFromUVDepth.frag").c_str());
AssignSamplerNames(Program, 0, "depth");
AssignUniforms("SunCamMatrix", "split0", "split1", "split2", "splitmax");
GLuint block_idx = glGetProgramResourceIndex(Program, GL_SHADER_STORAGE_BLOCK, "BoundingBoxes");
glShaderStorageBlockBinding(Program, block_idx, 2);
}
GlowShader::GlowShader()
{
Program = LoadProgram(OBJECT,

View File

@ -511,6 +511,12 @@ public:
LinearizeDepthShader();
};
class LightspaceBoundingBoxShader : public ShaderHelperSingleton<LightspaceBoundingBoxShader, core::matrix4, float, float, float, float>, public TextureRead < Nearest_Filtered >
{
public:
LightspaceBoundingBoxShader();
};
class GlowShader : public ShaderHelperSingleton<GlowShader>, public TextureRead<Bilinear_Filtered>
{
public: