Prepare compute shader to stretch lightcoord
This commit is contained in:
parent
92ecd01e61
commit
cacb008ae6
142
data/shaders/Lightspaceboundingbox.comp
Normal file
142
data/shaders/Lightspaceboundingbox.comp
Normal file
@ -0,0 +1,142 @@
|
||||
// Depth texture; main() reads its .x channel at each sampled uv.
uniform sampler2D depth;
|
||||
// Thresholds compared (in increasing order) against xpos.z in main() to pick
// which of the 4 cascades a sample belongs to; samples with xpos.z >= splitmax
// are ignored.
uniform float split0;
|
||||
uniform float split1;
|
||||
uniform float split2;
|
||||
uniform float splitmax;
|
||||
// Matrix applied in main() to the position obtained from InverseViewMatrix * xpos
// to get the light-space coordinate that is accumulated in the bounding boxes.
uniform mat4 SunCamMatrix;
|
||||
|
||||
// 8x8 invocations per work group.
layout (local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
// Integer min/max bounds on each axis for one cascade. main() stores
// ivec3(lightcoord.xyz) * 4 in these fields.
struct CascadeBoundingBox
|
||||
{
|
||||
int xmin;
|
||||
int xmax;
|
||||
|
||||
int ymin;
|
||||
int ymax;
|
||||
|
||||
int zmin;
|
||||
int zmax;
|
||||
};
|
||||
|
||||
// Output storage block: one bounding box per cascade, merged across work groups
// with atomicMin/atomicMax at the end of main().
layout (std430) buffer BoundingBoxes
|
||||
{
|
||||
CascadeBoundingBox BB[4];
|
||||
};
|
||||
|
||||
// Only declared here; the definition is not part of this file.
vec4 getPosFromUVDepth(vec3 uvDepth, mat4 InverseProjectionMatrix);
|
||||
|
||||
// Work-group-wide accumulators, one entry per cascade. They are reset by the
// first four invocations and updated with atomicMin/atomicMax in main().
shared int xmin[4];
|
||||
shared int xmax[4];
|
||||
shared int ymin[4];
|
||||
shared int ymax[4];
|
||||
shared int zmin[4];
|
||||
shared int zmax[4];
|
||||
|
||||
// Accumulates, for each of the 4 cascades, the integer min/max of the
// light-space coordinates of the sampled depth values and merges them into BB[].
//
// Every invocation reads an 8x8 grid of samples, keeps private bounds per
// cascade, folds them into the shared per-work-group bounds, and finally one
// invocation per work group folds the shared bounds into the storage buffer.
void main()
{
    // The first four invocations reset the shared bounds, one slot per cascade.
    // 1000 / -1000 act as "empty" sentinels for the min / max reductions.
    if (gl_LocalInvocationIndex < 4) {
        xmin[gl_LocalInvocationIndex] = ymin[gl_LocalInvocationIndex] = zmin[gl_LocalInvocationIndex] = 1000;
        xmax[gl_LocalInvocationIndex] = ymax[gl_LocalInvocationIndex] = zmax[gl_LocalInvocationIndex] = -1000;
    }

    // Nobody may touch the shared bounds before they are reset.
    barrier();

    // Private bounds of this invocation for cascades 0..3.
    ivec3 lmax0 = ivec3(-1000);
    ivec3 lmin0 = ivec3(1000);
    ivec3 lmax1 = ivec3(-1000);
    ivec3 lmin1 = ivec3(1000);
    ivec3 lmax2 = ivec3(-1000);
    ivec3 lmin2 = ivec3(1000);
    ivec3 lmax3 = ivec3(-1000);
    ivec3 lmin3 = ivec3(1000);

    // A work group owns a tile of (8 * gl_WorkGroupSize) texels on each axis;
    // this invocation starts at its local offset inside that tile.
    vec2 start_xy = gl_LocalInvocationID.xy + gl_WorkGroupID.xy * gl_WorkGroupSize.xy * 8;
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++) {

            // Step by the work group size so that the 8x8 samples of the
            // 8x8 invocations interleave and exactly cover the tile. (The
            // step used to be gl_WorkGroupID, which made group (0,0) read the
            // same texel 64 times and other groups leave their tile.)
            // NOTE(review): screen, InverseProjectionMatrix and InverseViewMatrix
            // are declared outside this file - confirm screen is the size in texels.
            vec2 uv = (start_xy + vec2(i, j) * gl_WorkGroupSize.xy) / screen;
            float z = texture(depth, uv).x;
            vec4 xpos = getPosFromUVDepth(vec3(uv, z), InverseProjectionMatrix);
            vec4 lightcoord = InverseViewMatrix * xpos;
            lightcoord /= lightcoord.w;
            lightcoord = SunCamMatrix * lightcoord;
            lightcoord /= lightcoord.w;
            // NOTE(review): the truncation to int happens before the * 4 scale,
            // so lc only holds multiples of 4 - confirm this is intended.
            ivec3 lc = ivec3(lightcoord.xyz) * 4;

            // Pick the cascade from xpos.z; samples beyond splitmax are dropped.
            if (xpos.z < split0) {
                lmax0 = max(lmax0, lc);
                lmin0 = min(lmin0, lc);
            } else if (xpos.z < split1) {
                lmax1 = max(lmax1, lc);
                lmin1 = min(lmin1, lc);
            } else if (xpos.z < split2) {
                lmax2 = max(lmax2, lc);
                lmin2 = min(lmin2, lc);
            } else if (xpos.z < splitmax) {
                lmax3 = max(lmax3, lc);
                lmin3 = min(lmin3, lc);
            }
        }
    }

    // Fold the private bounds into the shared per-work-group bounds. Untouched
    // private bounds still hold the sentinels and therefore change nothing.
    atomicMax(xmax[0], lmax0.x);
    atomicMax(ymax[0], lmax0.y);
    atomicMax(zmax[0], lmax0.z);
    atomicMin(xmin[0], lmin0.x);
    atomicMin(ymin[0], lmin0.y);
    atomicMin(zmin[0], lmin0.z);

    atomicMax(xmax[1], lmax1.x);
    atomicMax(ymax[1], lmax1.y);
    atomicMax(zmax[1], lmax1.z);
    atomicMin(xmin[1], lmin1.x);
    atomicMin(ymin[1], lmin1.y);
    atomicMin(zmin[1], lmin1.z);

    atomicMax(xmax[2], lmax2.x);
    atomicMax(ymax[2], lmax2.y);
    atomicMax(zmax[2], lmax2.z);
    atomicMin(xmin[2], lmin2.x);
    atomicMin(ymin[2], lmin2.y);
    atomicMin(zmin[2], lmin2.z);

    atomicMax(xmax[3], lmax3.x);
    atomicMax(ymax[3], lmax3.y);
    atomicMax(zmax[3], lmax3.z);
    atomicMin(xmin[3], lmin3.x);
    atomicMin(ymin[3], lmin3.y);
    atomicMin(zmin[3], lmin3.z);

    // Wait until every invocation of the work group has contributed.
    barrier();

    // A single invocation merges the work group result into the storage buffer.
    if (gl_LocalInvocationIndex == 0) {
        atomicMax(BB[0].xmax, xmax[0]);
        atomicMax(BB[0].ymax, ymax[0]);
        atomicMax(BB[0].zmax, zmax[0]);
        atomicMin(BB[0].xmin, xmin[0]);
        atomicMin(BB[0].ymin, ymin[0]);
        atomicMin(BB[0].zmin, zmin[0]);

        atomicMax(BB[1].xmax, xmax[1]);
        atomicMax(BB[1].ymax, ymax[1]);
        atomicMax(BB[1].zmax, zmax[1]);
        atomicMin(BB[1].xmin, xmin[1]);
        atomicMin(BB[1].ymin, ymin[1]);
        atomicMin(BB[1].zmin, zmin[1]);

        atomicMax(BB[2].xmax, xmax[2]);
        atomicMax(BB[2].ymax, ymax[2]);
        atomicMax(BB[2].zmax, zmax[2]);
        atomicMin(BB[2].xmin, xmin[2]);
        atomicMin(BB[2].ymin, ymin[2]);
        atomicMin(BB[2].zmin, zmin[2]);

        atomicMax(BB[3].xmax, xmax[3]);
        atomicMax(BB[3].ymax, ymax[3]);
        atomicMax(BB[3].zmax, zmax[3]);
        atomicMin(BB[3].xmin, xmin[3]);
        atomicMin(BB[3].ymin, ymin[3]);
        atomicMin(BB[3].zmin, zmin[3]);
    }
}
|
||||
|
@ -485,6 +485,7 @@ void IrrDriver::initDevice()
|
||||
m_need_ubo_workaround = false;
|
||||
m_need_rh_workaround = false;
|
||||
m_need_srgb_workaround = false;
|
||||
m_support_sdsm = false;
|
||||
#ifdef WIN32
|
||||
// Fix for Intel Sandy Bridge on Windows which supports GL up to 3.1 only
|
||||
if (strstr((const char *)glGetString(GL_VENDOR), "Intel") != NULL && (m_gl_major_version == 3 && m_gl_minor_version == 1))
|
||||
@ -492,7 +493,10 @@ void IrrDriver::initDevice()
|
||||
#endif
|
||||
// Fix for Nvidia and instanced RH
|
||||
if (strstr((const char *)glGetString(GL_VENDOR), "NVIDIA") != NULL)
|
||||
{
|
||||
m_need_rh_workaround = true;
|
||||
m_support_sdsm = false;
|
||||
}
|
||||
|
||||
// Fix for AMD and bindless sRGB textures
|
||||
if (strstr((const char *)glGetString(GL_VENDOR), "ATI") != NULL)
|
||||
@ -546,6 +550,7 @@ void IrrDriver::initDevice()
|
||||
hasTextureView = true;
|
||||
Log::info("GLDriver", "ARB Texture View enabled");
|
||||
}
|
||||
m_support_sdsm = m_support_sdsm && hasComputeShaders && hasBuffserStorage;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -181,6 +181,7 @@ private:
|
||||
bool hasComputeShaders;
|
||||
bool hasTextureStorage;
|
||||
bool hasTextureView;
|
||||
bool m_support_sdsm;
|
||||
bool m_need_ubo_workaround;
|
||||
bool m_need_rh_workaround;
|
||||
bool m_need_srgb_workaround;
|
||||
@ -267,6 +268,11 @@ public:
|
||||
return 120;
|
||||
}
|
||||
|
||||
/** Returns the value of m_support_sdsm, the flag that tells whether the
 *  SDSM code path may be used. */
bool supportsSDSM() const
|
||||
{
|
||||
return m_support_sdsm;
|
||||
}
|
||||
|
||||
bool needUBOWorkaround() const
|
||||
{
|
||||
return m_need_ubo_workaround;
|
||||
@ -773,6 +779,7 @@ public:
|
||||
|
||||
void renderScene(scene::ICameraSceneNode * const camnode, unsigned pointlightcount, std::vector<GlowData>& glows, float dt, bool hasShadows, bool forceRTT);
|
||||
unsigned UpdateLightsInfo(scene::ICameraSceneNode * const camnode, float dt);
|
||||
void UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height);
|
||||
void computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height);
|
||||
|
||||
// --------------------- OLD RTT --------------------
|
||||
|
@ -644,8 +644,77 @@ core::matrix4 getTighestFitOrthoProj(const core::matrix4 &transform, const std::
|
||||
|
||||
float shadowSplit[5] = {1., 5., 20., 50., 150 };
|
||||
|
||||
/** Integer bounds of one cascade on each axis, stored as six consecutive ints
 *  in the order xmin, xmax, ymin, ymax, zmin, zmax. */
struct CascadeBoundingBox
{
    int xmin, xmax;
    int ymin, ymax;
    int zmin, zmax;
};
|
||||
|
||||
// Index (0 or 1) into CBB; toggled at the end of
// IrrDriver::UpdateSplitAndLightcoordRangeFromComputeShaders.
static size_t currentCBB = 0;
|
||||
// Client-side pointers to the two mapped buffers, each holding 4 CascadeBoundingBox.
static CascadeBoundingBox *CBB[2];
|
||||
|
||||
/** Update shadowSplit values and make Cascade Bounding Box pointer valid.
|
||||
* The function launches two compute kernels that generate a histogram of the depth buffer values (between 0 and 250 with increments of 0.25)
|
||||
* and get an axis aligned bounding box (from SunCamMatrix view) containing all depth buffer values.
|
||||
* It also retrieves the result from the previous computations (in a Round Robin fashion) and update CBB pointer.
|
||||
* \param width of the depth buffer
|
||||
* \param height of the depth buffer
|
||||
*/
|
||||
void IrrDriver::UpdateSplitAndLightcoordRangeFromComputeShaders(size_t width, size_t height)
|
||||
{
|
||||
// Value that should be kept between multiple calls
|
||||
static GLuint ssbo[2];
|
||||
static GLsync LightcoordBBFence = 0;
|
||||
static size_t currentHist = 0;
|
||||
static GLuint ssboSplit[2];
|
||||
static float tmpshadowSplit[5] = { 1., 5., 20., 50., 150. };
|
||||
|
||||
if (!LightcoordBBFence)
|
||||
{
|
||||
glGenBuffers(2, ssbo);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[0]);
|
||||
glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
|
||||
CBB[0] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo[1]);
|
||||
glBufferStorage(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(CascadeBoundingBox), 0, GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
|
||||
CBB[1] = (CascadeBoundingBox *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 4 * sizeof(CascadeBoundingBox), GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
|
||||
}
|
||||
|
||||
// Use bounding boxes from last frame
|
||||
if (LightcoordBBFence)
|
||||
{
|
||||
while (glClientWaitSync(LightcoordBBFence, GL_SYNC_FLUSH_COMMANDS_BIT, 0) != GL_ALREADY_SIGNALED);
|
||||
glDeleteSync(LightcoordBBFence);
|
||||
}
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ssbo[currentCBB]);
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
{
|
||||
CBB[currentCBB][i].xmin = CBB[currentCBB][i].ymin = CBB[currentCBB][i].zmin = 1000;
|
||||
CBB[currentCBB][i].xmax = CBB[currentCBB][i].ymax = CBB[currentCBB][i].zmax = -1000;
|
||||
}
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glUseProgram(FullScreenShader::LightspaceBoundingBoxShader::getInstance()->Program);
|
||||
FullScreenShader::LightspaceBoundingBoxShader::getInstance()->SetTextureUnits(getDepthStencilTexture());
|
||||
FullScreenShader::LightspaceBoundingBoxShader::getInstance()->setUniforms(m_suncam->getViewMatrix(), tmpshadowSplit[1], tmpshadowSplit[2], tmpshadowSplit[3], tmpshadowSplit[4]);
|
||||
glDispatchCompute((int)width / 64, (int)height / 64, 1);
|
||||
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
LightcoordBBFence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
|
||||
currentCBB = (currentCBB + 1) % 2;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, size_t width, size_t height)
|
||||
{
|
||||
if (irr_driver->supportsSDSM())
|
||||
UpdateSplitAndLightcoordRangeFromComputeShaders(width, height);
|
||||
static_cast<scene::CSceneManager *>(m_scene_manager)->OnAnimate(os::Timer::getTime());
|
||||
camnode->render();
|
||||
irr_driver->setProjMatrix(irr_driver->getVideoDriver()->getTransform(video::ETS_PROJECTION));
|
||||
@ -699,9 +768,12 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
|
||||
// Build the 4 ortho projections (for the 4 shadow resolution levels)
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
{
|
||||
camnode->setFarValue(FarValues[i]);
|
||||
camnode->setNearValue(NearValues[i]);
|
||||
camnode->render();
|
||||
if (!irr_driver->supportsSDSM())
|
||||
{
|
||||
camnode->setFarValue(FarValues[i]);
|
||||
camnode->setNearValue(NearValues[i]);
|
||||
camnode->render();
|
||||
}
|
||||
const scene::SViewFrustum *frustrum = camnode->getViewFrustum();
|
||||
float tmp[24] = {
|
||||
frustrum->getFarLeftDown().X,
|
||||
@ -739,7 +811,6 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
|
||||
core::aabbox3df box = smallcambox;
|
||||
box = box.intersect(trackbox);
|
||||
|
||||
|
||||
std::vector<vector3df> vectors;
|
||||
vectors.push_back(frustrum->getFarLeftDown());
|
||||
vectors.push_back(frustrum->getFarLeftUp());
|
||||
@ -750,19 +821,24 @@ void IrrDriver::computeCameraMatrix(scene::ICameraSceneNode * const camnode, siz
|
||||
vectors.push_back(frustrum->getNearRightDown());
|
||||
vectors.push_back(frustrum->getNearRightUp());
|
||||
|
||||
/* SunCamViewMatrix.transformBoxEx(trackbox);
|
||||
SunCamViewMatrix.transformBoxEx(box);
|
||||
core::matrix4 tmp_matrix;
|
||||
|
||||
core::vector3df extent = box.getExtent();
|
||||
const float w = fabsf(extent.X);
|
||||
const float h = fabsf(extent.Y);
|
||||
float z = box.MaxEdge.Z;
|
||||
if (irr_driver->supportsSDSM()){
|
||||
float left = CBB[currentCBB][i].xmin / 4 - 2;
|
||||
float right = CBB[currentCBB][i].xmax / 4 + 2;
|
||||
float up = CBB[currentCBB][i].ymin / 4 - 2;
|
||||
float down = CBB[currentCBB][i].ymax / 4 + 2;
|
||||
|
||||
// Snap to texels
|
||||
const float units_per_w = w / 1024;
|
||||
const float units_per_h = h / 1024;*/
|
||||
// Prevent a matrix without extent
|
||||
if (left != right && up != down)
|
||||
tmp_matrix.buildProjectionMatrixOrthoLH(left, right,
|
||||
down, up,
|
||||
CBB[currentCBB][i].zmin / 4 - 100, CBB[currentCBB][i].zmax / 4 + 2);
|
||||
}
|
||||
else
|
||||
tmp_matrix = getTighestFitOrthoProj(SunCamViewMatrix, vectors);
|
||||
|
||||
m_shadow_camnodes[i]->setProjectionMatrix(getTighestFitOrthoProj(SunCamViewMatrix, vectors) , true);
|
||||
m_shadow_camnodes[i]->setProjectionMatrix(tmp_matrix , true);
|
||||
m_shadow_camnodes[i]->render();
|
||||
|
||||
sun_ortho_matrix.push_back(getVideoDriver()->getTransform(video::ETS_PROJECTION) * getVideoDriver()->getTransform(video::ETS_VIEW));
|
||||
|
@ -1789,6 +1789,17 @@ namespace FullScreenShader
|
||||
AssignSamplerNames(Program, 0, "texture");
|
||||
}
|
||||
|
||||
/** Builds the compute program from Lightspaceboundingbox.comp and
 *  getPosFromUVDepth.frag, registers the "depth" sampler and the uniforms,
 *  and attaches the "BoundingBoxes" storage block to binding point 2. */
LightspaceBoundingBoxShader::LightspaceBoundingBoxShader()
{
    Program = LoadProgram(
        OBJECT,
        GL_COMPUTE_SHADER, file_manager->getAsset("shaders/Lightspaceboundingbox.comp").c_str(),
        GL_COMPUTE_SHADER, file_manager->getAsset("shaders/utils/getPosFromUVDepth.frag").c_str());

    AssignSamplerNames(Program, 0, "depth");
    AssignUniforms("SunCamMatrix", "split0", "split1", "split2", "splitmax");

    // Binding point of the bounding box storage block.
    const GLuint boundingBoxesBinding = 2;
    glShaderStorageBlockBinding(
        Program,
        glGetProgramResourceIndex(Program, GL_SHADER_STORAGE_BLOCK, "BoundingBoxes"),
        boundingBoxesBinding);
}
|
||||
|
||||
GlowShader::GlowShader()
|
||||
{
|
||||
Program = LoadProgram(OBJECT,
|
||||
|
@ -511,6 +511,12 @@ public:
|
||||
LinearizeDepthShader();
|
||||
};
|
||||
|
||||
/** Shader singleton taking one matrix and four float uniforms, and reading
 *  one nearest-filtered texture. */
class LightspaceBoundingBoxShader
    : public ShaderHelperSingleton<LightspaceBoundingBoxShader, core::matrix4, float, float, float, float>,
      public TextureRead<Nearest_Filtered>
{
public:
    LightspaceBoundingBoxShader();
};
|
||||
|
||||
class GlowShader : public ShaderHelperSingleton<GlowShader>, public TextureRead<Bilinear_Filtered>
|
||||
{
|
||||
public:
|
||||
|
Loading…
x
Reference in New Issue
Block a user