From e3483fccbe4796fea67acb111c12ee16749cde88 Mon Sep 17 00:00:00 2001 From: Benau Date: Fri, 2 Feb 2018 21:59:13 +0800 Subject: [PATCH] Unroll the skinned mesh shader for loop This gives some fps improvement for Adreno 306 on Android 5.0 --- data/shaders/sp_normal_visualizer.vert | 64 +++++++++++++++++------ data/shaders/sp_skinning.vert | 70 +++++++++++++++++--------- data/shaders/sp_skinning_shadow.vert | 66 ++++++++++++++++-------- 3 files changed, 139 insertions(+), 61 deletions(-) diff --git a/data/shaders/sp_normal_visualizer.vert b/data/shaders/sp_normal_visualizer.vert index a6c53a21e..83849ec86 100644 --- a/data/shaders/sp_normal_visualizer.vert +++ b/data/shaders/sp_normal_visualizer.vert @@ -50,21 +50,55 @@ void main() vec4 skinned_tangent = vec4(0.0); int skinning_offset = i_misc_data.x; - for (int i = 0; i < 4; i++) - { - mat4 joint_matrix = mat4( - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 1), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 2), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 3)); - skinned_position += i_weight[i] * joint_matrix * idle_position; - skinned_normal += i_weight[i] * joint_matrix * idle_normal; - skinned_tangent += i_weight[i] * joint_matrix * idle_tangent; - } +#ifdef GL_ES + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[1] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, 
clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0)); +#else + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[1] * mat4( + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 1), + 
texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 3)); +#endif + + skinned_position = joint_matrix * idle_position; + skinned_normal = joint_matrix * idle_normal; + skinned_tangent = joint_matrix * idle_tangent; float step_mix = step(float(skinning_offset), 0.0); skinned_position = mix(skinned_position, idle_position, step_mix); diff --git a/data/shaders/sp_skinning.vert b/data/shaders/sp_skinning.vert index 3cd558f05..af8cf21a5 100644 --- a/data/shaders/sp_skinning.vert +++ b/data/shaders/sp_skinning.vert @@ -63,33 +63,55 @@ void main() vec4 skinned_tangent = vec4(0.0); int skinning_offset = i_misc_data.x; - for (int i = 0; i < 4; i++) - { #ifdef GL_ES - mat4 joint_matrix = mat4( - texelFetch(skinning_tex, ivec2 - (0 , clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (1, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (2, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (3, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0)); + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[1] * mat4( + 
texelFetch(skinning_tex, ivec2(0, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0)); #else - mat4 joint_matrix = mat4( - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 1), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 2), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 3)); + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[1] * mat4( + texelFetch(skinning_tex, clamp(i_joint[1] + 
skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 3)); #endif - skinned_position += i_weight[i] * joint_matrix * idle_position; - skinned_normal += i_weight[i] * joint_matrix * idle_normal; - skinned_tangent += i_weight[i] * joint_matrix * idle_tangent; - } + + skinned_position = joint_matrix * idle_position; + skinned_normal = joint_matrix * idle_normal; + skinned_tangent = joint_matrix * idle_tangent; vec4 quaternion = normalize(vec4(i_rotation.xyz, i_scale.w)); vec4 world_position = getWorldPosition(i_origin, quaternion, i_scale.xyz, diff --git a/data/shaders/sp_skinning_shadow.vert b/data/shaders/sp_skinning_shadow.vert index 81e0b6c3f..96d337fb7 100644 --- a/data/shaders/sp_skinning_shadow.vert +++ b/data/shaders/sp_skinning_shadow.vert @@ -36,31 +36,53 @@ void main() vec4 skinned_position = vec4(0.0); int skinning_offset = i_misc_data.x; - for (int i = 0; i < 4; i++) - { #ifdef GL_ES - mat4 joint_matrix = mat4( - texelFetch(skinning_tex, ivec2 - (0, clamp(i_joint[i] + skinning_offset, 0, 
MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (1, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (2, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0), - texelFetch(skinning_tex, ivec2 - (3, clamp(i_joint[i] + skinning_offset, 0, MAX_BONES)), 0)); + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[1] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES)), 0)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, ivec2(0, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(1, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(2, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0), + texelFetch(skinning_tex, ivec2(3, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES)), 0)); #else - mat4 joint_matrix = mat4( - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, 
MAX_BONES) * 4), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 1), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 2), - texelFetch(skinning_tex, - clamp(i_joint[i] + skinning_offset, 0, MAX_BONES) * 4 + 3)); + mat4 joint_matrix = + i_weight[0] * mat4( + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[0] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[1] * mat4( + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[1] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[2] * mat4( + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[2] + skinning_offset, 0, MAX_BONES) * 4 + 3)) + + i_weight[3] * mat4( + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 1), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 2), + texelFetch(skinning_tex, clamp(i_joint[3] + skinning_offset, 0, MAX_BONES) * 4 + 3)); #endif - skinned_position += i_weight[i] * joint_matrix * idle_position; - } + + skinned_position = joint_matrix * idle_position; vec4 quaternion = normalize(vec4(i_rotation.xyz, i_scale.w)); vec4 world_position = 
getWorldPosition(i_origin, quaternion, i_scale.xyz,