Skip to content

Commit

Permalink
Shader nonuniform indexing changes to uniform
Browse files Browse the repository at this point in the history
  • Loading branch information
turanszkij committed Feb 11, 2024
1 parent 640595a commit 3ce3d5a
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 139 deletions.
7 changes: 1 addition & 6 deletions WickedEngine/shaders/lightingHF.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -426,12 +426,8 @@ inline float3 EnvironmentReflection_Global(in Surface surface)
// clipSpacePos: world space pixel position transformed into OBB space by probeProjection matrix
// MIP: mip level to sample
// return: color of the environment map (rgb), blend factor of the environment map (a)
inline float4 EnvironmentReflection_Local(int textureIndex, in Surface surface, in ShaderEntity probe, in float4x4 probeProjection, in float3 clipSpacePos)
inline float4 EnvironmentReflection_Local(in TextureCube cubemap, in Surface surface, in ShaderEntity probe, in float4x4 probeProjection, in float3 clipSpacePos)
{
[branch]
if (GetScene().globalprobe < 0)
return 0;

// Perform parallax correction of reflection ray (R) into OBB:
float3 RayLS = mul((float3x3)probeProjection, surface.R);
float3 FirstPlaneIntersect = (float3(1, 1, 1) - clipSpacePos) / RayLS;
Expand All @@ -441,7 +437,6 @@ inline float4 EnvironmentReflection_Local(int textureIndex, in Surface surface,
float3 IntersectPositionWS = surface.P + surface.R * Distance;
float3 R_parallaxCorrected = IntersectPositionWS - probe.position;

TextureCube cubemap = bindless_cubemaps[NonUniformResourceIndex(textureIndex)];
uint2 dim;
uint mips;
cubemap.GetDimensions(0, dim.x, dim.y, mips);
Expand Down
216 changes: 119 additions & 97 deletions WickedEngine/shaders/shadingHF.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,25 @@ inline void ForwardLighting(inout Surface surface, inout Lighting lighting)
const uint entity_index = bucket_bit_index;
bucket_bits ^= 1u << bucket_bit_index;

ShaderEntity probe = load_entity(GetFrame().envprobearray_offset + entity_index);
if ((probe.layerMask & surface.layerMask) == 0)
continue;

float4x4 probeProjection = load_entitymatrix(probe.GetMatrixIndex());
const int probeTexture = asint(probeProjection[3][0]);
probeProjection[3] = float4(0, 0, 0, 1);
TextureCube cubemap = bindless_cubemaps[probeTexture];

// under here will be VGPR!
[branch]
if (envmapAccumulation.a < 1)
{
ShaderEntity probe = load_entity(GetFrame().envprobearray_offset + entity_index);
if ((probe.layerMask & surface.layerMask) == 0)
continue;

float4x4 probeProjection = load_entitymatrix(probe.GetMatrixIndex());
const int probeTexture = asint(probeProjection[3][0]);
probeProjection[3] = float4(0, 0, 0, 1);
const float3 clipSpacePos = mul(probeProjection, float4(surface.P, 1)).xyz;
const float3 uvw = clipspace_to_uv(clipSpacePos.xyz);
[branch]
if (is_saturated(uvw))
{
const float4 envmapColor = EnvironmentReflection_Local(probeTexture, surface, probe, probeProjection, clipSpacePos);
const float4 envmapColor = EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos);
// perform manual blending of probes:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
envmapAccumulation.rgb = mad(1 - envmapAccumulation.a, envmapColor.a * envmapColor.rgb, envmapAccumulation.rgb);
Expand Down Expand Up @@ -183,7 +186,7 @@ inline void ForwardDecals(inout Surface surface, inout float4 surfaceMap, Sample
uint bucket_bits = xForwardDecalMask;

[loop]
while (bucket_bits != 0 && decalAccumulation.a < 1 && decalBumpAccumulation.a < 1 && decalSurfaceAccumulationAlpha < 1)
while (bucket_bits != 0)
{
// Retrieve global entity index from local bucket, then remove bit from local bucket:
const uint bucket_bit_index = firstbitlow(bucket_bits);
Expand All @@ -200,47 +203,53 @@ inline void ForwardDecals(inout Surface surface, inout float4 surfaceMap, Sample
float decalNormalStrength = decalProjection[3][2];
int decalSurfacemap = asint(decalProjection[3][3]);
decalProjection[3] = float4(0, 0, 0, 1);
const float3 clipSpacePos = mul(decalProjection, float4(surface.P, 1)).xyz;
float3 uvw = clipspace_to_uv(clipSpacePos.xyz);

// under here will be VGPR!
[branch]
if (is_saturated(uvw))
if(decalAccumulation.a < 1 && decalBumpAccumulation.a < 1 && decalSurfaceAccumulationAlpha < 1)
{
uvw.xy = mad(uvw.xy, decal.shadowAtlasMulAdd.xy, decal.shadowAtlasMulAdd.zw);
// mipmapping needs to be performed by hand:
const float2 decalDX = mul(P_dx, (float3x3)decalProjection).xy;
const float2 decalDY = mul(P_dy, (float3x3)decalProjection).xy;
float4 decalColor = decal.GetColor();
// blend out if close to cube Z:
const float edgeBlend = 1 - pow(saturate(abs(clipSpacePos.z)), 8);
const float slopeBlend = decal.GetConeAngleCos() > 0 ? pow(saturate(dot(surface.N, decal.GetDirection())), decal.GetConeAngleCos()) : 1;
decalColor.a *= edgeBlend * slopeBlend;
const float3 clipSpacePos = mul(decalProjection, float4(surface.P, 1)).xyz;
float3 uvw = clipspace_to_uv(clipSpacePos.xyz);
[branch]
if (decalTexture >= 0)
if (is_saturated(uvw))
{
decalColor *= bindless_textures[NonUniformResourceIndex(decalTexture)].SampleGrad(sam, uvw.xy, decalDX, decalDY);
if ((decal.GetFlags() & ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA) == 0)
uvw.xy = mad(uvw.xy, decal.shadowAtlasMulAdd.xy, decal.shadowAtlasMulAdd.zw);
// mipmapping needs to be performed by hand:
const float2 decalDX = mul(P_dx, (float3x3)decalProjection).xy;
const float2 decalDY = mul(P_dy, (float3x3)decalProjection).xy;
float4 decalColor = decal.GetColor();
// blend out if close to cube Z:
const float edgeBlend = 1 - pow(saturate(abs(clipSpacePos.z)), 8);
const float slopeBlend = decal.GetConeAngleCos() > 0 ? pow(saturate(dot(surface.N, decal.GetDirection())), decal.GetConeAngleCos()) : 1;
decalColor.a *= edgeBlend * slopeBlend;
[branch]
if (decalTexture >= 0)
{
// perform manual blending of decals:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
decalAccumulation.rgb = mad(1 - decalAccumulation.a, decalColor.a * decalColor.rgb, decalAccumulation.rgb);
decalAccumulation.a = mad(1 - decalColor.a, decalAccumulation.a, decalColor.a);
decalColor *= bindless_textures[decalTexture].SampleGrad(sam, uvw.xy, decalDX, decalDY);
if ((decal.GetFlags() & ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA) == 0)
{
// perform manual blending of decals:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
decalAccumulation.rgb = mad(1 - decalAccumulation.a, decalColor.a * decalColor.rgb, decalAccumulation.rgb);
decalAccumulation.a = mad(1 - decalColor.a, decalAccumulation.a, decalColor.a);
}
}
[branch]
if (decalNormal >= 0)
{
float3 decalBumpColor = float3(bindless_textures[decalNormal].SampleGrad(sam, uvw.xy, decalDX, decalDY).rg, 1);
decalBumpColor = decalBumpColor * 2 - 1;
decalBumpColor.rg *= decalNormalStrength;
decalBumpAccumulation.rgb = mad(1 - decalBumpAccumulation.a, decalColor.a * decalBumpColor.rgb, decalBumpAccumulation.rgb);
decalBumpAccumulation.a = mad(1 - decalColor.a, decalBumpAccumulation.a, decalColor.a);
}
[branch]
if (decalSurfacemap >= 0)
{
float4 decalSurfaceColor = bindless_textures[decalSurfacemap].SampleGrad(sam, uvw.xy, decalDX, decalDY);
decalSurfaceAccumulation = mad(1 - decalSurfaceAccumulationAlpha, decalColor.a * decalSurfaceColor, decalSurfaceAccumulation);
decalSurfaceAccumulationAlpha = mad(1 - decalColor.a, decalSurfaceAccumulationAlpha, decalColor.a);
}
}
[branch]
if (decalNormal >= 0)
{
float3 decalBumpColor = float3(bindless_textures[NonUniformResourceIndex(decalNormal)].SampleGrad(sam, uvw.xy, decalDX, decalDY).rg, 1);
decalBumpColor = decalBumpColor * 2 - 1;
decalBumpColor.rg *= decalNormalStrength;
decalBumpAccumulation.rgb = mad(1 - decalBumpAccumulation.a, decalColor.a * decalBumpColor.rgb, decalBumpAccumulation.rgb);
decalBumpAccumulation.a = mad(1 - decalColor.a, decalBumpAccumulation.a, decalColor.a);
}
[branch]
if (decalSurfacemap >= 0)
{
float4 decalSurfaceColor = bindless_textures[NonUniformResourceIndex(decalSurfacemap)].SampleGrad(sam, uvw.xy, decalDX, decalDY);
decalSurfaceAccumulation = mad(1 - decalSurfaceAccumulationAlpha, decalColor.a * decalSurfaceColor, decalSurfaceAccumulation);
decalSurfaceAccumulationAlpha = mad(1 - decalColor.a, decalSurfaceAccumulationAlpha, decalColor.a);
}
}
}
Expand Down Expand Up @@ -270,8 +279,8 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
// Loop through envmap buckets in the tile:
const uint first_item = GetFrame().envprobearray_offset;
const uint last_item = first_item + GetFrame().envprobearray_count - 1;
const uint first_bucket = first_item / 32;
const uint last_bucket = min(last_item / 32, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
const uint first_bucket = first_item / 32u;
const uint last_bucket = min(last_item / 32u, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
[loop]
for (uint bucket = first_bucket; bucket <= last_bucket; ++bucket)
{
Expand All @@ -291,7 +300,7 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
bucket_bits ^= 1u << bucket_bit_index;

[branch]
if (entity_index >= first_item && entity_index <= last_item && envmapAccumulation.a < 1)
if (entity_index >= first_item && entity_index <= last_item)
{
ShaderEntity probe = load_entity(entity_index);
if ((probe.layerMask & surface.layerMask) == 0)
Expand All @@ -300,22 +309,29 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
float4x4 probeProjection = load_entitymatrix(probe.GetMatrixIndex());
const int probeTexture = asint(probeProjection[3][0]);
probeProjection[3] = float4(0, 0, 0, 1);
const float3 clipSpacePos = mul(probeProjection, float4(surface.P, 1)).xyz;
const float3 uvw = clipspace_to_uv(clipSpacePos.xyz);
TextureCube cubemap = bindless_cubemaps[probeTexture];

// under here will be VGPR!
[branch]
if (is_saturated(uvw))
if(envmapAccumulation.a < 1)
{
const float4 envmapColor = EnvironmentReflection_Local(probeTexture, surface, probe, probeProjection, clipSpacePos);
// perform manual blending of probes:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
envmapAccumulation.rgb = mad(1 - envmapAccumulation.a, envmapColor.a * envmapColor.rgb, envmapAccumulation.rgb);
envmapAccumulation.a = mad(1 - envmapColor.a, envmapAccumulation.a, envmapColor.a);
const float3 clipSpacePos = mul(probeProjection, float4(surface.P, 1)).xyz;
const float3 uvw = clipspace_to_uv(clipSpacePos.xyz);
[branch]
if (envmapAccumulation.a >= 1.0)
if (is_saturated(uvw))
{
// force exit:
bucket = SHADER_ENTITY_TILE_BUCKET_COUNT;
break;
const float4 envmapColor = EnvironmentReflection_Local(cubemap, surface, probe, probeProjection, clipSpacePos);
// perform manual blending of probes:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
envmapAccumulation.rgb = mad(1 - envmapAccumulation.a, envmapColor.a * envmapColor.rgb, envmapAccumulation.rgb);
envmapAccumulation.a = mad(1 - envmapColor.a, envmapAccumulation.a, envmapColor.a);
[branch]
if (envmapAccumulation.a >= 1.0)
{
// force exit:
bucket = SHADER_ENTITY_TILE_BUCKET_COUNT;
break;
}
}
}
}
Expand Down Expand Up @@ -362,8 +378,8 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f
// Loop through light buckets in the tile:
const uint first_item = GetFrame().lightarray_offset;
const uint last_item = first_item + GetFrame().lightarray_count - 1;
const uint first_bucket = first_item / 32;
const uint last_bucket = min(last_item / 32, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
const uint first_bucket = first_item / 32u;
const uint last_bucket = min(last_item / 32u, max(0, SHADER_ENTITY_TILE_BUCKET_COUNT - 1));
[loop]
for (uint bucket = first_bucket; bucket <= last_bucket; ++bucket)
{
Expand Down Expand Up @@ -516,7 +532,7 @@ inline void TiledDecals(inout Surface surface, uint flatTileIndex, inout float4
#endif // ENTITY_TILE_UNIFORM

[loop]
while (bucket_bits != 0 && decalAccumulation.a < 1 && decalBumpAccumulation.a < 1 && decalSurfaceAccumulationAlpha < 1)
while (bucket_bits != 0)
{
// Retrieve global entity index from local bucket, then remove bit from local bucket:
const uint bucket_bit_index = firstbitlow(bucket_bits);
Expand All @@ -536,47 +552,53 @@ inline void TiledDecals(inout Surface surface, uint flatTileIndex, inout float4
float decalNormalStrength = decalProjection[3][2];
int decalSurfacemap = asint(decalProjection[3][3]);
decalProjection[3] = float4(0, 0, 0, 1);
const float3 clipSpacePos = mul(decalProjection, float4(surface.P, 1)).xyz;
float3 uvw = clipspace_to_uv(clipSpacePos.xyz);

// under here will be VGPR!
[branch]
if (is_saturated(uvw))
if(decalAccumulation.a < 1 && decalBumpAccumulation.a < 1 && decalSurfaceAccumulationAlpha < 1)
{
uvw.xy = mad(uvw.xy, decal.shadowAtlasMulAdd.xy, decal.shadowAtlasMulAdd.zw);
// mipmapping needs to be performed by hand:
const float2 decalDX = mul(P_dx, (float3x3)decalProjection).xy;
const float2 decalDY = mul(P_dy, (float3x3)decalProjection).xy;
float4 decalColor = decal.GetColor();
// blend out if close to cube Z:
const float edgeBlend = 1 - pow(saturate(abs(clipSpacePos.z)), 8);
const float slopeBlend = decal.GetConeAngleCos() > 0 ? pow(saturate(dot(surface.N, decal.GetDirection())), decal.GetConeAngleCos()) : 1;
decalColor.a *= edgeBlend * slopeBlend;
const float3 clipSpacePos = mul(decalProjection, float4(surface.P, 1)).xyz;
float3 uvw = clipspace_to_uv(clipSpacePos.xyz);
[branch]
if (decalTexture >= 0)
if (is_saturated(uvw))
{
decalColor *= bindless_textures[NonUniformResourceIndex(decalTexture)].SampleGrad(sam, uvw.xy, decalDX, decalDY);
if ((decal.GetFlags() & ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA) == 0)
uvw.xy = mad(uvw.xy, decal.shadowAtlasMulAdd.xy, decal.shadowAtlasMulAdd.zw);
// mipmapping needs to be performed by hand:
const float2 decalDX = mul(P_dx, (float3x3)decalProjection).xy;
const float2 decalDY = mul(P_dy, (float3x3)decalProjection).xy;
float4 decalColor = decal.GetColor();
// blend out if close to cube Z:
const float edgeBlend = 1 - pow(saturate(abs(clipSpacePos.z)), 8);
const float slopeBlend = decal.GetConeAngleCos() > 0 ? pow(saturate(dot(surface.N, decal.GetDirection())), decal.GetConeAngleCos()) : 1;
decalColor.a *= edgeBlend * slopeBlend;
[branch]
if (decalTexture >= 0)
{
// perform manual blending of decals:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
decalAccumulation.rgb = mad(1 - decalAccumulation.a, decalColor.a * decalColor.rgb, decalAccumulation.rgb);
decalAccumulation.a = mad(1 - decalColor.a, decalAccumulation.a, decalColor.a);
decalColor *= bindless_textures[decalTexture].SampleGrad(sam, uvw.xy, decalDX, decalDY);
if ((decal.GetFlags() & ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA) == 0)
{
// perform manual blending of decals:
// NOTE: they are sorted top-to-bottom, but blending is performed bottom-to-top
decalAccumulation.rgb = mad(1 - decalAccumulation.a, decalColor.a * decalColor.rgb, decalAccumulation.rgb);
decalAccumulation.a = mad(1 - decalColor.a, decalAccumulation.a, decalColor.a);
}
}
[branch]
if (decalNormal >= 0)
{
float3 decalBumpColor = float3(bindless_textures[decalNormal].SampleGrad(sam, uvw.xy, decalDX, decalDY).rg, 1);
decalBumpColor = decalBumpColor * 2 - 1;
decalBumpColor.rg *= decalNormalStrength;
decalBumpAccumulation.rgb = mad(1 - decalBumpAccumulation.a, decalColor.a * decalBumpColor.rgb, decalBumpAccumulation.rgb);
decalBumpAccumulation.a = mad(1 - decalColor.a, decalBumpAccumulation.a, decalColor.a);
}
[branch]
if (decalSurfacemap >= 0)
{
float4 decalSurfaceColor = bindless_textures[decalSurfacemap].SampleGrad(sam, uvw.xy, decalDX, decalDY);
decalSurfaceAccumulation = mad(1 - decalSurfaceAccumulationAlpha, decalColor.a * decalSurfaceColor, decalSurfaceAccumulation);
decalSurfaceAccumulationAlpha = mad(1 - decalColor.a, decalSurfaceAccumulationAlpha, decalColor.a);
}
}
[branch]
if (decalNormal >= 0)
{
float3 decalBumpColor = float3(bindless_textures[NonUniformResourceIndex(decalNormal)].SampleGrad(sam, uvw.xy, decalDX, decalDY).rg, 1);
decalBumpColor = decalBumpColor * 2 - 1;
decalBumpColor.rg *= decalNormalStrength;
decalBumpAccumulation.rgb = mad(1 - decalBumpAccumulation.a, decalColor.a * decalBumpColor.rgb, decalBumpAccumulation.rgb);
decalBumpAccumulation.a = mad(1 - decalColor.a, decalBumpAccumulation.a, decalColor.a);
}
[branch]
if (decalSurfacemap >= 0)
{
float4 decalSurfaceColor = bindless_textures[NonUniformResourceIndex(decalSurfacemap)].SampleGrad(sam, uvw.xy, decalDX, decalDY);
decalSurfaceAccumulation = mad(1 - decalSurfaceAccumulationAlpha, decalColor.a * decalSurfaceColor, decalSurfaceAccumulation);
decalSurfaceAccumulationAlpha = mad(1 - decalColor.a, decalSurfaceAccumulationAlpha, decalColor.a);
}
}
}
Expand Down
Loading

0 comments on commit 3ce3d5a

Please sign in to comment.