diff --git a/Editor/GraphicsWindow.cpp b/Editor/GraphicsWindow.cpp index 0f0d3d58a0..fb848b0005 100644 --- a/Editor/GraphicsWindow.cpp +++ b/Editor/GraphicsWindow.cpp @@ -826,6 +826,53 @@ void GraphicsWindow::Create(EditorComponent* _editor) }); AddWidget(&lightShaftsStrengthStrengthSlider); + capsuleshadowCheckbox.Create("Capsule Shadows: "); + capsuleshadowCheckbox.SetTooltip("Enable ambient occlusion capsule shadows."); + capsuleshadowCheckbox.SetSize(XMFLOAT2(hei, hei)); + capsuleshadowCheckbox.SetPos(XMFLOAT2(x, y += step)); + if (editor->main->config.GetSection("graphics").Has("capsule_shadows")) + { + wi::renderer::SetCapsuleShadowEnabled(editor->main->config.GetSection("graphics").GetBool("capsule_shadows")); + } + capsuleshadowCheckbox.OnClick([=](wi::gui::EventArgs args) { + wi::renderer::SetCapsuleShadowEnabled(args.bValue); + editor->main->config.GetSection("graphics").Set("capsule_shadows", args.bValue); + editor->main->config.Commit(); + }); + AddWidget(&capsuleshadowCheckbox); + + capsuleshadowFadeSlider.Create(0, 1, 0.2f, 100, "CapsuleShadow.Fade: "); + capsuleshadowFadeSlider.SetText("Capsule Shadow Fade: "); + capsuleshadowFadeSlider.SetTooltip("Set capsule shadow fading."); + capsuleshadowFadeSlider.SetSize(XMFLOAT2(mod_wid, hei)); + capsuleshadowFadeSlider.SetPos(XMFLOAT2(x + 100, y)); + if (editor->main->config.GetSection("graphics").Has("capsule_shadow_fade")) + { + wi::renderer::SetCapsuleShadowFade(editor->main->config.GetSection("graphics").GetFloat("capsule_shadow_fade")); + } + capsuleshadowFadeSlider.OnSlide([=](wi::gui::EventArgs args) { + wi::renderer::SetCapsuleShadowFade(args.fValue); + editor->main->config.GetSection("graphics").Set("capsule_shadow_fade", args.fValue); + editor->main->config.Commit(); + }); + AddWidget(&capsuleshadowFadeSlider); + + capsuleshadowAngleSlider.Create(0, 90, 45, 90, "CapsuleShadow.Angle: "); + capsuleshadowAngleSlider.SetText("Angle: "); + capsuleshadowAngleSlider.SetTooltip("Set capsule shadow spread 
angle."); + capsuleshadowAngleSlider.SetSize(XMFLOAT2(mod_wid, hei)); + capsuleshadowAngleSlider.SetPos(XMFLOAT2(x + 100, y)); + if (editor->main->config.GetSection("graphics").Has("capsule_shadow_angle")) + { + wi::renderer::SetCapsuleShadowAngle(wi::math::DegreesToRadians(editor->main->config.GetSection("graphics").GetFloat("capsule_shadow_angle"))); + } + capsuleshadowAngleSlider.OnSlide([=](wi::gui::EventArgs args) { + wi::renderer::SetCapsuleShadowAngle(wi::math::DegreesToRadians(args.fValue)); + editor->main->config.GetSection("graphics").Set("capsule_shadow_angle", args.fValue); + editor->main->config.Commit(); + }); + AddWidget(&capsuleshadowAngleSlider); + aoComboBox.Create("AO: "); aoComboBox.SetTooltip("Choose Ambient Occlusion type. RTAO is only available if hardware supports ray tracing"); aoComboBox.SetScriptTip("RenderPath3D::SetAO(int value)"); @@ -1558,6 +1605,9 @@ void GraphicsWindow::Update() lensFlareCheckBox.SetCheck(editor->renderPath->getLensFlareEnabled()); lightShaftsCheckBox.SetCheck(editor->renderPath->getLightShaftsEnabled()); lightShaftsStrengthStrengthSlider.SetValue(editor->renderPath->getLightShaftsStrength()); + capsuleshadowCheckbox.SetCheck(wi::renderer::IsCapsuleShadowEnabled()); + capsuleshadowAngleSlider.SetValue(wi::math::RadiansToDegrees(wi::renderer::GetCapsuleShadowAngle())); + capsuleshadowFadeSlider.SetValue(wi::renderer::GetCapsuleShadowFade()); aoComboBox.SetSelectedWithoutCallback(editor->renderPath->getAO()); aoPowerSlider.SetValue((float)editor->renderPath->getAOPower()); @@ -1845,6 +1895,9 @@ void GraphicsWindow::ResizeLayout() add_right(lensFlareCheckBox); add_right(lightShaftsStrengthStrengthSlider); lightShaftsCheckBox.SetPos(XMFLOAT2(lightShaftsStrengthStrengthSlider.GetPos().x - lightShaftsCheckBox.GetSize().x - 80, lightShaftsStrengthStrengthSlider.GetPos().y)); + add_right(capsuleshadowAngleSlider); + add_right(capsuleshadowFadeSlider); + 
capsuleshadowCheckbox.SetPos(XMFLOAT2(capsuleshadowAngleSlider.GetPos().x - capsuleshadowCheckbox.GetSize().x - 80, capsuleshadowAngleSlider.GetPos().y)); add(aoComboBox); add(aoPowerSlider); add(aoRangeSlider); diff --git a/Editor/GraphicsWindow.h b/Editor/GraphicsWindow.h index 9cc3ec9b90..d5b8f45ecf 100644 --- a/Editor/GraphicsWindow.h +++ b/Editor/GraphicsWindow.h @@ -56,6 +56,9 @@ class GraphicsWindow : public wi::gui::Window wi::gui::CheckBox lensFlareCheckBox; wi::gui::CheckBox lightShaftsCheckBox; wi::gui::Slider lightShaftsStrengthStrengthSlider; + wi::gui::CheckBox capsuleshadowCheckbox; + wi::gui::Slider capsuleshadowFadeSlider; + wi::gui::Slider capsuleshadowAngleSlider; wi::gui::ComboBox aoComboBox; wi::gui::Slider aoPowerSlider; wi::gui::Slider aoRangeSlider; diff --git a/Editor/HumanoidWindow.cpp b/Editor/HumanoidWindow.cpp index 448128d80d..b6bcd27b92 100644 --- a/Editor/HumanoidWindow.cpp +++ b/Editor/HumanoidWindow.cpp @@ -80,6 +80,19 @@ void HumanoidWindow::Create(EditorComponent* _editor) }); AddWidget(&ragdollCheckBox); + capsuleShadowCheckBox.Create("Capsule Shadow Disabled: "); + capsuleShadowCheckBox.SetTooltip("Disable capsule shadow for this specific humanoid."); + capsuleShadowCheckBox.SetSize(XMFLOAT2(hei, hei)); + capsuleShadowCheckBox.OnClick([=](wi::gui::EventArgs args) { + wi::scene::Scene& scene = editor->GetCurrentScene(); + HumanoidComponent* humanoid = scene.humanoids.GetComponent(entity); + if (humanoid != nullptr) + { + humanoid->SetCapsuleShadowDisabled(args.bValue); + } + }); + AddWidget(&capsuleShadowCheckBox); + headRotMaxXSlider.Create(0, 90, 60, 180, "Head horizontal: "); headRotMaxXSlider.SetTooltip("Limit horizontal head movement (input in degrees)"); headRotMaxXSlider.SetSize(XMFLOAT2(wid, hei)); @@ -275,6 +288,7 @@ void HumanoidWindow::SetEntity(Entity entity) { lookatCheckBox.SetCheck(humanoid->IsLookAtEnabled()); ragdollCheckBox.SetCheck(humanoid->IsRagdollPhysicsEnabled()); + 
capsuleShadowCheckBox.SetCheck(humanoid->IsCapsuleShadowDisabled()); headRotMaxXSlider.SetValue(wi::math::RadiansToDegrees(humanoid->head_rotation_max.x)); headRotMaxYSlider.SetValue(wi::math::RadiansToDegrees(humanoid->head_rotation_max.y)); headRotSpeedSlider.SetValue(humanoid->head_rotation_speed); @@ -572,6 +586,7 @@ void HumanoidWindow::ResizeLayout() lookatMouseCheckBox.SetPos(XMFLOAT2(lookatCheckBox.GetPos().x - 120, lookatCheckBox.GetPos().y)); add(lookatEntityCombo); add_right(ragdollCheckBox); + add_right(capsuleShadowCheckBox); add(headRotMaxXSlider); add(headRotMaxYSlider); add(headRotSpeedSlider); diff --git a/Editor/HumanoidWindow.h b/Editor/HumanoidWindow.h index 6de9086e0f..9e5efb0a40 100644 --- a/Editor/HumanoidWindow.h +++ b/Editor/HumanoidWindow.h @@ -16,6 +16,7 @@ class HumanoidWindow : public wi::gui::Window wi::gui::CheckBox lookatCheckBox; wi::gui::ComboBox lookatEntityCombo; wi::gui::CheckBox ragdollCheckBox; + wi::gui::CheckBox capsuleShadowCheckBox; wi::gui::Slider headRotMaxXSlider; wi::gui::Slider headRotMaxYSlider; wi::gui::Slider headRotSpeedSlider; diff --git a/Editor/MaterialWindow.cpp b/Editor/MaterialWindow.cpp index 56c4861f70..567adf7c34 100644 --- a/Editor/MaterialWindow.cpp +++ b/Editor/MaterialWindow.cpp @@ -262,6 +262,22 @@ void MaterialWindow::Create(EditorComponent* _editor) }); AddWidget(&coplanarCheckBox); + capsuleShadowCheckBox.Create("Capsule Shadow Disabled: "); + capsuleShadowCheckBox.SetTooltip("Disable receiving capsule shadows for this material."); + capsuleShadowCheckBox.SetPos(XMFLOAT2(x, y += step)); + capsuleShadowCheckBox.SetSize(XMFLOAT2(hei, hei)); + capsuleShadowCheckBox.OnClick([&](wi::gui::EventArgs args) { + wi::scene::Scene& scene = editor->GetCurrentScene(); + for (auto& x : editor->translator.selected) + { + MaterialComponent* material = get_material(scene, x); + if (material == nullptr) + continue; + material->SetCapsuleShadowDisabled(args.bValue); + } + }); + AddWidget(&capsuleShadowCheckBox); + 
shaderTypeComboBox.Create("Shader: "); shaderTypeComboBox.SetTooltip("Select a shader for this material. \nCustom shaders (*) will also show up here (see wi::renderer:RegisterCustomShader() for more info.)\nNote that custom shaders (*) can't select between blend modes, as they are created with an explicit blend mode."); @@ -1187,6 +1203,7 @@ void MaterialWindow::SetEntity(Entity entity) preferUncompressedCheckBox.SetCheck(material->IsPreferUncompressedTexturesEnabled()); disableStreamingCheckBox.SetCheck(material->IsTextureStreamingDisabled()); coplanarCheckBox.SetCheck(material->IsCoplanarBlending()); + capsuleShadowCheckBox.SetCheck(material->IsCapsuleShadowDisabled()); normalMapSlider.SetValue(material->normalMapStrength); roughnessSlider.SetValue(material->roughness); reflectanceSlider.SetValue(material->reflectance); @@ -1379,6 +1396,7 @@ void MaterialWindow::ResizeLayout() add_right(preferUncompressedCheckBox); add_right(disableStreamingCheckBox); add_right(coplanarCheckBox); + add_right(capsuleShadowCheckBox); add(shaderTypeComboBox); add(blendModeComboBox); add(shadingRateComboBox); diff --git a/Editor/MaterialWindow.h b/Editor/MaterialWindow.h index fa897472e9..6ecf8b2a7b 100644 --- a/Editor/MaterialWindow.h +++ b/Editor/MaterialWindow.h @@ -24,6 +24,7 @@ class MaterialWindow : public wi::gui::Window wi::gui::CheckBox preferUncompressedCheckBox; wi::gui::CheckBox disableStreamingCheckBox; wi::gui::CheckBox coplanarCheckBox; + wi::gui::CheckBox capsuleShadowCheckBox; wi::gui::ComboBox shaderTypeComboBox; wi::gui::ComboBox blendModeComboBox; wi::gui::ComboBox shadingRateComboBox; diff --git a/WickedEngine/shaders/ShaderInterop_Renderer.h b/WickedEngine/shaders/ShaderInterop_Renderer.h index 50ca003509..5beb2e3513 100644 --- a/WickedEngine/shaders/ShaderInterop_Renderer.h +++ b/WickedEngine/shaders/ShaderInterop_Renderer.h @@ -81,6 +81,7 @@ enum SHADERMATERIAL_OPTIONS SHADERMATERIAL_OPTION_BIT_ADDITIVE = 1 << 9, SHADERMATERIAL_OPTION_BIT_UNLIT = 1 << 10, 
SHADERMATERIAL_OPTION_BIT_USE_VERTEXAO = 1 << 11, + SHADERMATERIAL_OPTION_BIT_CAPSULE_SHADOW_DISABLED = 1 << 12, }; // Same as MaterialComponent::TEXTURESLOT @@ -460,6 +461,7 @@ struct alignas(16) ShaderMaterial inline bool IsTransparent() { return GetOptions() & SHADERMATERIAL_OPTION_BIT_TRANSPARENT; } inline bool IsAdditive() { return GetOptions() & SHADERMATERIAL_OPTION_BIT_ADDITIVE; } inline bool IsDoubleSided() { return GetOptions() & SHADERMATERIAL_OPTION_BIT_DOUBLE_SIDED; } + inline bool IsCapsuleShadowDisabled() { return GetOptions() & SHADERMATERIAL_OPTION_BIT_CAPSULE_SHADOW_DISABLED; } }; // For binning shading based on shader types: @@ -1017,9 +1019,13 @@ enum SHADER_ENTITY_FLAGS { ENTITY_FLAG_LIGHT_STATIC = 1 << 0, ENTITY_FLAG_LIGHT_VOLUMETRICCLOUDS = 1 << 1, - ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA = 1 << 0, + ENTITY_FLAG_DECAL_BASECOLOR_ONLY_ALPHA = 1 << 2, + ENTITY_FLAG_CAPSULE_SHADOW_COLLIDER = 1 << 3, }; +static const float CAPSULE_SHADOW_AFFECTION_RANGE = 2; // how far away the capsule shadow can reach outside of their own radius +static const float CAPSULE_SHADOW_BOLDEN = 1.2f; + static const uint SHADER_ENTITY_COUNT = 256; static const uint SHADER_ENTITY_TILE_BUCKET_COUNT = SHADER_ENTITY_COUNT / 32; @@ -1131,6 +1137,7 @@ enum FRAME_OPTIONS OPTION_BIT_REALISTIC_SKY_HIGH_QUALITY = 1 << 17, OPTION_BIT_REALISTIC_SKY_RECEIVE_SHADOW = 1 << 18, OPTION_BIT_VOLUMETRICCLOUDS_RECEIVE_SHADOW = 1 << 19, + OPTION_BIT_CAPSULE_SHADOW_ENABLED = 1 << 20, }; // ---------- Common Constant buffers: ----------------- @@ -1158,6 +1165,11 @@ struct alignas(16) FrameCB uint giboost_packed; // force fp16 load uint entity_culling_count; + uint capsuleshadow_fade_angle; + int indirect_debugbufferindex; + int padding0; + int padding1; + float blue_noise_phase; int texture_random64x64_index; int texture_bluenoise_index; @@ -1193,7 +1205,7 @@ struct alignas(16) FrameCB uint lights; uint decals; uint forces; - int indirect_debugbufferindex; + uint padding2; ShaderEntity 
entityArray[SHADER_ENTITY_COUNT]; float4x4 matrixArray[SHADER_ENTITY_COUNT]; diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems b/WickedEngine/shaders/Shaders_SOURCE.vcxitems index 42bf35be01..312939e30e 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems @@ -18,6 +18,7 @@ + diff --git a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters index 17b5556804..185da6cb08 100644 --- a/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters +++ b/WickedEngine/shaders/Shaders_SOURCE.vcxitems.filters @@ -174,6 +174,9 @@ HF + + HF + diff --git a/WickedEngine/shaders/capsuleShadowHF.hlsli b/WickedEngine/shaders/capsuleShadowHF.hlsli new file mode 100644 index 0000000000..5db6021764 --- /dev/null +++ b/WickedEngine/shaders/capsuleShadowHF.hlsli @@ -0,0 +1,69 @@ +#ifndef CAPSULE_SHADOW_HF +#define CAPSULE_SHADOW_HF + +// Source: https://www.shadertoy.com/view/3stcD4 + +float acosFast(float x) { + // Lagarde 2014, "Inverse trigonometric functions GPU optimization for AMD GCN architecture" + // This is the approximation of degree 1, with a max absolute error of 9.0x10^-3 + float y = abs(x); + float p = -0.1565827 * y + 1.570796; + p *= sqrt(1.0 - y); + return x >= 0.0 ? p : PI - p; +} + +float acosFastPositive(float x) { + // Lagarde 2014, "Inverse trigonometric functions GPU optimization for AMD GCN architecture" + float p = -0.1565827 * x + 1.570796; + return p * sqrt(1.0 - x); +} + +float sphericalCapsIntersection(float cosCap1, float cosCap2, float cap2, float cosDistance) { + // Oat and Sander 2007, "Ambient Aperture Lighting" + // Approximation mentioned by Jimenez et al. 
2016 + float r1 = acosFastPositive(cosCap1); + float r2 = cap2; + float d = acosFast(cosDistance); + + // We work with cosine angles, replace the original paper's use of + // cos(min(r1, r2)) with max(cosCap1, cosCap2) + // We also remove a multiplication by 2 * PI to simplify the computation + // since we divide by 2 * PI at the call site + + if (min(r1, r2) <= max(r1, r2) - d) { + return 1.0 - max(cosCap1, cosCap2); + } else if (r1 + r2 <= d) { + return 0.0; + } + + float delta = abs(r1 - r2); + float x = 1.0 - saturate((d - delta) / max(r1 + r2 - delta, 0.0001)); + // simplified smoothstep() + float area = sqr(x) * (-2.0 * x + 3.0); + return area * (1.0 - max(cosCap1, cosCap2)); +} + +float directionalOcclusionSphere(in float3 pos, in float4 sphere, in float4 cone) { + float3 occluder = sphere.xyz - pos; + float occluderLength2 = dot(occluder, occluder); + float3 occluderDir = occluder * rsqrt(occluderLength2); + + float cosPhi = dot(occluderDir, cone.xyz); + // sqr(sphere.w) should be a uniform --> capsuleRadius^2 + float cosTheta = sqrt(occluderLength2 / (sqr(sphere.w) + occluderLength2)); + float cosCone = cos(cone.w); + + return 1.0 - sphericalCapsIntersection(cosTheta, cosCone, cone.w, cosPhi) / (1.0 - cosCone); +} + +float directionalOcclusionCapsule(in float3 pos, in float3 capsuleA, in float3 capsuleB, in float capsuleRadius, in float4 cone) { + float3 Ld = capsuleB - capsuleA; + float3 L0 = capsuleA - pos; + float a = dot(cone.xyz, Ld); + float t = saturate(dot(L0, a * cone.xyz - Ld) / (dot(Ld, Ld) - a * a)); + float3 posToRay = capsuleA + t * Ld; + + return directionalOcclusionSphere(pos, float4(posToRay, capsuleRadius), cone); +} + +#endif // CAPSULE_SHADOW_HF diff --git a/WickedEngine/shaders/globals.hlsli b/WickedEngine/shaders/globals.hlsli index c3c5b197db..bba8d5d814 100644 --- a/WickedEngine/shaders/globals.hlsli +++ b/WickedEngine/shaders/globals.hlsli @@ -871,7 +871,7 @@ inline half3 clipspace_to_uv(in half3 clipspace) } inline half3 
GetSunColor() { return unpack_half3(GetWeather().sun_color); } // sun color with intensity applied -inline half3 GetSunDirection() { return unpack_half3(GetWeather().sun_direction); } +inline half3 GetSunDirection() { return normalize(unpack_half3(GetWeather().sun_direction)); } inline half3 GetHorizonColor() { return unpack_half3(GetWeather().horizon); } inline half3 GetZenithColor() { return unpack_half3(GetWeather().zenith); } inline half3 GetAmbientColor() { return unpack_half3(GetWeather().ambient); } @@ -881,6 +881,8 @@ inline float GetTime() { return GetFrame().time; } inline float GetTimePrev() { return GetFrame().time_previous; } inline float GetFrameCount() { return GetFrame().frame_count; } inline min16uint2 GetTemporalAASampleRotation() { return uint2(GetFrame().temporalaa_samplerotation & 0xFF, (GetFrame().temporalaa_samplerotation >> 8u) & 0xFF); } +inline half GetCapsuleShadowFade() { return f16tof32(GetFrame().capsuleshadow_fade_angle); } +inline half GetCapsuleShadowAngle() { return f16tof32(GetFrame().capsuleshadow_fade_angle >> 16u); } inline bool IsStaticSky() { return GetScene().globalenvmap >= 0; } inline half GetGIBoost() { return unpack_half2(GetFrame().giboost_packed).x; } @@ -1762,6 +1764,17 @@ inline half sphere_volume(in half radius) return 4.0 / 3.0 * PI * radius * radius * radius; } +inline float distance_squared(float3 a, float3 b) +{ + float3 diff = b - a; + return dot(diff, diff); +} +inline half distance_squared(half3 a, half3 b) +{ + half3 diff = b - a; + return dot(diff, diff); +} + float plane_point_distance(float3 planeOrigin, float3 planeNormal, float3 P) { diff --git a/WickedEngine/shaders/lightCullingCS.hlsl b/WickedEngine/shaders/lightCullingCS.hlsl index 435eebf113..94e37a7529 100644 --- a/WickedEngine/shaders/lightCullingCS.hlsl +++ b/WickedEngine/shaders/lightCullingCS.hlsl @@ -442,6 +442,39 @@ void main(uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : } } } + + // Capsule shadows: + for (uint i = 
forces().first_item() + groupIndex; i < forces().end_item(); i += TILED_CULLING_THREADSIZE * TILED_CULLING_THREADSIZE) + { + ShaderEntity entity = load_entity(i); + if ((entity.GetFlags() & ENTITY_FLAG_CAPSULE_SHADOW_COLLIDER) == 0) + continue; + + float3 A = entity.position; + float3 B = entity.GetColliderTip(); + half radius = entity.GetRange() * CAPSULE_SHADOW_BOLDEN; + + // culling based on capsule-sphere: + float3 center = lerp(A, B, 0.5); + half range = distance(center, A) + radius + CAPSULE_SHADOW_AFFECTION_RANGE; + + float3 positionVS = mul(GetCamera().view, float4(center, 1)).xyz; + Sphere sphere = { positionVS.xyz, range }; + if (SphereInsideFrustum(sphere, GroupFrustum, nearClipVS, maxDepthVS)) + { + AppendEntity_Transparent(i); + + if (SphereIntersectsAABB(sphere, GroupAABB)) // tighter fit than sphere-frustum culling + { +#ifdef ADVANCED_CULLING + if (depth_mask & ConstructEntityMask(minDepthVS, __depthRangeRecip, sphere)) +#endif + { + AppendEntity_Opaque(i); + } + } + } + } #endif diff --git a/WickedEngine/shaders/shadingHF.hlsli b/WickedEngine/shaders/shadingHF.hlsli index a3a51671ea..d3e89cb583 100644 --- a/WickedEngine/shaders/shadingHF.hlsli +++ b/WickedEngine/shaders/shadingHF.hlsli @@ -6,6 +6,7 @@ #include "brdf.hlsli" #include "ShaderInterop_SurfelGI.h" #include "ShaderInterop_DDGI.h" +#include "capsuleShadowHF.hlsli" inline void LightMapping(in int lightmap, in float2 ATLAS, inout Lighting lighting, inout Surface surface) { @@ -492,6 +493,57 @@ inline void TiledLighting(inout Surface surface, inout Lighting lighting, uint f } } + // Capsule shadows: + [branch] + if ((GetFrame().options & OPTION_BIT_CAPSULE_SHADOW_ENABLED) && !surface.IsCapsuleShadowDisabled() && !forces().empty()) // capsule shadows are contained in forces array for now... 
+ { + half4 cone = half4(GetSunDirection() * half3(-1, 1, -1), GetCapsuleShadowAngle()); // horizontally reverse of sun direction (better would be precomputed dominant light dir) + half capsuleshadow = 1; + + // Loop through light buckets in the tile: + ShaderEntityIterator iterator = forces(); + for (uint bucket = iterator.first_bucket(); (bucket <= iterator.last_bucket()) && (capsuleshadow > 0); ++bucket) + { + uint bucket_bits = load_entitytile(flatTileIndex + bucket); + bucket_bits = iterator.mask_entity(bucket, bucket_bits); + +#ifndef ENTITY_TILE_UNIFORM + // Bucket scalarizer - Siggraph 2017 - Improved Culling [Michal Drobot]: + bucket_bits = WaveReadLaneFirst(WaveActiveBitOr(bucket_bits)); +#endif // ENTITY_TILE_UNIFORM + + [loop] + while ((bucket_bits != 0) && (capsuleshadow > 0)) + { + // Retrieve global entity index from local bucket, then remove bit from local bucket: + const uint bucket_bit_index = firstbitlow(bucket_bits); + const uint entity_index = bucket * 32 + bucket_bit_index; + bucket_bits ^= 1u << bucket_bit_index; + + ShaderEntity entity = load_entity(entity_index); + if ((entity.GetFlags() & ENTITY_FLAG_CAPSULE_SHADOW_COLLIDER) == 0) + continue; + + float3 A = entity.position; + float3 B = entity.GetColliderTip(); + half radius = entity.GetRange() * CAPSULE_SHADOW_BOLDEN; + half occ = directionalOcclusionCapsule(surface.P, A, B, radius, cone); + + // attenuation based on capsule-sphere: + float3 center = lerp(A, B, 0.5); + half range = distance(center, A) + radius + CAPSULE_SHADOW_AFFECTION_RANGE; + half range2 = range * range; + half dist2 = distance_squared(surface.P, center); + occ = 1 - saturate((1 - occ) * saturate(attenuation_pointlight(dist2, range, range2))); + + capsuleshadow *= occ; + } + } + capsuleshadow = lerp(capsuleshadow, 1, GetCapsuleShadowFade()); + capsuleshadow = saturate(capsuleshadow); + surface.occlusion *= capsuleshadow; + } + } inline void TiledDecals(inout Surface surface, uint flatTileIndex, inout half4 surfaceMap, 
SamplerState sam) diff --git a/WickedEngine/shaders/surfaceHF.hlsli b/WickedEngine/shaders/surfaceHF.hlsli index b0ef60c5e4..6fcbe707d8 100644 --- a/WickedEngine/shaders/surfaceHF.hlsli +++ b/WickedEngine/shaders/surfaceHF.hlsli @@ -98,6 +98,7 @@ struct Surface bool receiveshadow; bool is_backface; bool gi_applied; + bool capsuleshadow_disabled; // These will be computed when calling Update(): half NdotV; // cos(angle between normal and view vector) @@ -150,6 +151,7 @@ struct Surface receiveshadow = true; is_backface = false; gi_applied = false; + capsuleshadow_disabled = true; uid_validate = 0; hit_depth = 0; @@ -181,6 +183,7 @@ struct Surface sss = material.GetSSS(); sss_inv = material.GetSSSInverse(); SetReceiveShadow(material.IsReceiveShadow()); + SetCapsuleShadowDisabled(material.IsCapsuleShadowDisabled()); } inline void create( @@ -305,10 +308,12 @@ struct Surface inline bool IsReceiveShadow() { return receiveshadow; } inline bool IsBackface() { return is_backface; } inline bool IsGIApplied() { return gi_applied; } + inline bool IsCapsuleShadowDisabled() { return capsuleshadow_disabled; } inline void SetReceiveShadow(bool value) { receiveshadow = value; } inline void SetBackface(bool value) { is_backface = value; } inline void SetGIApplied(bool value) { gi_applied = value; } + inline void SetCapsuleShadowDisabled(bool value) { capsuleshadow_disabled = value; } ShaderMeshInstance inst; diff --git a/WickedEngine/wiJobSystem.cpp b/WickedEngine/wiJobSystem.cpp index ac8fbbb24c..1cec9105fd 100644 --- a/WickedEngine/wiJobSystem.cpp +++ b/WickedEngine/wiJobSystem.cpp @@ -190,20 +190,25 @@ namespace wi::jobsystem #ifdef PLATFORM_LINUX std::thread& worker = res.threads.emplace_back([threadID, priority, &res] { + // from the sched(2) manpage: + // In the current [Linux 2.6.23+] implementation, each unit of + // difference in the nice values of two processes results in a + // factor of 1.25 in the degree to which the scheduler favors + // the higher priority process. 
+ // + // so 3 would mean that other (prio 0) threads are around twice as important + switch (priority) { case Priority::Low: - case Priority::Streaming: - // from the sched(2) manpage: - // In the current [Linux 2.6.23+] implementation, each unit of - // difference in the nice values of two processes results in a - // factor of 1.25 in the degree to which the scheduler favors - // the higher priority process. - // - // so 3 would mean that other (prio 0) threads are around twice as important if (setpriority(PRIO_PROCESS, 0, 3) != 0) { perror("setpriority"); } + case Priority::Streaming: + if (setpriority(PRIO_PROCESS, 0, 2) != 0) + { + perror("setpriority"); + } break; case Priority::High: // nothing to do @@ -262,7 +267,7 @@ namespace wi::jobsystem } else if (priority == Priority::Streaming) { - BOOL priority_result = SetThreadPriority(handle, THREAD_PRIORITY_LOWEST); + BOOL priority_result = SetThreadPriority(handle, THREAD_PRIORITY_BELOW_NORMAL); assert(priority_result != 0); std::wstring wthreadname = L"wi::job_st_" + std::to_wstring(threadID); diff --git a/WickedEngine/wiPrimitive.h b/WickedEngine/wiPrimitive.h index 9daf92eb3f..083bff4ee7 100644 --- a/WickedEngine/wiPrimitive.h +++ b/WickedEngine/wiPrimitive.h @@ -132,6 +132,15 @@ namespace wi::primitive { assert(radius >= 0); } + inline Sphere getSphere() const + { + XMVECTOR B = XMLoadFloat3(&base); + XMVECTOR T = XMLoadFloat3(&tip); + Sphere ret; + XMStoreFloat3(&ret.center, XMVectorLerp(B, T, 0.5f)); + XMStoreFloat(&ret.radius, XMVector3Length(B - T) * 0.5f); + return ret; + } inline AABB getAABB() const { XMFLOAT3 halfWidth = XMFLOAT3(radius, radius, radius); diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 7ddab4e40e..83be61a8ab 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -88,7 +88,7 @@ std::string SHADERSOURCEPATH = SHADER_INTEROP_PATH; //#define RTREFLECTION_WITH_RAYTRACING_PIPELINE static thread_local wi::vector barrier_stack; -void 
barrier_stack_flush(CommandList cmd) +void FlushBarriers(CommandList cmd) { if (barrier_stack.empty()) return; @@ -99,10 +99,6 @@ void PushBarrier(const GPUBarrier& barrier) { barrier_stack.push_back(barrier); } -void FlushBarriers(CommandList cmd) -{ - barrier_stack_flush(cmd); -} bool wireRender = false; bool debugBoneLines = false; @@ -146,6 +142,9 @@ bool VXGI_ENABLED = false; bool VXGI_REFLECTIONS_ENABLED = true; bool VXGI_DEBUG = false; int VXGI_DEBUG_CLIPMAP = 0; +bool CAPSULE_SHADOW_ENABLED = false; +float CAPSULE_SHADOW_ANGLE = XM_PIDIV4; +float CAPSULE_SHADOW_FADE = 0.2f; Texture shadowMapAtlas; Texture shadowMapAtlas_Transparent; @@ -3232,9 +3231,9 @@ void ProcessDeferredTextureRequests(CommandList cmd) // batch begin barriers: for (auto& params : painttextures) { - barrier_stack.push_back(GPUBarrier::Image(&params.editTex, params.editTex.desc.layout, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&params.editTex, params.editTex.desc.layout, ResourceState::UNORDERED_ACCESS)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); // render splats: device->BindComputeShader(&shaders[CSTYPE_PAINT_TEXTURE], cmd); @@ -3275,9 +3274,9 @@ void ProcessDeferredTextureRequests(CommandList cmd) // ending barriers: for (auto& params : painttextures) { - barrier_stack.push_back(GPUBarrier::Image(&params.editTex, ResourceState::UNORDERED_ACCESS, params.editTex.desc.layout)); + PushBarrier(GPUBarrier::Image(&params.editTex, ResourceState::UNORDERED_ACCESS, params.editTex.desc.layout)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); // mipgen tasks after paint: for (auto& params : painttextures) @@ -3370,7 +3369,6 @@ void UpdateVisibility(Visibility& vis) if (!light.IsInactive()) { // Local stream compaction: - // (also compute light distance for shadow priority sorting) stream_compaction.list[stream_compaction.count++] = args.groupIndex; if (light.IsVolumetricsEnabled()) { @@ -3564,6 +3562,104 @@ void UpdateVisibility(Visibility& vis) }); } + if (vis.flags & 
Visibility::ALLOW_COLLIDERS) + { + wi::jobsystem::Execute(ctx, [&](wi::jobsystem::JobArgs args) { + for (size_t i = 0; i < vis.scene->collider_count_gpu; ++i) + { + ColliderComponent collider = vis.scene->colliders_gpu[i]; + if (!(collider.layerMask & vis.layerMask)) + { + continue; + } + if (!vis.frustum.CheckBoxFast(vis.scene->aabb_colliders_gpu[i])) + { + continue; + } + collider.dist = wi::math::DistanceSquared(vis.scene->aabb_colliders_gpu[i].getCenter(), vis.camera->Eye); + vis.visibleColliders.push_back(collider); + } + // Ragdoll GPU colliders: + for (size_t i = 0; i < vis.scene->humanoids.GetCount(); ++i) + { + uint32_t layerMask = ~0u; + + Entity entity = vis.scene->humanoids.GetEntity(i); + const LayerComponent* layer = vis.scene->layers.GetComponent(entity); + if (layer != nullptr) + { + layerMask = layer->layerMask; + } + + const HumanoidComponent& humanoid = vis.scene->humanoids[i]; + const bool capsule_shadow = !humanoid.IsCapsuleShadowDisabled(); + for (auto& bodypart : humanoid.ragdoll_bodyparts) + { + Sphere sphere = bodypart.capsule.getSphere(); + sphere.radius += CAPSULE_SHADOW_AFFECTION_RANGE; + if (!vis.camera->frustum.CheckSphere(sphere.center, sphere.radius)) + continue; + + ColliderComponent collider; + collider.layerMask = layerMask; + collider.shape = ColliderComponent::Shape::Capsule; + collider.capsule = bodypart.capsule; + collider.SetCapsuleShadowEnabled(capsule_shadow); + collider.dist = wi::math::DistanceSquared(sphere.center, vis.camera->Eye); + vis.visibleColliders.push_back(collider); + } + + // Add foot capsules for capsule shadows, which are not part of the ragdoll now: + if (capsule_shadow) + { + const std::pair feet[] = { + {humanoid.bones[(int)HumanoidComponent::HumanoidBone::RightFoot], humanoid.bones[(int)HumanoidComponent::HumanoidBone::RightToes]}, + {humanoid.bones[(int)HumanoidComponent::HumanoidBone::LeftFoot], humanoid.bones[(int)HumanoidComponent::HumanoidBone::LeftToes]}, + }; + for (auto& foot : feet) + { + if 
(foot.first != INVALID_ENTITY && foot.second != INVALID_ENTITY) + { + const TransformComponent* footTransform = vis.scene->transforms.GetComponent(foot.first); + const TransformComponent* toesTransform = vis.scene->transforms.GetComponent(foot.second); + if (footTransform != nullptr && toesTransform != nullptr) + { + XMVECTOR A = footTransform->GetPositionV(); + XMVECTOR B = toesTransform->GetPositionV(); + XMVECTOR DIFF = B - A; + XMVECTOR LEN = XMVectorMax(XMVectorSplatEpsilon(), XMVector3Length(DIFF)); + XMVECTOR N = DIFF / LEN; // normalize + const float radius = XMVectorGetX(LEN) * 0.4f; + A -= N * radius; + B += N * radius; + Capsule capsule = Capsule(A, B, radius); + Sphere sphere = capsule.getSphere(); + sphere.radius += CAPSULE_SHADOW_AFFECTION_RANGE; + if (vis.camera->frustum.CheckSphere(sphere.center, sphere.radius)) + { + ColliderComponent collider; + collider.layerMask = layerMask; + collider.shape = ColliderComponent::Shape::Capsule; + collider.capsule = capsule; + collider.SetCapsuleShadowEnabled(capsule_shadow); + collider.dist = wi::math::DistanceSquared(sphere.center, vis.camera->Eye); + vis.visibleColliders.push_back(collider); + //static std::mutex locker; + //std::scoped_lock lck(locker); + //DrawCapsule(capsule); + } + } + } + } + } + } + // GPU colliders sorting to camera priority: + std::sort(vis.visibleColliders.begin(), vis.visibleColliders.end(), [](const ColliderComponent& a, const ColliderComponent& b) { + return a.dist < b.dist; + }); + }); + } + wi::jobsystem::Wait(ctx); // finalize stream compaction: @@ -3903,6 +3999,8 @@ void UpdatePerFrameData( frameCB.temporalaa_samplerotation = (x & 0x000000FF) | ((y & 0x000000FF) << 8); } + frameCB.capsuleshadow_fade_angle = uint32_t(XMConvertFloatToHalf(CAPSULE_SHADOW_FADE)) | uint32_t(XMConvertFloatToHalf(std::max(0.001f, CAPSULE_SHADOW_ANGLE * 0.5f))) << 16u; + frameCB.options = 0; if (GetTemporalAAEnabled()) { @@ -3973,6 +4071,10 @@ void UpdatePerFrameData( { frameCB.options |= 
OPTION_BIT_VOLUMETRICCLOUDS_RECEIVE_SHADOW; } + if (IsCapsuleShadowEnabled()) + { + frameCB.options |= OPTION_BIT_CAPSULE_SHADOW_ENABLED; + } frameCB.scene = vis.scene->shaderscene; @@ -4483,7 +4585,7 @@ void UpdatePerFrameData( // Write colliders into entity array: forcefieldarray_offset = entityCounter; - for (size_t i = 0; i < vis.scene->collider_count_gpu; ++i) + for (size_t i = 0; i < vis.visibleColliders.size(); ++i) { if (entityCounter == SHADER_ENTITY_COUNT) { @@ -4492,9 +4594,14 @@ void UpdatePerFrameData( } ShaderEntity shaderentity = {}; - const ColliderComponent& collider = vis.scene->colliders_gpu[i]; + const ColliderComponent& collider = vis.visibleColliders[i]; shaderentity.layerMask = collider.layerMask; + if (collider.IsCapsuleShadowEnabled()) + { + shaderentity.SetFlags(ENTITY_FLAG_CAPSULE_SHADOW_COLLIDER); + } + switch (collider.shape) { case ColliderComponent::Shape::Sphere: @@ -4566,48 +4673,6 @@ void UpdatePerFrameData( entityCounter++; forcefieldarray_count++; } - - // Write ragdoll colliders into entity array: - for (size_t i = 0; i < vis.scene->humanoids.GetCount(); ++i) - { - if (entityCounter == SHADER_ENTITY_COUNT) - { - entityCounter--; - break; - } - - uint32_t layerMask = ~0u; - - Entity entity = vis.scene->humanoids.GetEntity(i); - const LayerComponent* layer = vis.scene->layers.GetComponent(entity); - if (layer != nullptr) - { - layerMask = layer->layerMask; - } - - const HumanoidComponent& humanoid = vis.scene->humanoids[i]; - for (auto& bodypart : humanoid.ragdoll_bodyparts) - { - if (entityCounter == SHADER_ENTITY_COUNT) - { - entityCounter--; - break; - } - - ShaderEntity shaderentity = {}; - shaderentity.SetType(ENTITY_TYPE_COLLIDER_CAPSULE); - shaderentity.layerMask = layerMask; - shaderentity.position = bodypart.capsule.base; - shaderentity.SetColliderTip(bodypart.capsule.tip); - shaderentity.SetRange(bodypart.capsule.radius); - - //DrawCapsule(bodypart.capsule); - - std::memcpy(entityArray + entityCounter, &shaderentity, 
sizeof(ShaderEntity)); - entityCounter++; - forcefieldarray_count++; - } - } } frameCB.probes = ShaderEntityIterator(envprobearray_offset, envprobearray_count); @@ -4629,33 +4694,43 @@ void UpdateRenderData( auto prof_updatebuffer_cpu = wi::profiler::BeginRangeCPU("Update Buffers (CPU)"); auto prof_updatebuffer_gpu = wi::profiler::BeginRangeGPU("Update Buffers (GPU)", cmd); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND], textures[TEXTYPE_3D_WIND].desc.layout, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND_PREV], textures[TEXTYPE_3D_WIND_PREV].desc.layout, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_2D_CAUSTICS], textures[TEXTYPE_2D_CAUSTICS].desc.layout, ResourceState::UNORDERED_ACCESS)); + FlushBarriers(cmd); + + device->ClearUAV(&textures[TEXTYPE_3D_WIND], 0, cmd); + device->ClearUAV(&textures[TEXTYPE_3D_WIND_PREV], 0, cmd); + device->ClearUAV(&textures[TEXTYPE_2D_CAUSTICS], 0, cmd); + PushBarrier(GPUBarrier::Memory()); + device->CopyBuffer(&indirectDebugStatsReadback[device->GetBufferIndex()], 0, &buffers[BUFFERTYPE_INDIRECT_DEBUG_0], 0, sizeof(IndirectDrawArgsInstanced), cmd); indirectDebugStatsReadback_available[device->GetBufferIndex()] = true; - barrier_stack.push_back(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_0], ResourceState::COPY_SRC, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_0], ResourceState::COPY_SRC, ResourceState::COPY_DST)); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->meshletBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->meshletBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Buffer(&buffers[BUFFERTYPE_FRAMECB], ResourceState::CONSTANT_BUFFER, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&buffers[BUFFERTYPE_FRAMECB], 
ResourceState::CONSTANT_BUFFER, ResourceState::COPY_DST)); if (vis.scene->instanceBuffer.IsValid()) { - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->instanceBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->instanceBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); } if (vis.scene->geometryBuffer.IsValid()) { - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->geometryBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->geometryBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); } if (vis.scene->materialBuffer.IsValid()) { - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->materialBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->materialBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); } if (vis.scene->skinningBuffer.IsValid()) { - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->skinningBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->skinningBuffer, ResourceState::SHADER_RESOURCE, ResourceState::COPY_DST)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); device->UpdateBuffer(&buffers[BUFFERTYPE_FRAMECB], &frameCB, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&buffers[BUFFERTYPE_FRAMECB], ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER)); + PushBarrier(GPUBarrier::Buffer(&buffers[BUFFERTYPE_FRAMECB], ResourceState::COPY_DST, ResourceState::CONSTANT_BUFFER)); if (vis.scene->instanceBuffer.IsValid() && vis.scene->instanceArraySize > 0) { @@ -4667,7 +4742,7 @@ void UpdateRenderData( vis.scene->instanceArraySize * sizeof(ShaderMeshInstance), cmd ); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->instanceBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + 
PushBarrier(GPUBarrier::Buffer(&vis.scene->instanceBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } if (vis.scene->geometryBuffer.IsValid() && vis.scene->geometryArraySize > 0) @@ -4680,7 +4755,7 @@ void UpdateRenderData( vis.scene->geometryArraySize * sizeof(ShaderGeometry), cmd ); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->geometryBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->geometryBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } if (vis.scene->materialBuffer.IsValid() && vis.scene->materialArraySize > 0) @@ -4693,7 +4768,7 @@ void UpdateRenderData( vis.scene->materialArraySize * sizeof(ShaderMaterial), cmd ); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->materialBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->materialBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } if (vis.scene->skinningBuffer.IsValid() && vis.scene->skinningDataSize > 0) @@ -4706,20 +4781,16 @@ void UpdateRenderData( vis.scene->skinningDataSize, cmd ); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->skinningBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->skinningBuffer, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } if (vis.scene->voxelgrid_gpu.IsValid() && vis.scene->voxel_grids.GetCount() > 0) { VoxelGrid& voxelgrid = vis.scene->voxel_grids[0]; device->UpdateBuffer(&vis.scene->voxelgrid_gpu, voxelgrid.voxels.data(), cmd, voxelgrid.voxels.size() * sizeof(uint64_t)); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->voxelgrid_gpu, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->voxelgrid_gpu, ResourceState::COPY_DST, ResourceState::SHADER_RESOURCE)); } - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND], 
textures[TEXTYPE_3D_WIND].desc.layout, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND_PREV], textures[TEXTYPE_3D_WIND_PREV].desc.layout, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_2D_CAUSTICS], textures[TEXTYPE_2D_CAUSTICS].desc.layout, ResourceState::UNORDERED_ACCESS)); - // Indirect debug buffer - clear indirect args: IndirectDrawArgsInstanced debug_indirect = {}; debug_indirect.VertexCountPerInstance = 0; @@ -4727,24 +4798,17 @@ void UpdateRenderData( debug_indirect.StartVertexLocation = 0; debug_indirect.StartInstanceLocation = 0; device->UpdateBuffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_0], &debug_indirect, cmd, sizeof(debug_indirect)); - barrier_stack.push_back(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_0], ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_1], ResourceState::UNORDERED_ACCESS, ResourceState::VERTEX_BUFFER | ResourceState::INDIRECT_ARGUMENT | ResourceState::COPY_SRC)); + PushBarrier(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_0], ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Buffer(&buffers[BUFFERTYPE_INDIRECT_DEBUG_1], ResourceState::UNORDERED_ACCESS, ResourceState::VERTEX_BUFFER | ResourceState::INDIRECT_ARGUMENT | ResourceState::COPY_SRC)); // Flush buffer updates: - barrier_stack_flush(cmd); + FlushBarriers(cmd); wi::profiler::EndRange(prof_updatebuffer_cpu); wi::profiler::EndRange(prof_updatebuffer_gpu); BindCommonResources(cmd); - { - device->ClearUAV(&textures[TEXTYPE_3D_WIND], 0, cmd); - device->ClearUAV(&textures[TEXTYPE_3D_WIND_PREV], 0, cmd); - device->ClearUAV(&textures[TEXTYPE_2D_CAUSTICS], 0, cmd); - device->Barrier(GPUBarrier::Memory(), cmd); - } - { auto range = wi::profiler::BeginRangeGPU("Wind", cmd); device->EventBegin("Wind", cmd); @@ -4760,8 +4824,8 @@ void 
UpdateRenderData( const TextureDesc& desc = textures[TEXTYPE_3D_WIND].GetDesc(); device->Dispatch(desc.width / 8, desc.height / 8, desc.depth / 8, cmd); } - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_3D_WIND].desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND_PREV], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_3D_WIND_PREV].desc.layout)); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_3D_WIND].desc.layout)); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_3D_WIND_PREV], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_3D_WIND_PREV].desc.layout)); device->EventEnd(cmd); wi::profiler::EndRange(range); } @@ -4772,7 +4836,7 @@ void UpdateRenderData( device->BindUAV(&textures[TEXTYPE_2D_CAUSTICS], 0, cmd); const TextureDesc& desc = textures[TEXTYPE_2D_CAUSTICS].GetDesc(); device->Dispatch(desc.width / 8, desc.height / 8, 1, cmd); - barrier_stack.push_back(GPUBarrier::Image(&textures[TEXTYPE_2D_CAUSTICS], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_2D_CAUSTICS].desc.layout)); + PushBarrier(GPUBarrier::Image(&textures[TEXTYPE_2D_CAUSTICS], ResourceState::UNORDERED_ACCESS, textures[TEXTYPE_2D_CAUSTICS].desc.layout)); device->EventEnd(cmd); wi::profiler::EndRange(range); } @@ -4855,7 +4919,7 @@ void UpdateRenderData( device->Dispatch(((uint32_t)mesh.vertex_positions.size() + 63) / 64, 1, 1, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&mesh.streamoutBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&mesh.streamoutBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); } } @@ -4863,7 +4927,7 @@ void UpdateRenderData( device->EventEnd(cmd); // Skinning and Morph } - barrier_stack_flush(cmd); // wind/skinning flush + FlushBarriers(cmd); // wind/skinning flush // Hair particle initialization is needed for all, 
not just visible ones: // This fixes an issue when hair is included in ray tracing acceleration @@ -4907,8 +4971,8 @@ void UpdateRenderData( device->EventBegin("Impostor prepare", cmd); auto range = wi::profiler::BeginRangeGPU("Impostor prepare", cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT, ResourceState::COPY_DST)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT, ResourceState::COPY_DST)); + FlushBarriers(cmd); IndirectDrawArgsIndexedInstanced clear_indirect = {}; clear_indirect.IndexCountPerInstance = 0; clear_indirect.InstanceCount = 1; @@ -4916,8 +4980,8 @@ void UpdateRenderData( clear_indirect.BaseVertexLocation = 0; clear_indirect.StartInstanceLocation = 0; device->UpdateBuffer(&vis.scene->impostorBuffer, &clear_indirect, cmd, sizeof(clear_indirect), vis.scene->impostor_indirect.offset); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); + FlushBarriers(cmd); device->BindComputeShader(&shaders[CSTYPE_IMPOSTOR_PREPARE], cmd); device->BindUAV(&vis.scene->impostorBuffer, 0, cmd, vis.scene->impostor_ib_format == Format::R32_UINT ? 
vis.scene->impostor_ib32.subresource_uav : vis.scene->impostor_ib16.subresource_uav); @@ -4931,8 +4995,8 @@ void UpdateRenderData( device->Dispatch((object_count + 63u) / 64u, 1, 1, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT)); + FlushBarriers(cmd); wi::profiler::EndRange(range); device->EventEnd(cmd); @@ -4940,8 +5004,8 @@ void UpdateRenderData( else if(vis.scene->impostors.GetCount() > 0 && vis.scene->objects.GetCount() == 0 && vis.scene->impostorBuffer.IsValid()) { device->ClearUAV(&vis.scene->impostorBuffer, 0, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Buffer(&vis.scene->impostorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE | ResourceState::INDIRECT_ARGUMENT)); + FlushBarriers(cmd); } // Meshlets: @@ -4958,13 +5022,13 @@ void UpdateRenderData( device->Dispatch((uint32_t)vis.scene->instanceArraySize, 1, 1, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&vis.scene->meshletBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&vis.scene->meshletBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); wi::profiler::EndRange(range); device->EventEnd(cmd); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); device->EventEnd(cmd); } @@ -4993,9 +5057,9 @@ void UpdateRenderDataAsync( continue; device->ClearUAV(&object.wetmap, 0, cmd); object.wetmap_cleared = true; - barrier_stack.push_back(GPUBarrier::Buffer(&object.wetmap, ResourceState::UNORDERED_ACCESS, 
ResourceState::SHADER_RESOURCE_COMPUTE)); + PushBarrier(GPUBarrier::Buffer(&object.wetmap, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE_COMPUTE)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); // Precompute static volumetric cloud textures: if (!volumetric_clouds_precomputed && vis.scene->weather.IsVolumetricClouds()) @@ -10225,7 +10289,7 @@ void RayTraceScene( }; device->BindUAVs(uavs, 0, arraysize(uavs), cmd); - barrier_stack.push_back(GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&output, output.desc.layout, ResourceState::UNORDERED_ACCESS)); // Note: these are always in ResourceState::UNORDERED_ACCESS, no need to transition! if (output_albedo != nullptr) @@ -10244,33 +10308,33 @@ void RayTraceScene( { device->BindUAV(output_stencil, 4, cmd); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); if (accumulation_sample == 0) { device->ClearUAV(&output, 0, cmd); - barrier_stack.push_back(GPUBarrier::Memory(&output)); + PushBarrier(GPUBarrier::Memory(&output)); if (output_albedo != nullptr) { device->ClearUAV(output_albedo, 0, cmd); - barrier_stack.push_back(GPUBarrier::Memory(output_albedo)); + PushBarrier(GPUBarrier::Memory(output_albedo)); } if (output_normal != nullptr) { device->ClearUAV(output_normal, 0, cmd); - barrier_stack.push_back(GPUBarrier::Memory(output_normal)); + PushBarrier(GPUBarrier::Memory(output_normal)); } if (output_depth != nullptr) { device->ClearUAV(output_depth, 0, cmd); - barrier_stack.push_back(GPUBarrier::Memory(output_depth)); + PushBarrier(GPUBarrier::Memory(output_depth)); } if (output_stencil != nullptr) { device->ClearUAV(output_stencil, 0, cmd); - barrier_stack.push_back(GPUBarrier::Memory(output_stencil)); + PushBarrier(GPUBarrier::Memory(output_stencil)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); } device->Dispatch( @@ -10280,7 +10344,7 @@ void RayTraceScene( cmd ); - barrier_stack.push_back(GPUBarrier::Image(&output, 
ResourceState::UNORDERED_ACCESS, output.desc.layout)); + PushBarrier(GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout)); if (output_depth_stencil != nullptr) { CopyDepthStencil( @@ -10295,14 +10359,14 @@ void RayTraceScene( { if (output_depth != nullptr) { - barrier_stack.push_back(GPUBarrier::Image(output_depth, ResourceState::UNORDERED_ACCESS, output_depth->desc.layout)); + PushBarrier(GPUBarrier::Image(output_depth, ResourceState::UNORDERED_ACCESS, output_depth->desc.layout)); } if (output_stencil != nullptr) { - barrier_stack.push_back(GPUBarrier::Image(output_stencil, ResourceState::UNORDERED_ACCESS, output_stencil->desc.layout)); + PushBarrier(GPUBarrier::Image(output_stencil, ResourceState::UNORDERED_ACCESS, output_stencil->desc.layout)); } } - barrier_stack_flush(cmd); + FlushBarriers(cmd); wi::profiler::EndRange(range); device->EventEnd(cmd); // RayTraceScene @@ -10981,9 +11045,9 @@ void Visibility_Prepare( bin.shaderType = i; } device->UpdateBuffer(&res.bins, bins, cmd); - barrier_stack.push_back(GPUBarrier::Buffer(&res.bins, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Buffer(&res.binned_tiles, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Buffer(&res.bins, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Buffer(&res.binned_tiles, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + FlushBarriers(cmd); } // Resolve: @@ -11015,7 +11079,7 @@ void Visibility_Prepare( device->BindUAV(res.depthbuffer, 5, cmd, 2); device->BindUAV(res.depthbuffer, 6, cmd, 3); device->BindUAV(res.depthbuffer, 7, cmd, 4); - barrier_stack.push_back(GPUBarrier::Image(res.depthbuffer, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(res.depthbuffer, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); } else { @@ -11032,7 +11096,7 @@ 
void Visibility_Prepare( device->BindUAV(res.lineardepth, 10, cmd, 2); device->BindUAV(res.lineardepth, 11, cmd, 3); device->BindUAV(res.lineardepth, 12, cmd, 4); - barrier_stack.push_back(GPUBarrier::Image(res.lineardepth, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(res.lineardepth, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); } else { @@ -11045,13 +11109,13 @@ void Visibility_Prepare( if (res.primitiveID_resolved) { device->BindUAV(res.primitiveID_resolved, 13, cmd); - barrier_stack.push_back(GPUBarrier::Image(res.primitiveID_resolved, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(res.primitiveID_resolved, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); } else { device->BindUAV(&unbind, 13, cmd); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); device->BindComputeShader(&shaders[msaa ? CSTYPE_VISIBILITY_RESOLVE_MSAA : CSTYPE_VISIBILITY_RESOLVE], cmd); @@ -11064,22 +11128,22 @@ void Visibility_Prepare( if (res.depthbuffer) { - barrier_stack.push_back(GPUBarrier::Image(res.depthbuffer, ResourceState::UNORDERED_ACCESS, res.depthbuffer->desc.layout)); + PushBarrier(GPUBarrier::Image(res.depthbuffer, ResourceState::UNORDERED_ACCESS, res.depthbuffer->desc.layout)); } if (res.lineardepth) { - barrier_stack.push_back(GPUBarrier::Image(res.lineardepth, ResourceState::UNORDERED_ACCESS, res.lineardepth->desc.layout)); + PushBarrier(GPUBarrier::Image(res.lineardepth, ResourceState::UNORDERED_ACCESS, res.lineardepth->desc.layout)); } if (res.primitiveID_resolved) { - barrier_stack.push_back(GPUBarrier::Image(res.primitiveID_resolved, ResourceState::UNORDERED_ACCESS, res.primitiveID_resolved->desc.layout)); + PushBarrier(GPUBarrier::Image(res.primitiveID_resolved, ResourceState::UNORDERED_ACCESS, res.primitiveID_resolved->desc.layout)); } if (res.IsValid()) { - barrier_stack.push_back(GPUBarrier::Buffer(&res.bins, ResourceState::UNORDERED_ACCESS, 
ResourceState::INDIRECT_ARGUMENT)); - barrier_stack.push_back(GPUBarrier::Buffer(&res.binned_tiles, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Buffer(&res.bins, ResourceState::UNORDERED_ACCESS, ResourceState::INDIRECT_ARGUMENT)); + PushBarrier(GPUBarrier::Buffer(&res.binned_tiles, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE)); } - barrier_stack_flush(cmd); + FlushBarriers(cmd); device->EventEnd(cmd); } @@ -11099,12 +11163,12 @@ void Visibility_Surface( BindCommonResources(cmd); // First, do a bunch of resource discards to initialize texture metadata: - barrier_stack.push_back(GPUBarrier::Image(&output, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_normals, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_payload_0, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_payload_1, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&output, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&res.texture_normals, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&res.texture_payload_0, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&res.texture_payload_1, ResourceState::UNDEFINED, ResourceState::UNORDERED_ACCESS)); + FlushBarriers(cmd); device->BindResource(&res.binned_tiles, 0, cmd); device->BindUAV(&output, 0, cmd); @@ -11136,9 +11200,9 @@ void 
Visibility_Surface( // Ending barriers: // These resources will be used by other post processing effects - barrier_stack.push_back(GPUBarrier::Image(&res.texture_normals, ResourceState::UNORDERED_ACCESS, res.texture_normals.desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNORDERED_ACCESS, res.texture_roughness.desc.layout)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&res.texture_normals, ResourceState::UNORDERED_ACCESS, res.texture_normals.desc.layout)); + PushBarrier(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNORDERED_ACCESS, res.texture_roughness.desc.layout)); + FlushBarriers(cmd); wi::profiler::EndRange(range); device->EventEnd(cmd); @@ -11153,9 +11217,9 @@ void Visibility_Surface_Reduced( BindCommonResources(cmd); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_normals, res.texture_normals.desc.layout, ResourceState::UNORDERED_ACCESS)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_roughness, res.texture_roughness.desc.layout, ResourceState::UNORDERED_ACCESS)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&res.texture_normals, res.texture_normals.desc.layout, ResourceState::UNORDERED_ACCESS)); + PushBarrier(GPUBarrier::Image(&res.texture_roughness, res.texture_roughness.desc.layout, ResourceState::UNORDERED_ACCESS)); + FlushBarriers(cmd); device->BindResource(&res.binned_tiles, 0, cmd); device->BindUAV(&res.texture_normals, 1, cmd); @@ -11180,9 +11244,9 @@ void Visibility_Surface_Reduced( // Ending barriers: // These resources will be used by other post processing effects - barrier_stack.push_back(GPUBarrier::Image(&res.texture_normals, ResourceState::UNORDERED_ACCESS, res.texture_normals.desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNORDERED_ACCESS, res.texture_roughness.desc.layout)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&res.texture_normals, 
ResourceState::UNORDERED_ACCESS, res.texture_normals.desc.layout)); + PushBarrier(GPUBarrier::Image(&res.texture_roughness, ResourceState::UNORDERED_ACCESS, res.texture_roughness.desc.layout)); + FlushBarriers(cmd); wi::profiler::EndRange(range); device->EventEnd(cmd); @@ -11198,9 +11262,9 @@ void Visibility_Shade( BindCommonResources(cmd); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_payload_0, ResourceState::UNORDERED_ACCESS, res.texture_payload_0.desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(&res.texture_payload_1, ResourceState::UNORDERED_ACCESS, res.texture_payload_1.desc.layout)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&res.texture_payload_0, ResourceState::UNORDERED_ACCESS, res.texture_payload_0.desc.layout)); + PushBarrier(GPUBarrier::Image(&res.texture_payload_1, ResourceState::UNORDERED_ACCESS, res.texture_payload_1.desc.layout)); + FlushBarriers(cmd); device->BindResource(&res.binned_tiles, 0, cmd); device->BindResource(&res.texture_payload_0, 2, cmd); @@ -11222,8 +11286,8 @@ void Visibility_Shade( visibility_tile_offset += visibility_tilecount_flat; } - barrier_stack.push_back(GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(&output, ResourceState::UNORDERED_ACCESS, output.desc.layout)); + FlushBarriers(cmd); wi::profiler::EndRange(range); device->EventEnd(cmd); @@ -17810,9 +17874,9 @@ void CopyDepthStencil( if (manual_depthstencil_copy_required) { - barrier_stack.push_back(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::SHADER_RESOURCE)); - barrier_stack.push_back(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::SHADER_RESOURCE)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::SHADER_RESOURCE)); + PushBarrier(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, 
ResourceState::SHADER_RESOURCE)); + FlushBarriers(cmd); RenderPassImage rp[] = { RenderPassImage::DepthStencil( @@ -17841,7 +17905,7 @@ void CopyDepthStencil( device->BindResource(input_depth, 0, cmd); device->Draw(3, 0, cmd); device->EventEnd(cmd); - barrier_stack.push_back(GPUBarrier::Image(input_depth, ResourceState::SHADER_RESOURCE, input_depth->desc.layout)); + PushBarrier(GPUBarrier::Image(input_depth, ResourceState::SHADER_RESOURCE, input_depth->desc.layout)); } if (input_stencil != nullptr) @@ -17864,20 +17928,20 @@ void CopyDepthStencil( stencil_bits_to_copy >>= 1; } device->EventEnd(cmd); - barrier_stack.push_back(GPUBarrier::Image(input_stencil, ResourceState::SHADER_RESOURCE, input_stencil->desc.layout)); + PushBarrier(GPUBarrier::Image(input_stencil, ResourceState::SHADER_RESOURCE, input_stencil->desc.layout)); } device->RenderPassEnd(cmd); - barrier_stack_flush(cmd); + FlushBarriers(cmd); } else { - barrier_stack.push_back(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::COPY_SRC)); - barrier_stack.push_back(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::COPY_SRC)); - barrier_stack.push_back(GPUBarrier::Image(&output_depth_stencil, output_depth_stencil.desc.layout, ResourceState::COPY_DST)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(input_depth, input_depth->desc.layout, ResourceState::COPY_SRC)); + PushBarrier(GPUBarrier::Image(input_stencil, input_stencil->desc.layout, ResourceState::COPY_SRC)); + PushBarrier(GPUBarrier::Image(&output_depth_stencil, output_depth_stencil.desc.layout, ResourceState::COPY_DST)); + FlushBarriers(cmd); device->CopyTexture( &output_depth_stencil, 0, 0, 0, 0, 0, @@ -17896,10 +17960,10 @@ void CopyDepthStencil( ImageAspect::COLOR ); - barrier_stack.push_back(GPUBarrier::Image(input_depth, ResourceState::COPY_SRC, input_depth->desc.layout)); - barrier_stack.push_back(GPUBarrier::Image(input_stencil, ResourceState::COPY_SRC, input_stencil->desc.layout)); - 
barrier_stack.push_back(GPUBarrier::Image(&output_depth_stencil, ResourceState::COPY_DST, output_depth_stencil.desc.layout)); - barrier_stack_flush(cmd); + PushBarrier(GPUBarrier::Image(input_depth, ResourceState::COPY_SRC, input_depth->desc.layout)); + PushBarrier(GPUBarrier::Image(input_stencil, ResourceState::COPY_SRC, input_stencil->desc.layout)); + PushBarrier(GPUBarrier::Image(&output_depth_stencil, ResourceState::COPY_DST, output_depth_stencil.desc.layout)); + FlushBarriers(cmd); } device->EventEnd(cmd); @@ -18353,6 +18417,30 @@ bool IsMeshletOcclusionCullingEnabled() { return MESHLET_OCCLUSION_CULLING; } +void SetCapsuleShadowEnabled(bool value) +{ + CAPSULE_SHADOW_ENABLED = value; +} +bool IsCapsuleShadowEnabled() +{ + return CAPSULE_SHADOW_ENABLED; +} +void SetCapsuleShadowAngle(float value) +{ + CAPSULE_SHADOW_ANGLE = value; +} +float GetCapsuleShadowAngle() +{ + return CAPSULE_SHADOW_ANGLE; +} +void SetCapsuleShadowFade(float value) +{ + CAPSULE_SHADOW_FADE = value; +} +float GetCapsuleShadowFade() +{ + return CAPSULE_SHADOW_FADE; +} wi::Resource CreatePaintableTexture(uint32_t width, uint32_t height, uint32_t mips, wi::Color initialColor) { diff --git a/WickedEngine/wiRenderer.h b/WickedEngine/wiRenderer.h index 1d91fff6e5..b2e9c7fc3d 100644 --- a/WickedEngine/wiRenderer.h +++ b/WickedEngine/wiRenderer.h @@ -117,9 +117,10 @@ namespace wi::renderer ALLOW_ENVPROBES = 1 << 3, ALLOW_EMITTERS = 1 << 4, ALLOW_HAIRS = 1 << 5, - ALLOW_REQUEST_REFLECTION = 1 << 6, - ALLOW_OCCLUSION_CULLING = 1 << 7, - ALLOW_SHADOW_ATLAS_PACKING = 1 << 8, + ALLOW_COLLIDERS = 1 << 6, + ALLOW_REQUEST_REFLECTION = 1 << 7, + ALLOW_OCCLUSION_CULLING = 1 << 8, + ALLOW_SHADOW_ATLAS_PACKING = 1 << 9, ALLOW_EVERYTHING = ~0u }; @@ -133,6 +134,7 @@ namespace wi::renderer wi::vector visibleEmitters; wi::vector visibleHairs; wi::vector visibleLights; + wi::vector visibleColliders; wi::rectpacker::State shadow_packer; wi::rectpacker::Rect rain_blocker_shadow_rect; wi::vector 
visibleLightShadowRects; @@ -155,6 +157,7 @@ namespace wi::renderer visibleEnvProbes.clear(); visibleEmitters.clear(); visibleHairs.clear(); + visibleColliders.clear(); object_counter.store(0); light_counter.store(0); @@ -1129,6 +1132,12 @@ namespace wi::renderer void SetMeshletOcclusionCullingEnabled(bool value); bool IsMeshletOcclusionCullingEnabled(); void Workaround( const int bug, wi::graphics::CommandList cmd); + void SetCapsuleShadowEnabled(bool value); + bool IsCapsuleShadowEnabled(); + void SetCapsuleShadowAngle(float value); // cone angle in radians + float GetCapsuleShadowAngle(); + void SetCapsuleShadowFade(float value); + float GetCapsuleShadowFade(); // Gets pick ray according to the current screen resolution and pointer coordinates. Can be used as input into RayIntersectWorld() wi::primitive::Ray GetPickRay(long cursorX, long cursorY, const wi::Canvas& canvas, const wi::scene::CameraComponent& camera = wi::scene::GetCamera()); diff --git a/WickedEngine/wiScene.cpp b/WickedEngine/wiScene.cpp index 9addd24861..8007b7c103 100644 --- a/WickedEngine/wiScene.cpp +++ b/WickedEngine/wiScene.cpp @@ -1093,12 +1093,14 @@ namespace wi::scene collider_allocator_cpu.store(0u); collider_allocator_gpu.store(0u); collider_deinterleaved_data.reserve( + sizeof(wi::primitive::AABB) * colliders.GetCount() + sizeof(wi::primitive::AABB) * colliders.GetCount() + sizeof(ColliderComponent) * colliders.GetCount() + sizeof(ColliderComponent) * colliders.GetCount() ); aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data(); - colliders_cpu = (ColliderComponent*)(aabb_colliders_cpu + colliders.GetCount()); + aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount(); + colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount()); colliders_gpu = colliders_cpu + colliders.GetCount(); for (size_t i = 0; i < colliders.GetCount(); ++i) @@ -1167,6 +1169,7 @@ namespace wi::scene { uint32_t index = collider_allocator_gpu.fetch_add(1u); 
colliders_gpu[index] = collider; + aabb_colliders_gpu[index] = aabb; } } collider_count_cpu = collider_allocator_cpu.load(); @@ -3726,12 +3729,14 @@ namespace wi::scene collider_allocator_cpu.store(0u); collider_allocator_gpu.store(0u); collider_deinterleaved_data.reserve( + sizeof(wi::primitive::AABB)* colliders.GetCount() + sizeof(wi::primitive::AABB) * colliders.GetCount() + sizeof(ColliderComponent) * colliders.GetCount() + sizeof(ColliderComponent) * colliders.GetCount() ); aabb_colliders_cpu = (wi::primitive::AABB*)collider_deinterleaved_data.data(); - colliders_cpu = (ColliderComponent*)(aabb_colliders_cpu + colliders.GetCount()); + aabb_colliders_gpu = aabb_colliders_cpu + colliders.GetCount(); + colliders_cpu = (ColliderComponent*)(aabb_colliders_gpu + colliders.GetCount()); colliders_gpu = colliders_cpu + colliders.GetCount(); wi::jobsystem::Dispatch(ctx, (uint32_t)colliders.GetCount(), small_subtask_groupsize, [&](wi::jobsystem::JobArgs args) { @@ -3806,6 +3811,7 @@ namespace wi::scene { uint32_t index = collider_allocator_gpu.fetch_add(1u); colliders_gpu[index] = collider; + aabb_colliders_gpu[index] = aabb; } }); diff --git a/WickedEngine/wiScene.h b/WickedEngine/wiScene.h index 689ccc5254..fba915bc58 100644 --- a/WickedEngine/wiScene.h +++ b/WickedEngine/wiScene.h @@ -279,6 +279,7 @@ namespace wi::scene uint32_t collider_count_cpu = 0; uint32_t collider_count_gpu = 0; wi::primitive::AABB* aabb_colliders_cpu = nullptr; + wi::primitive::AABB* aabb_colliders_gpu = nullptr; ColliderComponent* colliders_cpu = nullptr; ColliderComponent* colliders_gpu = nullptr; wi::BVH collider_bvh; diff --git a/WickedEngine/wiScene_Components.cpp b/WickedEngine/wiScene_Components.cpp index 017d26c649..b2918b6429 100644 --- a/WickedEngine/wiScene_Components.cpp +++ b/WickedEngine/wiScene_Components.cpp @@ -374,6 +374,10 @@ namespace wi::scene { material.options_stencilref |= SHADERMATERIAL_OPTION_BIT_USE_VERTEXAO; } + if (IsCapsuleShadowDisabled()) + { + 
material.options_stencilref |= SHADERMATERIAL_OPTION_BIT_CAPSULE_SHADOW_DISABLED; + } material.options_stencilref |= wi::renderer::CombineStencilrefs(engineStencilRef, userStencilRef) << 24u; diff --git a/WickedEngine/wiScene_Components.h b/WickedEngine/wiScene_Components.h index efda366d19..6409aacd06 100644 --- a/WickedEngine/wiScene_Components.h +++ b/WickedEngine/wiScene_Components.h @@ -134,6 +134,7 @@ namespace wi::scene DISABLE_VERTEXAO = 1 << 14, DISABLE_TEXTURE_STREAMING = 1 << 15, COPLANAR_BLENDING = 1 << 16, // force transparent material draw in opaque pass (useful for coplanar polygons) + DISABLE_CAPSULE_SHADOW = 1 << 17, }; uint32_t _flags = CAST_SHADOW; @@ -364,6 +365,10 @@ namespace wi::scene interiorMappingRotation = value; } + constexpr bool IsCapsuleShadowDisabled() const { return _flags & DISABLE_CAPSULE_SHADOW; } + constexpr void SetCapsuleShadowDisabled(bool value = true) { if (value) { _flags |= DISABLE_CAPSULE_SHADOW; } else { _flags &= ~DISABLE_CAPSULE_SHADOW; } } + + void SetPreferUncompressedTexturesEnabled(bool value = true) { if (value) { _flags |= PREFER_UNCOMPRESSED_TEXTURES; } else { _flags &= ~PREFER_UNCOMPRESSED_TEXTURES; } CreateRenderData(true); } // The MaterialComponent will be written to ShaderMaterial (a struct that is optimized for GPU use) @@ -1842,14 +1847,17 @@ namespace wi::scene EMPTY = 0, CPU = 1 << 0, GPU = 1 << 1, + CAPSULE_SHADOW = 1 << 2, }; uint32_t _flags = CPU; constexpr void SetCPUEnabled(bool value = true) { if (value) { _flags |= CPU; } else { _flags &= ~CPU; } } constexpr void SetGPUEnabled(bool value = true) { if (value) { _flags |= GPU; } else { _flags &= ~GPU; } } + constexpr void SetCapsuleShadowEnabled(bool value = true) { if (value) { _flags |= CAPSULE_SHADOW; } else { _flags &= ~CAPSULE_SHADOW; } } constexpr bool IsCPUEnabled() const { return _flags & CPU; } constexpr bool IsGPUEnabled() const { return _flags & GPU; } + constexpr bool IsCapsuleShadowEnabled() const { return _flags & CAPSULE_SHADOW; } 
enum class Shape { @@ -1868,6 +1876,7 @@ namespace wi::scene wi::primitive::Capsule capsule; wi::primitive::Plane plane; uint32_t layerMask = ~0u; + float dist = 0; void Serialize(wi::Archive& archive, wi::ecs::EntitySerializer& seri); }; @@ -2034,6 +2043,7 @@ namespace wi::scene LOOKAT = 1 << 0, RAGDOLL_PHYSICS = 1 << 1, DISABLE_INTERSECTION = 1 << 2, + DISABLE_CAPSULE_SHADOW = 1 << 3, }; uint32_t _flags = LOOKAT; @@ -2112,10 +2122,12 @@ namespace wi::scene constexpr bool IsLookAtEnabled() const { return _flags & LOOKAT; } constexpr bool IsRagdollPhysicsEnabled() const { return _flags & RAGDOLL_PHYSICS; } constexpr bool IsIntersectionDisabled() const { return _flags & DISABLE_INTERSECTION; } + constexpr bool IsCapsuleShadowDisabled() const { return _flags & DISABLE_CAPSULE_SHADOW; } constexpr void SetLookAtEnabled(bool value = true) { if (value) { _flags |= LOOKAT; } else { _flags &= ~LOOKAT; } } constexpr void SetRagdollPhysicsEnabled(bool value = true) { if (value) { _flags |= RAGDOLL_PHYSICS; } else { _flags &= ~RAGDOLL_PHYSICS; } } constexpr void SetIntersectionDisabled(bool value = true) { if (value) { _flags |= DISABLE_INTERSECTION; } else { _flags &= ~DISABLE_INTERSECTION; } } + constexpr void SetCapsuleShadowDisabled(bool value = true) { if (value) { _flags |= DISABLE_CAPSULE_SHADOW; } else { _flags &= ~DISABLE_CAPSULE_SHADOW; } } XMFLOAT2 head_rotation_max = XMFLOAT2(XM_PI / 3.0f, XM_PI / 6.0f); float head_rotation_speed = 0.1f; diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index f30c0e9dd6..859ac4c9d3 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 694; + const int revision = 695; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);