From 928ab0bcff9bb029965ba4f762547a7f9df7785f Mon Sep 17 00:00:00 2001
From: Jake Turner <jake@evansturner.co.uk>
Date: Thu, 10 Aug 2023 17:22:37 +0100
Subject: [PATCH] Extend D3D12 Depth Test Overlays

Support shader-exported depth by replaying the draw with the captured pixel shader to determine which pixels pass the depth test
---
 renderdoc/driver/d3d12/d3d12_debug.cpp   | 193 ++++++++++++++++++
 renderdoc/driver/d3d12/d3d12_debug.h     |   1 +
 renderdoc/driver/d3d12/d3d12_overlay.cpp | 246 +++++++++++++++++++++--
 renderdoc/driver/d3d12/d3d12_replay.h    |   4 +
 4 files changed, 431 insertions(+), 13 deletions(-)

diff --git a/renderdoc/driver/d3d12/d3d12_debug.cpp b/renderdoc/driver/d3d12/d3d12_debug.cpp
index f8ed7123d2b..f1e140b3345 100644
--- a/renderdoc/driver/d3d12/d3d12_debug.cpp
+++ b/renderdoc/driver/d3d12/d3d12_debug.cpp
@@ -2456,6 +2456,186 @@ void D3D12Replay::OverlayRendering::Init(WrappedID3D12Device *device, D3D12Debug
     SAFE_RELEASE(QOResolvePS);
   }
 
+  {
+    // empty root signature: the stencil resolve pass binds no resources
+    D3D12RootSignature rootSig = {};
+    ID3DBlob *root = shaderCache->MakeRootSig(rootSig);
+    hr = E_FAIL;    // a missing blob is reported below instead of being dereferenced
+    if(root)
+      hr = device->CreateRootSignature(0, root->GetBufferPointer(), root->GetBufferSize(),
+                                       __uuidof(ID3D12RootSignature),
+                                       (void **)&DepthResolveRootSig);
+    if(FAILED(hr))
+      RDCERR("Failed to create root signature for overlay depth stencil resolve pass HRESULT: %s",
+             ToStr(hr).c_str());
+    SAFE_RELEASE(root);
+  }
+
+  {
+    D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = {};
+    pipeDesc.pRootSignature = DepthResolveRootSig;
+
+    ID3DBlob *FullscreenVS = NULL;
+    rdcstr hlsl = GetEmbeddedResource(misc_hlsl);
+    shaderCache->GetShaderBlob(hlsl.c_str(), "RENDERDOC_FullscreenVS",
+                               D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "vs_5_0", &FullscreenVS);
+    pipeDesc.VS.BytecodeLength = FullscreenVS->GetBufferSize();
+    pipeDesc.VS.pShaderBytecode = FullscreenVS->GetBufferPointer();
+
+    ID3DBlob *FixedColPS = shaderCache->MakeFixedColShader(D3D12ShaderCache::GREEN);
+    pipeDesc.PS.BytecodeLength = FixedColPS->GetBufferSize();
+    pipeDesc.PS.pShaderBytecode = FixedColPS->GetBufferPointer();
+
+    pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
+    pipeDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
+    pipeDesc.SampleMask = 0xFFFFFFFF;
+    pipeDesc.SampleDesc.Count = 1;
+    pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+    pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+    pipeDesc.NumRenderTargets = 1;
+    pipeDesc.RTVFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
+
+    pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
+    pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
+
+    pipeDesc.DepthStencilState.DepthEnable = FALSE;
+    pipeDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
+    pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+    pipeDesc.DepthStencilState.StencilEnable = TRUE;
+    pipeDesc.DepthStencilState.StencilReadMask = 0xff;
+    pipeDesc.DepthStencilState.StencilWriteMask = 0x0;
+    pipeDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
+    pipeDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
+    pipeDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP;
+    pipeDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_EQUAL;
+    pipeDesc.DepthStencilState.BackFace = pipeDesc.DepthStencilState.FrontFace;
+    for(DXGI_FORMAT fmt : {DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_D32_FLOAT_S8X24_UINT})
+    {
+      ID3D12PipelineState **psos = DepthResolvePipe[fmt];
+      for(size_t i = 0; i < 8; i++)
+      {
+        pipeDesc.SampleDesc.Count = UINT(1 << i);
+
+        D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS check = {};
+        check.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
+        check.SampleCount = pipeDesc.SampleDesc.Count;
+        device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &check, sizeof(check));
+
+        if(check.NumQualityLevels == 0)
+          continue;
+
+        check.Format = fmt;
+        check.SampleCount = pipeDesc.SampleDesc.Count;
+        device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &check, sizeof(check));
+
+        if(check.NumQualityLevels == 0)
+          continue;
+
+        pipeDesc.DSVFormat = fmt;
+        hr = device->CreateGraphicsPipelineState(&pipeDesc, __uuidof(ID3D12PipelineState),
+                                                 (void **)&psos[i]);
+        if(FAILED(hr))
+          RDCERR("Failed to create depth resolve pass overlay pso HRESULT: %s", ToStr(hr).c_str());
+      }
+    }
+
+    SAFE_RELEASE(FullscreenVS);
+    SAFE_RELEASE(FixedColPS);
+  }
+
+  {
+    ID3DBlob *root = shaderCache->MakeRootSig(
+        {tableParam(D3D12_SHADER_VISIBILITY_PIXEL, D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 0, 1)});
+    hr = E_FAIL;    // depth copy SRV table; a missing blob fails here instead of crashing
+    if(root)
+      hr = device->CreateRootSignature(0, root->GetBufferPointer(), root->GetBufferSize(),
+                                       __uuidof(ID3D12RootSignature), (void **)&DepthCopyRootSig);
+    if(FAILED(hr))
+      RDCERR("Failed to create depth copy root signature HRESULT: %s", ToStr(hr).c_str());
+    SAFE_RELEASE(root);
+  }
+  {
+    D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeDesc = {};
+    pipeDesc.pRootSignature = DepthCopyRootSig;
+
+    ID3DBlob *FullscreenVS = NULL;
+    {
+      rdcstr hlsl = GetEmbeddedResource(misc_hlsl);
+      shaderCache->GetShaderBlob(hlsl.c_str(), "RENDERDOC_FullscreenVS",
+                                 D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "vs_5_0", &FullscreenVS);
+    }
+    pipeDesc.VS.BytecodeLength = FullscreenVS->GetBufferSize();
+    pipeDesc.VS.pShaderBytecode = FullscreenVS->GetBufferPointer();
+
+    ID3DBlob *DepthCopyPS = NULL;
+    ID3DBlob *DepthCopyMSPS = NULL;
+    {
+      rdcstr hlsl = GetEmbeddedResource(depth_copy_hlsl);
+      shaderCache->GetShaderBlob(hlsl.c_str(), "RENDERDOC_DepthCopyPS",
+                                 D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "ps_5_0", &DepthCopyPS);
+      shaderCache->GetShaderBlob(hlsl.c_str(), "RENDERDOC_DepthCopyMSPS",
+                                 D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "ps_5_0", &DepthCopyMSPS);
+    }
+
+    pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
+    pipeDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
+    pipeDesc.SampleMask = 0xFFFFFFFF;
+    pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
+    pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+    pipeDesc.NumRenderTargets = 0;
+
+    // Clear stencil to 0 during the copy
+    pipeDesc.DepthStencilState.DepthEnable = TRUE;
+    pipeDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
+    pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+    pipeDesc.DepthStencilState.StencilEnable = TRUE;
+    pipeDesc.DepthStencilState.StencilReadMask = 0x0;
+    pipeDesc.DepthStencilState.StencilWriteMask = 0xff;
+    pipeDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_ZERO;
+    pipeDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_ZERO;
+    pipeDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_ZERO;
+    pipeDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+    pipeDesc.DepthStencilState.BackFace = pipeDesc.DepthStencilState.FrontFace;
+    pipeDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
+
+    // build one depth copy PSO per stencil-capable format and per power-of-two sample count
+    for(DXGI_FORMAT fmt : {DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_D32_FLOAT_S8X24_UINT})
+    {
+      ID3D12PipelineState **psos = DepthCopyPipe[fmt];
+      for(size_t i = 0; i < 8; i++)
+      {
+        pipeDesc.SampleDesc.Count = UINT(1 << i);
+        D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS check = {};
+        check.Format = fmt;
+        check.SampleCount = pipeDesc.SampleDesc.Count;
+        device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &check, sizeof(check));
+        // skip sample counts that report no quality levels for this format
+        if(check.NumQualityLevels == 0)
+          continue;
+        // slot 0 is the single-sampled pipe, every other slot uses the multisampled copy shader
+        pipeDesc.DSVFormat = fmt;
+        if(i == 0)
+        {
+          pipeDesc.PS.BytecodeLength = DepthCopyPS->GetBufferSize();
+          pipeDesc.PS.pShaderBytecode = DepthCopyPS->GetBufferPointer();
+        }
+        else
+        {
+          pipeDesc.PS.BytecodeLength = DepthCopyMSPS->GetBufferSize();
+          pipeDesc.PS.pShaderBytecode = DepthCopyMSPS->GetBufferPointer();
+        }
+
+        hr = device->CreateGraphicsPipelineState(&pipeDesc, __uuidof(ID3D12PipelineState),
+                                                 (void **)&psos[i]);
+        if(FAILED(hr))
+          RDCERR("Failed to create depth copy pass overlay pso HRESULT: %s", ToStr(hr).c_str());
+      }
+    }
+    SAFE_RELEASE(DepthCopyMSPS);
+    SAFE_RELEASE(DepthCopyPS);
+    SAFE_RELEASE(FullscreenVS);
+  }
+
   shaderCache->SetCaching(false);
 }
 
@@ -2470,6 +2650,19 @@ void D3D12Replay::OverlayRendering::Release()
   for(size_t i = 0; i < ARRAY_COUNT(QuadResolvePipe); i++)
     SAFE_RELEASE(QuadResolvePipe[i]);
 
+  // release the depth resolve root signature and every PSO stored for each depth format,
+  // all eight sample-count slots per format
+  SAFE_RELEASE(DepthResolveRootSig);
+  for(auto &fmtPipes : DepthResolvePipe)
+    for(ID3D12PipelineState *&pso : fmtPipes.second)
+      SAFE_RELEASE(pso);
+
+  // same again for the depth copy pass
+  SAFE_RELEASE(DepthCopyRootSig);
+  for(auto &fmtPipes : DepthCopyPipe)
+    for(ID3D12PipelineState *&pso : fmtPipes.second)
+      SAFE_RELEASE(pso);
+
   SAFE_RELEASE(Texture);
 }
 
diff --git a/renderdoc/driver/d3d12/d3d12_debug.h b/renderdoc/driver/d3d12/d3d12_debug.h
index 3634a103265..61bb89e959d 100644
--- a/renderdoc/driver/d3d12/d3d12_debug.h
+++ b/renderdoc/driver/d3d12/d3d12_debug.h
@@ -90,6 +90,7 @@ enum CBVUAVSRVSlot
   FIRST_PIXELHISTORY_UAV,
   LAST_PIXELHISTORY_UAV = FIRST_PIXELHISTORY_UAV + 5,
 
+  DEPTH_COPY_SRV,
   MAX_SRV_SLOT,
 };
 
diff --git a/renderdoc/driver/d3d12/d3d12_overlay.cpp b/renderdoc/driver/d3d12/d3d12_overlay.cpp
index fd6bcccb8ed..6987a73fc04 100644
--- a/renderdoc/driver/d3d12/d3d12_overlay.cpp
+++ b/renderdoc/driver/d3d12/d3d12_overlay.cpp
@@ -1102,7 +1102,7 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
     b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
     b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;
 
-    // prepare tex resource for copying
+    // prepare tex resource for writing
     list->ResourceBarrier(1, &b);
 
     list->Close();
@@ -2089,6 +2089,140 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
   {
     if(pipe && pipe->IsGraphics())
     {
+      // renderDepthStencil starts out aliasing renderDepth. It holds its own reference so the
+      // paired SAFE_RELEASE(renderDepthStencil)/SAFE_RELEASE(renderDepth) calls never double-free
+      ID3D12Resource *renderDepthStencil = renderDepth;
+      if(renderDepthStencil)
+        renderDepthStencil->AddRef();
+
+      HRESULT hr;
+      DXGI_FORMAT dsFmt = dsViewDesc.Format;
+      // the depth overlay uses stencil buffer as a mask for the passing pixels
+      if((overlay == DebugOverlay::Depth) && renderDepth)
+      {
+        // choose a format with a stencil plane: D24S8 for D24S8/D16 sources, D32S8 otherwise
+        DXGI_FORMAT dsNewFmt = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
+        if(dsFmt == DXGI_FORMAT_D24_UNORM_S8_UINT || dsFmt == DXGI_FORMAT_D16_UNORM)
+          dsNewFmt = DXGI_FORMAT_D24_UNORM_S8_UINT;
+
+        if(m_Overlay.DepthResolvePipe.count(dsNewFmt) == 0)
+        {
+          RDCERR("Unhandled depth resolve format : %s", ToStr(dsNewFmt).c_str());
+          SAFE_RELEASE(renderDepthStencil);
+          SAFE_RELEASE(renderDepth);
+          return m_Overlay.resourceId;
+        }
+        if(m_Overlay.DepthCopyPipe.count(dsNewFmt) == 0)
+        {
+          RDCERR("Unhandled depth copy format : %s", ToStr(dsNewFmt).c_str());
+          SAFE_RELEASE(renderDepthStencil);
+          SAFE_RELEASE(renderDepth);
+          return m_Overlay.resourceId;
+        }
+        // copy depth over to a new depth-stencil buffer
+        if(dsFmt != dsNewFmt)
+        {
+          D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+          srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+          if(overlayTexDesc.SampleDesc.Count == 1)
+          {
+            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+            srvDesc.Texture2D.MipLevels = (UINT)-1;
+            srvDesc.Texture2D.MostDetailedMip = 0;
+            srvDesc.Texture2D.PlaneSlice = 0;
+            srvDesc.Texture2D.ResourceMinLODClamp = 0.0f;
+          }
+          else
+          {
+            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
+          }
+
+          srvDesc.Format = DXGI_FORMAT_UNKNOWN;
+          switch(dsFmt)
+          {
+            case DXGI_FORMAT_D32_FLOAT:
+            case DXGI_FORMAT_R32_FLOAT:
+            case DXGI_FORMAT_R32_TYPELESS: srvDesc.Format = DXGI_FORMAT_R32_FLOAT; break;
+
+            case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+            case DXGI_FORMAT_R32G8X24_TYPELESS:
+            case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+            case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+              srvDesc.Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+              break;
+
+            case DXGI_FORMAT_D24_UNORM_S8_UINT:
+            case DXGI_FORMAT_R24G8_TYPELESS:
+            case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+            case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+              srvDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
+              break;
+
+            case DXGI_FORMAT_D16_UNORM:
+            case DXGI_FORMAT_R16_TYPELESS: srvDesc.Format = DXGI_FORMAT_R16_UNORM; break;
+
+            default: break;
+          }
+          if(srvDesc.Format == DXGI_FORMAT_UNKNOWN)
+          {
+            RDCERR("Unknown Depth overlay format %s", ToStr(dsFmt).c_str());
+            SAFE_RELEASE(renderDepthStencil);
+            SAFE_RELEASE(renderDepth);
+            return m_Overlay.resourceId;
+          }
+
+          m_pDevice->CreateShaderResourceView(renderDepth, &srvDesc,
+                                              GetDebugManager()->GetCPUHandle(DEPTH_COPY_SRV));
+
+          // New depth-stencil texture: drop the aliasing reference before replacing the pointer
+          SAFE_RELEASE(renderDepthStencil);
+          dsFmt = dsNewFmt;
+          depthTexDesc.Format = dsFmt;
+          hr = m_pDevice->CreateCommittedResource(
+              &heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc, D3D12_RESOURCE_STATE_DEPTH_WRITE,
+              NULL, __uuidof(ID3D12Resource), (void **)&renderDepthStencil);
+          if(FAILED(hr))
+          {
+            RDCERR("Failed to create renderDepthStencil HRESULT: %s", ToStr(hr).c_str());
+            SAFE_RELEASE(renderDepthStencil);
+            SAFE_RELEASE(renderDepth);
+            return m_Overlay.resourceId;
+          }
+
+          // Copy renderDepth depth data into renderDepthStencil depth data using fullscreen pass
+          // the depth copy pipeline's stencil ops write 0 to the stencil during the copy
+          D3D12_RESOURCE_BARRIER b = {};
+          b.Transition.pResource = renderDepth;
+          b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+          b.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE;
+          b.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+          list->ResourceBarrier(1, &b);
+
+          D3D12_DEPTH_STENCIL_VIEW_DESC dsNewViewDesc = dsViewDesc;
+          dsNewViewDesc.Format = dsFmt;
+          m_pDevice->CreateDepthStencilView(renderDepthStencil, &dsNewViewDesc, dsv);
+
+          list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+          D3D12_VIEWPORT view = {0.0f, 0.0f, (float)resourceDesc.Width, (float)resourceDesc.Height,
+                                 0.0f, 1.0f};
+          list->RSSetViewports(1, &view);
+          D3D12_RECT scissor = {0, 0, 16384, 16384};
+          list->RSSetScissorRects(1, &scissor);
+
+          list->SetPipelineState(
+              m_Overlay.DepthCopyPipe[dsFmt][Log2Floor(overlayTexDesc.SampleDesc.Count)]);
+          list->SetGraphicsRootSignature(m_Overlay.DepthCopyRootSig);
+
+          GetDebugManager()->SetDescriptorHeaps(list, true, false);
+          list->SetGraphicsRootDescriptorTable(0, GetDebugManager()->GetGPUHandle(DEPTH_COPY_SRV));
+
+          list->OMSetRenderTargets(0, NULL, FALSE, &dsv);
+          list->DrawInstanced(3, 1, 0, 0);
+
+          rs.ApplyState(m_pDevice, list);
+        }
+      }
+
       D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
       pipe->Fill(psoDesc);
 
@@ -2099,6 +2233,8 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
       ID3DBlob *green =
           m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::GREEN, dxil);
 
+      D3D12_SHADER_BYTECODE originalPS = psoDesc.PS;
+
       // make sure that if a test is disabled, it shows all
       // pixels passing
       if(!psoDesc.DepthStencilState.DepthEnable)
@@ -2109,17 +2245,34 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
         psoDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
       }
 
-      if(overlay == DebugOverlay::Depth)
+      if((overlay == DebugOverlay::Depth) && renderDepthStencil)
       {
-        psoDesc.DepthStencilState.StencilEnable = FALSE;
+        // Do not replace shader
+        // disable colour write
+        psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0x0;
+        // Write stencil 0x1 for depth passing pixels
+        psoDesc.DepthStencilState.StencilEnable = TRUE;
         psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
-        psoDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+        psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
+        psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
+        psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
+        psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace;
       }
       else
       {
-        psoDesc.DepthStencilState.DepthEnable = FALSE;
-        psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
-        psoDesc.DepthStencilState.DepthBoundsTestEnable = FALSE;
+        psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
+        if(overlay == DebugOverlay::Depth)
+        {
+          psoDesc.DepthStencilState.StencilEnable = FALSE;
+          psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+          psoDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+        }
+        else
+        {
+          psoDesc.DepthStencilState.DepthEnable = FALSE;
+          psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
+          psoDesc.DepthStencilState.DepthBoundsTestEnable = FALSE;
+        }
       }
 
       RDCEraseEl(psoDesc.RTVFormats.RTFormats);
@@ -2130,7 +2283,6 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
       psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
       psoDesc.BlendState.IndependentBlendEnable = FALSE;
       psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
-      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
       psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
 
       psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
@@ -2146,6 +2298,8 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
       {
         SAFE_RELEASE(red);
         SAFE_RELEASE(green);
+        SAFE_RELEASE(renderDepthStencil);
+        SAFE_RELEASE(renderDepth);
         m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
                                    MessageSource::UnsupportedConfiguration,
                                    "No DXIL shader available for overlay");
@@ -2156,15 +2310,38 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
       psoDesc.PS.BytecodeLength = green->GetBufferSize();
 
       ID3D12PipelineState *greenPSO = NULL;
-      HRESULT hr = m_pDevice->CreatePipeState(psoDesc, &greenPSO);
+      hr = m_pDevice->CreatePipeState(psoDesc, &greenPSO);
       if(FAILED(hr))
       {
         RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
         SAFE_RELEASE(red);
         SAFE_RELEASE(green);
+        SAFE_RELEASE(renderDepthStencil);
+        SAFE_RELEASE(renderDepth);
         return m_Overlay.resourceId;
       }
 
+      ID3D12PipelineState *depthWriteStencilPSO = NULL;
+      if((overlay == DebugOverlay::Depth) && renderDepthStencil)
+      {
+        psoDesc.DSVFormat = dsFmt;
+        psoDesc.PS = originalPS;
+
+        hr = m_pDevice->CreatePipeState(psoDesc, &depthWriteStencilPSO);
+        if(FAILED(hr))
+        {
+          RDCERR("Failed to create depth write overlay pso HRESULT: %s", ToStr(hr).c_str());
+          SAFE_RELEASE(greenPSO);
+          SAFE_RELEASE(red);
+          SAFE_RELEASE(green);
+          SAFE_RELEASE(renderDepthStencil);
+          SAFE_RELEASE(renderDepth);
+          return m_Overlay.resourceId;
+        }
+      }
+
+      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
+
       psoDesc.DepthStencilState.DepthEnable = FALSE;
       psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
       psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
@@ -2182,9 +2359,12 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
       if(FAILED(hr))
       {
         RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
-        SAFE_RELEASE(redPSO);
+        SAFE_RELEASE(depthWriteStencilPSO);
+        SAFE_RELEASE(greenPSO);
         SAFE_RELEASE(red);
         SAFE_RELEASE(green);
+        SAFE_RELEASE(renderDepthStencil);
+        SAFE_RELEASE(renderDepth);
         return m_Overlay.resourceId;
       }
 
@@ -2200,19 +2380,59 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
 
       m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);
 
-      rs.pipe = GetResID(greenPSO);
+      if((overlay == DebugOverlay::Depth) && renderDepthStencil)
+      {
+        rs.stencilRefBack = rs.stencilRefFront = 0x1;
+        rs.pipe = GetResID(depthWriteStencilPSO);
+      }
+      else
+      {
+        rs.pipe = GetResID(greenPSO);
+      }
 
       m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);
 
       rs = prev;
 
+      if((overlay == DebugOverlay::Depth) && renderDepthStencil)
+      {
+        // Resolve stencil = 0x1 pixels to green
+        list = m_pDevice->GetNewList();
+        if(!list)
+          return ResourceId();
+
+        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+        D3D12_VIEWPORT view = {0.0f, 0.0f, (float)resourceDesc.Width, (float)resourceDesc.Height,
+                               0.0f, 1.0f};
+        list->RSSetViewports(1, &view);
+
+        D3D12_RECT scissor = {0, 0, 16384, 16384};
+        list->RSSetScissorRects(1, &scissor);
+
+        list->SetPipelineState(
+            m_Overlay.DepthResolvePipe[dsFmt][Log2Floor(overlayTexDesc.SampleDesc.Count)]);
+        list->SetGraphicsRootSignature(m_Overlay.DepthResolveRootSig);
+
+        list->OMSetStencilRef(0x1);
+        list->OMSetRenderTargets(1, &rtv, TRUE, &dsv);
+
+        list->DrawInstanced(3, 1, 0, 0);
+
+        list->Close();
+        list = NULL;
+      }
+
       m_pDevice->ExecuteLists();
       m_pDevice->FlushLists();
 
-      SAFE_RELEASE(red);
-      SAFE_RELEASE(green);
       SAFE_RELEASE(redPSO);
+      SAFE_RELEASE(depthWriteStencilPSO);
       SAFE_RELEASE(greenPSO);
+      SAFE_RELEASE(red);
+      SAFE_RELEASE(green);
+      SAFE_RELEASE(renderDepthStencil);
+      SAFE_RELEASE(renderDepth);
     }
   }
   else
diff --git a/renderdoc/driver/d3d12/d3d12_replay.h b/renderdoc/driver/d3d12/d3d12_replay.h
index 47c56bbfb7e..4f6a9aef183 100644
--- a/renderdoc/driver/d3d12/d3d12_replay.h
+++ b/renderdoc/driver/d3d12/d3d12_replay.h
@@ -439,6 +439,10 @@ class D3D12Replay : public IReplayDriver
     ID3DBlob *QuadOverdrawWriteDXILPS = NULL;
     ID3D12RootSignature *QuadResolveRootSig = NULL;
     ID3D12PipelineState *QuadResolvePipe[8] = {NULL};
+    ID3D12RootSignature *DepthResolveRootSig = NULL;
+    std::unordered_map<DXGI_FORMAT, ID3D12PipelineState *[8]> DepthResolvePipe;
+    ID3D12RootSignature *DepthCopyRootSig = NULL;
+    std::unordered_map<DXGI_FORMAT, ID3D12PipelineState *[8]> DepthCopyPipe;
 
     ID3D12Resource *Texture = NULL;
     ResourceId resourceId;