From bcfbd1c92c9858b03ccdcc727838ea64ddf7fe7a Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Tue, 14 Nov 2023 17:41:51 +0900
Subject: [PATCH] llpc: GFX11 flat/custom parameter loads need to be strict WQM

These already use a WQM annotation to ensure the fetch lanes are active,
but this does not work if there are discards or divergence, as the
WQM exec mask will then not be a true WQM (some helper invocations are killed).

Hence:
- Generate strict.wqm call for flat/custom interpolation loads
- Add test coverage for this code path

Ideally this could be resolved by using the FI bit on the mov_dpp,
but there is no backend support for this currently.
---
 lgc/patch/PatchInOutImportExport.cpp       |  7 +++--
 llpc/test/shaderdb/gfx11/FlatParamDpp.frag | 34 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 llpc/test/shaderdb/gfx11/FlatParamDpp.frag
diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp
index d31a0aad69..3d15ac692b 100644
--- a/lgc/patch/PatchInOutImportExport.cpp
+++ b/lgc/patch/PatchInOutImportExport.cpp
@@ -1754,9 +1754,10 @@ Value *PatchInOutImportExport::performFsParameterLoad(BuilderBase &builder, Valu
     compValue = builder.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, builder.getInt32Ty(),
                                         {compValue, builder.getInt32(static_cast<unsigned>(dppCtrl)),
                                          builder.getInt32(15), builder.getInt32(15), builder.getTrue()});
-    // NOTE: Make mov_dpp and its source instructions run in WQM to make sure the mov_dpp could fetch
-    // correct data from possible inactive lanes.
-    compValue = builder.CreateIntrinsic(Intrinsic::amdgcn_wqm, builder.getInt32Ty(), compValue);
+    // NOTE: mov_dpp must run in strict WQM to access lanes potentially inactive with normal exec/WQM.
+    // lds_param_load always runs in strict WQM, but exec/WQM may not match this due to discards or divergence.
+    // Ideally we would use the FI bit on the mov_dpp, but there is currently no backend support.
+    compValue = builder.CreateIntrinsic(Intrinsic::amdgcn_strict_wqm, builder.getInt32Ty(), compValue);
     compValue = builder.CreateBitCast(compValue, builder.getFloatTy());
   } else {
     Value *args[] = {
diff --git a/llpc/test/shaderdb/gfx11/FlatParamDpp.frag b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag
new file mode 100644
index 0000000000..84c8a4023e
--- /dev/null
+++ b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag
@@ -0,0 +1,34 @@
+// Check that flat parameter load uses DPP in strict WQM
+
+// RUN: amdllpc %gfxip --v %s |\
+// RUN:   FileCheck %s --check-prefix=CHECK
+//
+// CHECK-LABEL: {{^}}// LLPC pipeline patching results
+// CHECK:       call void @llvm.amdgcn.kill(i1 false)
+// CHECK:       [[P0:%.*]] = call float @llvm.amdgcn.lds.param.load(i32 immarg 2, i32 immarg 2, i32 %PrimMask)
+// CHECK:       [[P1:%.*]] = bitcast float [[P0]] to i32
+// CHECK:       [[P2:%.*]] = call i32 @llvm.amdgcn.mov.dpp.i32(i32 [[P1]], i32 0, i32 15, i32 15, i1 true)
+// CHECK:       [[P3:%.*]] = call i32 @llvm.amdgcn.strict.wqm.i32(i32 [[P2]])
+// CHECK-LABEL: {{^}}===== AMDLLPC SUCCESS =====
+
+#version 450
+
+layout (location = 0) in vec3 texCoordIn;
+layout (location = 1) in float discardPixel;
+layout (location = 2) flat in vec4 p0;
+
+layout (binding = 0) uniform sampler2D image1;
+layout (binding = 1) uniform sampler s0;
+layout (binding = 2) uniform textureCube t0;
+
+layout (location = 0) out vec4 fragColor;
+
+void main() { // Sample, conditionally discard, then read a flat input to drive a second sample.
+  fragColor = texture(image1, texCoordIn.xy);
+
+  if (discardPixel > 0.0)
+    discard; // the discard precedes the flat load, so exec may no longer cover whole quads
+
+  float lod = p0.z; // p0 is declared flat: this is the flat parameter load the test targets
+  fragColor += textureLod(samplerCube(t0, s0), texCoordIn, lod);
+}