Handle missing xfb execution mode in addCallInstForXfbOutput

There is a case where vertex outputs have xfb decorations but the shader is not declared with the Xfb execution mode. According to the Vulkan spec, these outputs will not be captured to the xfb buffer. This change adds the missing handling for this case in `addCallInstForXfbOutput`. The check for existing xfb output is based on the lgc.xfb.state metadata.
GPUOpen-Drivers · Oct 27, 2023 · b26fa0f · b26fa0f
1 parent a2ed744
commit b26fa0f
Show file tree

Hide file tree

Showing 2 changed files with 191 additions and 2 deletions.
diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp
@@ -2421,9 +2421,10 @@ void SpirvLowerGlobal::addCallInstForXfbOutput(const ShaderInOutMetadata &output
                                                unsigned xfbBufferAdjust, unsigned xfbOffsetAdjust, unsigned locOffset,
                                                lgc::InOutInfo outputInfo) {
   assert(m_shaderStage == m_lastVertexProcessingStage);
-  auto pipelineBuildInfo = static_cast<const Vkgc::GraphicsPipelineBuildInfo *>(m_context->getPipelineBuildInfo());
   DenseMap<unsigned, Vkgc::XfbOutInfo> *locXfbMapPtr = outputMeta.IsBuiltIn ? &m_builtInXfbMap : &m_genericXfbMap;
-  if (pipelineBuildInfo->apiXfbOutData.forceDisableStreamOut || (locXfbMapPtr->empty() && !outputMeta.IsXfb))
+  bool hasXfbMetadata = m_entryPoint->getMetadata(lgc::XfbStateMetadataName);
+  bool hasXfbOut = hasXfbMetadata && (!locXfbMapPtr->empty() || outputMeta.IsXfb);
+  if (!hasXfbOut)
     return;
 
   // If the XFB info is specified from API interface so we try to retrieve the info from m_locXfbMap. Otherwise, the XFB

diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm b/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py
+; RUN: amdllpc -o - -gfxip 10.3 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos; 4660
+; Bound: 122
+; Schema: 0
+               OpCapability Shader
+               OpCapability SampledCubeArray
+               OpCapability ImageBuffer
+               OpCapability ImageGatherExtended
+               OpCapability ImageQuery
+               OpCapability TransformFeedback
+               OpCapability Tessellation
+               OpCapability SignedZeroInfNanPreserve
+               OpExtension "SPV_KHR_float_controls"
+         %87 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint TessellationEvaluation %main "main" %DomainPoint %gl_PrimitiveID %o0xyzw %ipatch0y %gl_PointSize
+               OpExecutionMode %main Quads
+               OpExecutionMode %main SignedZeroInfNanPreserve 32
+          %2 = OpString "VMware VMGI Translator (shader 21)"
+          %3 = OpString "DOMA"
+         %71 = OpString "PROPERTY REFACTORING_ALLOWED"
+         %72 = OpString "DCL INPUT_PATCH_CONSTANT[0].y"
+         %73 = OpString "DCL INPUT_DOMAIN_POINT[0].xy"
+         %75 = OpString "PROPERTY INPUT_CONTROL_POINT_COUNT 4"
+         %76 = OpString "PROPERTY TESS_DOMAIN QUAD"
+         %77 = OpString "DCL OUT[0], POSITION"
+         %78 = OpString "DCL OUT[1].xy, GENERIC[1]"
+         %79 = OpString "IMM INT32 {0, 1065353216, 0, 0}"
+         %83 = OpString "DCL PRIMID[0]"
+         %85 = OpString "soEntry[0] = OUT[0].xyzw, buffer 0, stream 0"
+         %86 = OpString "RasterizedStream=255, DefaultStream=0"
+         %91 = OpString "  0: MOV OUT[0].xy, INPUT_DOMAIN_POINT[0].xyxx"
+        %102 = OpString "  1: MOV OUT[0].zw, IMM[0].xxxy"
+        %111 = OpString "  2: MOV OUT[1].x, INPUT_PATCH_CONSTANT[0].y"
+        %115 = OpString "  3: MOV OUT[1].y, PRIMID[0]"
+        %120 = OpString "  4: RET"
+        %121 = OpString "  5: END"
+               OpName %main "main"
+               OpName %void_Func "void_Func"
+               OpName %struct_2_ivec4 "struct_2_ivec4"
+               OpName %struct_2_uvec4 "struct_2_uvec4"
+               OpName %main_begin "main_begin"
+               OpName %DomainPoint "DomainPoint"
+               OpName %o0xyzw "o0xyzw"
+               OpName %ipatch0y "ipatch0y"
+               OpDecorate %DomainPoint BuiltIn TessCoord
+               OpDecorate %gl_PrimitiveID BuiltIn PrimitiveId
+               OpDecorate %o0xyzw Location 0
+               OpDecorate %o0xyzw XfbBuffer 0
+               OpDecorate %o0xyzw XfbStride 16
+               OpDecorate %o0xyzw Offset 0
+               OpDecorate %ipatch0y Patch
+               OpDecorate %ipatch0y Location 0
+               OpDecorate %ipatch0y Component 1
+               OpDecorate %gl_PointSize BuiltIn PointSize
+       %void = OpTypeVoid
+       %bool = OpTypeBool
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+      %v2int = OpTypeVector %int 2
+      %v3int = OpTypeVector %int 3
+      %v4int = OpTypeVector %int 4
+     %v2uint = OpTypeVector %uint 2
+     %v3uint = OpTypeVector %uint 3
+     %v4uint = OpTypeVector %uint 4
+     %v2bool = OpTypeVector %bool 2
+     %v3bool = OpTypeVector %bool 3
+     %v4bool = OpTypeVector %bool 4
+  %void_Func = OpTypeFunction %void
+         %42 = OpTypeFunction %v4float %v4float
+%struct_2_ivec4 = OpTypeStruct %v4int %v4int
+%struct_2_uvec4 = OpTypeStruct %v4uint %v4uint
+      %int_0 = OpConstant %int 0
+%int_1065353216 = OpConstant %int 1065353216
+         %82 = OpConstantComposite %v4int %int_0 %int_1065353216 %int_0 %int_0
+    %float_0 = OpConstant %float 0
+    %float_1 = OpConstant %float 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+      %int_3 = OpConstant %int 3
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Private_int = OpTypePointer Private %int
+%_ptr_Private_v2int = OpTypePointer Private %v2int
+%_ptr_Private_v3int = OpTypePointer Private %v3int
+%_ptr_Private_v4int = OpTypePointer Private %v4int
+%_ptr_Private_uint = OpTypePointer Private %uint
+%_ptr_Private_v2uint = OpTypePointer Private %v2uint
+%_ptr_Private_v3uint = OpTypePointer Private %v3uint
+%_ptr_Private_v4uint = OpTypePointer Private %v4uint
+%_ptr_Private_bool = OpTypePointer Private %bool
+%_ptr_Private_v2bool = OpTypePointer Private %v2bool
+%_ptr_Private_v3bool = OpTypePointer Private %v3bool
+%_ptr_Private_v4bool = OpTypePointer Private %v4bool
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float
+%_ptr_PushConstant_v4int = OpTypePointer PushConstant %v4int
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%_ptr_Input_int = OpTypePointer Input %int
+%_ptr_Input_v2int = OpTypePointer Input %v2int
+%_ptr_Input_v3int = OpTypePointer Input %v3int
+%_ptr_Input_v4int = OpTypePointer Input %v4int
+%_ptr_Input_uint = OpTypePointer Input %uint
+%_ptr_Input_v2uint = OpTypePointer Input %v2uint
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_Input_v4uint = OpTypePointer Input %v4uint
+%_ptr_Input_bool = OpTypePointer Input %bool
+%_ptr_Output_float = OpTypePointer Output %float
+%_ptr_Output_v2float = OpTypePointer Output %v2float
+%_ptr_Output_v3float = OpTypePointer Output %v3float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%_ptr_Output_int = OpTypePointer Output %int
+%_ptr_Output_v2int = OpTypePointer Output %v2int
+%_ptr_Output_v3int = OpTypePointer Output %v3int
+%_ptr_Output_v4int = OpTypePointer Output %v4int
+%_ptr_Output_uint = OpTypePointer Output %uint
+%_ptr_Output_v2uint = OpTypePointer Output %v2uint
+%_ptr_Output_v3uint = OpTypePointer Output %v3uint
+%_ptr_Output_v4uint = OpTypePointer Output %v4uint
+%DomainPoint = OpVariable %_ptr_Input_v3float Input
+%gl_PrimitiveID = OpVariable %_ptr_Input_int Input
+     %o0xyzw = OpVariable %_ptr_Output_v4float Output
+   %ipatch0y = OpVariable %_ptr_Input_float Input
+%gl_PointSize = OpVariable %_ptr_Output_float Output
+       %main = OpFunction %void None %void_Func
+ %main_begin = OpLabel
+               OpLine %71 0 0
+               OpLine %72 0 0
+               OpLine %73 0 0
+               OpLine %75 0 0
+               OpLine %76 0 0
+               OpLine %77 0 0
+               OpLine %78 0 0
+               OpLine %79 0 0
+               OpLine %83 0 0
+               OpLine %91 0 0
+         %92 = OpLoad %v3float %DomainPoint
+         %95 = OpCompositeConstruct %v4float %92 %float_1
+         %96 = OpVectorShuffle %v4float %95 %95 0 1 0 0
+         %97 = OpCompositeExtract %float %96 0
+         %98 = OpAccessChain %_ptr_Output_float %o0xyzw %int_0
+               OpStore %98 %97
+         %99 = OpCompositeExtract %float %96 1
+        %101 = OpAccessChain %_ptr_Output_float %o0xyzw %int_1
+               OpStore %101 %99
+               OpLine %102 1 0
+        %103 = OpBitcast %v4float %82
+        %104 = OpVectorShuffle %v4float %103 %103 0 0 0 1
+        %105 = OpCompositeExtract %float %104 2
+        %107 = OpAccessChain %_ptr_Output_float %o0xyzw %int_2
+               OpStore %107 %105
+        %108 = OpCompositeExtract %float %104 3
+        %110 = OpAccessChain %_ptr_Output_float %o0xyzw %int_3
+               OpStore %110 %108
+               OpLine %111 2 0
+        %112 = OpLoad %float %ipatch0y
+        %113 = OpCompositeConstruct %v4float %112 %112 %112 %112
+        %114 = OpCompositeExtract %float %113 0
+               OpLine %115 3 0
+        %116 = OpLoad %int %gl_PrimitiveID
+        %117 = OpBitcast %float %116
+        %118 = OpCompositeConstruct %v4float %117 %117 %117 %117
+        %119 = OpCompositeExtract %float %118 1
+               OpLine %120 4 0
+               OpReturn
+               OpFunctionEnd
+               OpLine %121 5 0
+; CHECK-LABEL: @lgc.shader.TES.main(
+; CHECK-NEXT:  main_begin:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <3 x float> (...) @lgc.create.read.builtin.input.v3f32(i32 13, i32 0, i32 poison, i32 poison)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[__LLPC_OUTPUT_PROXY_O0XYZW_12_VEC_INSERT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> <float poison, float poison, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    call void (...) @lgc.create.write.generic.output(<4 x float> [[__LLPC_OUTPUT_PROXY_O0XYZW_12_VEC_INSERT]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
+; CHECK-NEXT:    ret void
+;