From c7463c1f8b12a64c3b0d657125ff3e4fbf513308 Mon Sep 17 00:00:00 2001 From: xuechen417 Date: Tue, 24 Oct 2023 17:13:32 +0800 Subject: [PATCH] Handle missing xfb execution mode in addCallInstForXfbOutput There is a case where the vertex outputs have xfb decorations but the shader does not declare the xfb execution mode. According to the Vulkan spec, these outputs will not be captured into an xfb buffer. This change adds the missing handling for this case in `addCallInstForXfbOutput`. The check for existing xfb output is based on the lgc.xfb.state metadata. --- llpc/lower/llpcSpirvLowerGlobal.cpp | 14 +- .../ExtXfb_TestNoXfbExecutionMode.spvasm | 188 ++++++++++++++++++ 2 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index 6be6758abc..345b7f5290 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -2421,9 +2421,19 @@ void SpirvLowerGlobal::addCallInstForXfbOutput(const ShaderInOutMetadata &output unsigned xfbBufferAdjust, unsigned xfbOffsetAdjust, unsigned locOffset, lgc::InOutInfo outputInfo) { assert(m_shaderStage == m_lastVertexProcessingStage); - auto pipelineBuildInfo = static_cast(m_context->getPipelineBuildInfo()); DenseMap *locXfbMapPtr = outputMeta.IsBuiltIn ? &m_builtInXfbMap : &m_genericXfbMap; - if (pipelineBuildInfo->apiXfbOutData.forceDisableStreamOut || (locXfbMapPtr->empty() && !outputMeta.IsXfb)) + bool hasXfbOut = m_entryPoint->getMetadata(lgc::XfbStateMetadataName); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 69 + // NOTE: primitive generated query will create xfb metadata in the old version. It may be a fake XFB if there is no + // XFB info from API interface or output metadata. 
+ if (hasXfbOut && locXfbMapPtr->empty() && !outputMeta.IsXfb) { + hasXfbOut = false; + auto pipelineBuildInfo = static_cast(m_context->getPipelineBuildInfo()); + assert(pipelineBuildInfo->apiXfbOutData.forceEnablePrimStats); + (void(pipelineBuildInfo)); //unused + } +#endif + if (!hasXfbOut) return; // If the XFB info is specified from API interface so we try to retrieve the info from m_locXfbMap. Otherwise, the XFB diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm b/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm new file mode 100644 index 0000000000..1e3a0818fb --- /dev/null +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; RUN: amdllpc -o - -gfxip 10.3 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s +; SPIR-V +; Version: 1.3 +; Generator: Khronos; 4660 +; Bound: 122 +; Schema: 0 + OpCapability Shader + OpCapability SampledCubeArray + OpCapability ImageBuffer + OpCapability ImageGatherExtended + OpCapability ImageQuery + OpCapability TransformFeedback + OpCapability Tessellation + OpCapability SignedZeroInfNanPreserve + OpExtension "SPV_KHR_float_controls" + %87 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %main "main" %DomainPoint %gl_PrimitiveID %o0xyzw %ipatch0y %gl_PointSize + OpExecutionMode %main Quads + OpExecutionMode %main SignedZeroInfNanPreserve 32 + %2 = OpString "VMware VMGI Translator (shader 21)" + %3 = OpString "DOMA" + %71 = OpString "PROPERTY REFACTORING_ALLOWED" + %72 = OpString "DCL INPUT_PATCH_CONSTANT[0].y" + %73 = OpString "DCL INPUT_DOMAIN_POINT[0].xy" + %75 = OpString "PROPERTY INPUT_CONTROL_POINT_COUNT 4" + %76 = OpString "PROPERTY TESS_DOMAIN QUAD" + %77 = OpString "DCL OUT[0], POSITION" + %78 = OpString "DCL OUT[1].xy, GENERIC[1]" + %79 = OpString "IMM INT32 {0, 1065353216, 0, 0}" + %83 = OpString "DCL 
PRIMID[0]" + %85 = OpString "soEntry[0] = OUT[0].xyzw, buffer 0, stream 0" + %86 = OpString "RasterizedStream=255, DefaultStream=0" + %91 = OpString " 0: MOV OUT[0].xy, INPUT_DOMAIN_POINT[0].xyxx" + %102 = OpString " 1: MOV OUT[0].zw, IMM[0].xxxy" + %111 = OpString " 2: MOV OUT[1].x, INPUT_PATCH_CONSTANT[0].y" + %115 = OpString " 3: MOV OUT[1].y, PRIMID[0]" + %120 = OpString " 4: RET" + %121 = OpString " 5: END" + OpName %main "main" + OpName %void_Func "void_Func" + OpName %struct_2_ivec4 "struct_2_ivec4" + OpName %struct_2_uvec4 "struct_2_uvec4" + OpName %main_begin "main_begin" + OpName %DomainPoint "DomainPoint" + OpName %o0xyzw "o0xyzw" + OpName %ipatch0y "ipatch0y" + OpDecorate %DomainPoint BuiltIn TessCoord + OpDecorate %gl_PrimitiveID BuiltIn PrimitiveId + OpDecorate %o0xyzw Location 0 + OpDecorate %o0xyzw XfbBuffer 0 + OpDecorate %o0xyzw XfbStride 16 + OpDecorate %o0xyzw Offset 0 + OpDecorate %ipatch0y Patch + OpDecorate %ipatch0y Location 0 + OpDecorate %ipatch0y Component 1 + OpDecorate %gl_PointSize BuiltIn PointSize + %void = OpTypeVoid + %bool = OpTypeBool + %float = OpTypeFloat 32 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %v2int = OpTypeVector %int 2 + %v3int = OpTypeVector %int 3 + %v4int = OpTypeVector %int 4 + %v2uint = OpTypeVector %uint 2 + %v3uint = OpTypeVector %uint 3 + %v4uint = OpTypeVector %uint 4 + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 + %v4bool = OpTypeVector %bool 4 + %void_Func = OpTypeFunction %void + %42 = OpTypeFunction %v4float %v4float +%struct_2_ivec4 = OpTypeStruct %v4int %v4int +%struct_2_uvec4 = OpTypeStruct %v4uint %v4uint + %int_0 = OpConstant %int 0 +%int_1065353216 = OpConstant %int 1065353216 + %82 = OpConstantComposite %v4int %int_0 %int_1065353216 %int_0 %int_0 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 
+ %int_3 = OpConstant %int 3 +%_ptr_Private_float = OpTypePointer Private %float +%_ptr_Private_v2float = OpTypePointer Private %v2float +%_ptr_Private_v3float = OpTypePointer Private %v3float +%_ptr_Private_v4float = OpTypePointer Private %v4float +%_ptr_Private_int = OpTypePointer Private %int +%_ptr_Private_v2int = OpTypePointer Private %v2int +%_ptr_Private_v3int = OpTypePointer Private %v3int +%_ptr_Private_v4int = OpTypePointer Private %v4int +%_ptr_Private_uint = OpTypePointer Private %uint +%_ptr_Private_v2uint = OpTypePointer Private %v2uint +%_ptr_Private_v3uint = OpTypePointer Private %v3uint +%_ptr_Private_v4uint = OpTypePointer Private %v4uint +%_ptr_Private_bool = OpTypePointer Private %bool +%_ptr_Private_v2bool = OpTypePointer Private %v2bool +%_ptr_Private_v3bool = OpTypePointer Private %v3bool +%_ptr_Private_v4bool = OpTypePointer Private %v4bool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float +%_ptr_PushConstant_v4int = OpTypePointer PushConstant %v4int +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v3int = OpTypePointer Input %v3int +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_int = OpTypePointer Output 
%int +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v3int = OpTypePointer Output %v3int +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%DomainPoint = OpVariable %_ptr_Input_v3float Input +%gl_PrimitiveID = OpVariable %_ptr_Input_int Input + %o0xyzw = OpVariable %_ptr_Output_v4float Output + %ipatch0y = OpVariable %_ptr_Input_float Input +%gl_PointSize = OpVariable %_ptr_Output_float Output + %main = OpFunction %void None %void_Func + %main_begin = OpLabel + OpLine %71 0 0 + OpLine %72 0 0 + OpLine %73 0 0 + OpLine %75 0 0 + OpLine %76 0 0 + OpLine %77 0 0 + OpLine %78 0 0 + OpLine %79 0 0 + OpLine %83 0 0 + OpLine %91 0 0 + %92 = OpLoad %v3float %DomainPoint + %95 = OpCompositeConstruct %v4float %92 %float_1 + %96 = OpVectorShuffle %v4float %95 %95 0 1 0 0 + %97 = OpCompositeExtract %float %96 0 + %98 = OpAccessChain %_ptr_Output_float %o0xyzw %int_0 + OpStore %98 %97 + %99 = OpCompositeExtract %float %96 1 + %101 = OpAccessChain %_ptr_Output_float %o0xyzw %int_1 + OpStore %101 %99 + OpLine %102 1 0 + %103 = OpBitcast %v4float %82 + %104 = OpVectorShuffle %v4float %103 %103 0 0 0 1 + %105 = OpCompositeExtract %float %104 2 + %107 = OpAccessChain %_ptr_Output_float %o0xyzw %int_2 + OpStore %107 %105 + %108 = OpCompositeExtract %float %104 3 + %110 = OpAccessChain %_ptr_Output_float %o0xyzw %int_3 + OpStore %110 %108 + OpLine %111 2 0 + %112 = OpLoad %float %ipatch0y + %113 = OpCompositeConstruct %v4float %112 %112 %112 %112 + %114 = OpCompositeExtract %float %113 0 + OpLine %115 3 0 + %116 = OpLoad %int %gl_PrimitiveID + %117 = OpBitcast %float %116 + %118 = OpCompositeConstruct %v4float %117 %117 %117 %117 + %119 = OpCompositeExtract %float %118 1 + OpLine %120 4 0 + OpReturn + OpFunctionEnd + OpLine %121 5 0 +; CHECK-LABEL: 
@lgc.shader.TES.main( +; CHECK-NEXT: main_begin: +; CHECK-NEXT: [[TMP0:%.*]] = call <3 x float> (...) @lgc.create.read.builtin.input.v3f32(i32 13, i32 0, i32 poison, i32 poison) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[__LLPC_OUTPUT_PROXY_O0XYZW_12_VEC_INSERT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[__LLPC_OUTPUT_PROXY_O0XYZW_12_VEC_INSERT]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) +; CHECK-NEXT: ret void +;