From 944291582f59c8336c8b2d8f17e3c4972b2147a6 Mon Sep 17 00:00:00 2001 From: Rex Xu Date: Thu, 16 Nov 2023 17:55:39 +0800 Subject: [PATCH] Mesh shader: Enable row export on GFX11 and fix issues - The preparation of attribute ring access should be moved to the entry block. - The function attribute 'amdgpu-flat-work-group-size' is set incorrectly, which leads to the unexpected removal of s_barrier. - Use primOrVertexIndex as the VGPR index of buffer_store when accessing the attribute ring. Don't use threadIdInSubgroup, because when row export is enabled, threadIdInSubgroup is not always equal to primOrVertexIndex. - After fixing those issues, enable row export by default on GFX11. This is because mesh queries have to check msInvocations. On GFX11, HW obtains this value from the register field SPI_SHADER_GS_MESHLET_DIM.MESHLET_THREADGROUP_SIZE. --- lgc/patch/MeshTaskShader.cpp | 10 +++++----- lgc/state/PipelineState.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lgc/patch/MeshTaskShader.cpp b/lgc/patch/MeshTaskShader.cpp index 3a7219bf27..5572680750 100644 --- a/lgc/patch/MeshTaskShader.cpp +++ b/lgc/patch/MeshTaskShader.cpp @@ -479,7 +479,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { const unsigned flatWorkgroupSize = alignTo(m_pipelineState->enableMeshRowExport() ? 
numMeshThreads : primAmpFactor, waveSize); entryPoint->addFnAttr("amdgpu-flat-work-group-size", - std::to_string(primAmpFactor) + std::string(",") + std::to_string(flatWorkgroupSize)); + std::to_string(flatWorkgroupSize) + std::string(",") + std::to_string(flatWorkgroupSize)); const unsigned numWaves = flatWorkgroupSize / waveSize; const unsigned numMeshWaves = alignTo(numMeshThreads, waveSize) / waveSize; @@ -555,6 +555,9 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { m_builder.CreateStore(m_builder.getFalse(), m_barrierToggle); } + if (m_gfxIp.major >= 11) + prepareAttribRingAccess(); + m_builder.CreateBr(initPrimitiveIndicesHeaderBlock); } @@ -573,9 +576,6 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "primitiveIndex"); } - if (m_gfxIp.major >= 11) - prepareAttribRingAccess(); - auto validPrimitive = m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(meshMode.outputPrimitives)); m_builder.CreateCondBr(validPrimitive, initPrimitiveIndicesBodyBlock, endInitPrimitiveIndicesBlock); @@ -2234,7 +2234,7 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { } m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, valueToStore->getType(), - {valueToStore, m_attribRingBufDesc, m_waveThreadInfo.threadIdInSubgroup, + {valueToStore, m_attribRingBufDesc, m_waveThreadInfo.primOrVertexIndex, locationOffset, m_attribRingBaseOffset, m_builder.getInt32(coherent.u32All)}); } } else { diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index 6833b027fa..03dad2cd21 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -52,7 +52,7 @@ static cl::opt EnableTessOffChip("enable-tess-offchip", cl::desc("Enable t // -enable-row-export: enable row export for mesh shader static cl::opt EnableRowExport("enable-row-export", cl::desc("Enable row export for mesh shader"), - cl::init(false)); + 
cl::init(true)); cl::opt UseRegisterFieldFormat("use-register-field-format", cl::desc("Use register field format in pipeline ELF"), cl::init(true));