Skip to content

Commit

Permalink
Mesh shader: Enable row export on GFX11 and fix issues
Browse files Browse the repository at this point in the history
- The preparation of attribute ring access should be moved to the entry
  block.
- The function attribute 'amdgpu-flat-work-group-size' is incorrect,
  which leads to unexpected removal of s_barrier.
- Use primOrVertexIndex as the VGPR index of buffer_store when accessing
  the attribute ring. Don't use threadIdInSubgroup, because when row export
  is enabled, threadIdInSubgroup is not always equal to primOrVertexIndex.
- After fixing those issues, enable row export by default on GFX11. This
  is needed because mesh queries have to check msInvocations, and on GFX11
  the HW obtains this value from the register field
  SPI_SHADER_GS_MESHLET_DIM.MESHLET_THREADGROUP_SIZE.
  • Loading branch information
amdrexu committed Nov 21, 2023
1 parent 5011159 commit f811897
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions lgc/patch/MeshTaskShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) {
const unsigned flatWorkgroupSize =
alignTo(m_pipelineState->enableMeshRowExport() ? numMeshThreads : primAmpFactor, waveSize);
entryPoint->addFnAttr("amdgpu-flat-work-group-size",
std::to_string(primAmpFactor) + std::string(",") + std::to_string(flatWorkgroupSize));
std::to_string(flatWorkgroupSize) + std::string(",") + std::to_string(flatWorkgroupSize));

const unsigned numWaves = flatWorkgroupSize / waveSize;
const unsigned numMeshWaves = alignTo(numMeshThreads, waveSize) / waveSize;
Expand Down Expand Up @@ -555,6 +555,9 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) {
m_builder.CreateStore(m_builder.getFalse(), m_barrierToggle);
}

if (m_gfxIp.major >= 11)
prepareAttribRingAccess();

m_builder.CreateBr(initPrimitiveIndicesHeaderBlock);
}

Expand All @@ -573,9 +576,6 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) {
m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "primitiveIndex");
}

if (m_gfxIp.major >= 11)
prepareAttribRingAccess();

auto validPrimitive =
m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(meshMode.outputPrimitives));
m_builder.CreateCondBr(validPrimitive, initPrimitiveIndicesBodyBlock, endInitPrimitiveIndicesBlock);
Expand Down Expand Up @@ -2234,7 +2234,7 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef<ExportInfo> exports) {
}

m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store, valueToStore->getType(),
{valueToStore, m_attribRingBufDesc, m_waveThreadInfo.threadIdInSubgroup,
{valueToStore, m_attribRingBufDesc, m_waveThreadInfo.primOrVertexIndex,
locationOffset, m_attribRingBaseOffset, m_builder.getInt32(coherent.u32All)});
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion lgc/state/PipelineState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static cl::opt<bool> EnableTessOffChip("enable-tess-offchip", cl::desc("Enable t

// -enable-row-export: enable row export for mesh shader
static cl::opt<bool> EnableRowExport("enable-row-export", cl::desc("Enable row export for mesh shader"),
cl::init(false));
cl::init(true));

cl::opt<bool> UseRegisterFieldFormat("use-register-field-format", cl::desc("Use register field format in pipeline ELF"),
cl::init(true));
Expand Down

0 comments on commit f811897

Please sign in to comment.