From d15e7f25148284916e5869797ff4daf93173ba9c Mon Sep 17 00:00:00 2001 From: Junda Liu Date: Wed, 29 Nov 2023 09:17:10 +0800 Subject: [PATCH] Move LowerGpuRt into LGC With some tweak to adapt the change. --- lgc/CMakeLists.txt | 1 + .../include/lgc/patch}/LowerGpuRt.h | 26 ++-- lgc/interface/lgc/Pipeline.h | 5 +- {llpc/lower => lgc/patch}/LowerGpuRt.cpp | 112 +++++++++++------- lgc/patch/PassRegistry.inc | 1 + lgc/patch/Patch.cpp | 3 + llpc/CMakeLists.txt | 1 - llpc/context/llpcPipelineContext.cpp | 4 + llpc/lower/llpcSpirvLower.cpp | 3 - llpc/lower/llpcSpirvLowerRayTracing.cpp | 24 ++++ llpc/lower/llpcSpirvLowerRayTracing.h | 4 + llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp | 4 +- .../continuations/include/lgc/GpurtDialect.td | 8 +- 13 files changed, 126 insertions(+), 70 deletions(-) rename {llpc/lower => lgc/include/lgc/patch}/LowerGpuRt.h (87%) rename {llpc/lower => lgc/patch}/LowerGpuRt.cpp (80%) diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index 380d46b2b5..d6ae3674a3 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -239,6 +239,7 @@ target_sources(LLVMlgc PRIVATE patch/RegisterMetadataBuilder.cpp patch/CombineCooperativeMatrix.cpp patch/LowerCooperativeMatrix.cpp + patch/LowerGpuRt.cpp ) # lgc/state diff --git a/llpc/lower/LowerGpuRt.h b/lgc/include/lgc/patch/LowerGpuRt.h similarity index 87% rename from llpc/lower/LowerGpuRt.h rename to lgc/include/lgc/patch/LowerGpuRt.h index a014725402..65dcb6ccf7 100644 --- a/llpc/lower/LowerGpuRt.h +++ b/lgc/include/lgc/patch/LowerGpuRt.h @@ -25,16 +25,18 @@ /** *********************************************************************************************************************** * @file LowerGpuRt.h - * @brief LLPC header file: contains declaration of Llpc::LowerGpuRt + * @brief LGC header file: contains declaration of lgc::LowerGpuRt *********************************************************************************************************************** */ #pragma once -#include "llpcSpirvLower.h" 
#include "llvm/ADT/SmallSet.h" #include "llvm/IR/PassManager.h" namespace lgc { +class BuilderImpl; +class PipelineState; + class GpurtGetStackSizeOp; class GpurtGetStackBaseOp; class GpurtGetStackStrideOp; @@ -46,16 +48,9 @@ class GpurtGetBoxSortHeuristicModeOp; class GpurtGetStaticFlagsOp; class GpurtGetTriangleCompressionModeOp; class GpurtGetFlattenedGroupThreadIdOp; -} // namespace lgc -namespace llvm { -class AllocaInst; -} - -namespace Llpc { -class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin { +class LowerGpuRt : public llvm::PassInfoMixin { public: - LowerGpuRt(); llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); private: @@ -63,7 +58,7 @@ class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin { const static unsigned MaxLdsStackEntries = 16; uint32_t getWorkgroupSize() const; llvm::Value *getThreadIdInGroup() const; - void createGlobalStack(); + void createGlobalStack(llvm::Module &module); void createRayStaticIdValue(); void visitGetStackSize(lgc::GpurtGetStackSizeOp &inst); void visitGetStackBase(lgc::GpurtGetStackBaseOp &inst); @@ -76,10 +71,11 @@ class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin { void visitGetStaticFlags(lgc::GpurtGetStaticFlagsOp &inst); void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst); void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst); - llvm::Value *m_stack; // Stack array to hold stack value - llvm::Type *m_stackTy; // Stack type - bool m_lowerStack; // If it is lowerStack + llvm::Value *m_stack = nullptr; // Stack array to hold stack value + llvm::Type *m_stackTy = nullptr; // Stack type + PipelineState *m_pipelineState = nullptr; // Pipeline state llvm::SmallVector m_callsToLower; // Call instruction to lower llvm::SmallSet m_funcsToLower; // Functions to lower + BuilderImpl *m_builder = nullptr; }; -} // namespace Llpc +} // namespace lgc diff --git 
a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 87b47bc7a8..8248e584c9 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -127,7 +127,7 @@ static const char SampleShadingMetaName[] = "lgc.sample.shading"; // The front-end should zero-initialize a struct with "= {}" in case future changes add new fields. // Note: new fields must be added to the end of this structure to maintain test compatibility. union Options { - unsigned u32All[36]; + unsigned u32All[40]; struct { uint64_t hash[2]; // Pipeline hash to set in ELF PAL metadata unsigned includeDisassembly; // If set, the disassembly for all compiled shaders will be included @@ -183,6 +183,9 @@ union Options { bool enableFragColor; // If enabled, do frag color broadcast bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors to structure SRD. unsigned cpsFlags; // CPS feature flags + unsigned rtBoxSortHeuristicMode; // Ray tracing box sort heuristic mode + unsigned rtStaticPipelineFlags; // Ray tracing static pipeline flags + unsigned rtTriCompressMode; // Ray tracing triangle compression mode }; }; static_assert(sizeof(Options) == sizeof(Options::u32All)); diff --git a/llpc/lower/LowerGpuRt.cpp b/lgc/patch/LowerGpuRt.cpp similarity index 80% rename from llpc/lower/LowerGpuRt.cpp rename to lgc/patch/LowerGpuRt.cpp index 42fecefe82..afd9502e09 100644 --- a/llpc/lower/LowerGpuRt.cpp +++ b/lgc/patch/LowerGpuRt.cpp @@ -25,30 +25,27 @@ /** *********************************************************************************************************************** * @file LowerGpuRt.cpp - * @brief LLPC source file: contains implementation of class Llpc::LowerGpuRt. + * @brief LGC source file: contains implementation of class lgc::LowerGpuRt. 
*********************************************************************************************************************** */ -#include "LowerGpuRt.h" -#include "llpcContext.h" -#include "llpcRayTracingContext.h" +#include "lgc/patch/LowerGpuRt.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" +#include "lgc/LgcContext.h" +#include "lgc/builder/BuilderImpl.h" +#include "lgc/state/TargetInfo.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvm/IR/IntrinsicsAMDGPU.h" -#define DEBUG_TYPE "llpc-gpurt" +#define DEBUG_TYPE "lgc-lower-gpurt" using namespace lgc; using namespace llvm; -using namespace Llpc; namespace RtName { static const char *LdsStack = "LdsStack"; } // namespace RtName -namespace Llpc { -// ===================================================================================================================== -LowerGpuRt::LowerGpuRt() : m_stack(nullptr), m_stackTy(nullptr), m_lowerStack(false) { -} +namespace lgc { // ===================================================================================================================== // Executes this SPIR-V lowering pass on the specified LLVM module. // @@ -56,12 +53,15 @@ LowerGpuRt::LowerGpuRt() : m_stack(nullptr), m_stackTy(nullptr), m_lowerStack(fa // @param [in/out] analysisManager : Analysis manager to use for this transformation PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysisManager) { LLVM_DEBUG(dbgs() << "Run the pass Lower-gpurt\n"); - SpirvLower::init(&module); - auto gfxip = m_context->getPipelineContext()->getGfxIpVersion(); - // NOTE: rayquery of sect and ahit can reuse lds. 
- m_lowerStack = (m_entryPoint->getName().startswith("_ahit") || m_entryPoint->getName().startswith("_sect")) && - (gfxip.major < 11); - createGlobalStack(); + + PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); + m_pipelineState = pipelineState; + + BuilderImpl builderImpl(pipelineState); + m_builder = &builderImpl; + m_builder->setShaderStage(ShaderStageCompute); + + createGlobalStack(module); static auto visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) @@ -78,7 +78,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitGetFlattenedGroupThreadId) .build(); - visitor.visit(*this, *m_module); + visitor.visit(*this, module); for (Instruction *call : m_callsToLower) { call->dropAllReferences(); @@ -99,14 +99,15 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi // Get pipeline workgroup size for stack size calculation unsigned LowerGpuRt::getWorkgroupSize() const { unsigned workgroupSize = 0; - if (m_context->getPipelineType() == PipelineType::Graphics) { - workgroupSize = m_context->getPipelineContext()->getRayTracingWaveSize(); + if (m_pipelineState->isGraphics()) { + // Force 64 for graphics stages + workgroupSize = 64; } else { - ComputeShaderMode mode = lgc::Pipeline::getComputeShaderMode(*m_module); + ComputeShaderMode mode = m_pipelineState->getShaderModes()->getComputeShaderMode(); workgroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ; } assert(workgroupSize != 0); - if (m_context->getPipelineContext()->getGfxIpVersion().major >= 11) { + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) { // Round up to multiple of 32, as the ds_bvh_stack swizzle as 32 threads workgroupSize = alignTo(workgroupSize, 32); } @@ -117,26 +118,48 @@ unsigned LowerGpuRt::getWorkgroupSize() const { // Get flat thread id in work group/wave Value 
*LowerGpuRt::getThreadIdInGroup() const { // Todo: for graphics shader, subgroupId * waveSize + subgroupLocalInvocationId() - unsigned builtIn = m_context->getPipelineType() == PipelineType::Graphics ? lgc::BuiltInSubgroupLocalInvocationId - : lgc::BuiltInLocalInvocationIndex; - lgc::InOutInfo inputInfo = {}; - return m_builder->CreateReadBuiltInInput(static_cast(builtIn), inputInfo, nullptr, nullptr, ""); + unsigned builtIn = m_pipelineState->isGraphics() ? BuiltInSubgroupLocalInvocationId : BuiltInLocalInvocationIndex; + InOutInfo inputInfo = {}; + return m_builder->CreateReadBuiltInInput(static_cast(builtIn), inputInfo, nullptr, nullptr, ""); } // ===================================================================================================================== // Create global variable for the stack -void LowerGpuRt::createGlobalStack() { - auto ldsStackSize = getWorkgroupSize() * MaxLdsStackEntries; - // Double anyhit and intersection shader lds size, these shader use lower part of stack to read/write value - if (m_lowerStack) - ldsStackSize = ldsStackSize << 1; - - m_stackTy = ArrayType::get(m_builder->getInt32Ty(), ldsStackSize); - auto ldsStack = new GlobalVariable(*m_module, m_stackTy, false, GlobalValue::ExternalLinkage, nullptr, - RtName::LdsStack, nullptr, GlobalValue::NotThreadLocal, 3); - - ldsStack->setAlignment(MaybeAlign(4)); - m_stack = ldsStack; +// @param [in/out] module : LLVM module to be run on +void LowerGpuRt::createGlobalStack(Module &module) { + + struct Payload { + bool needGlobalStack; + bool needExtraStack; + }; + Payload payload = {false, false}; + static auto visitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](auto &payload, auto &op) { + payload.needGlobalStack = true; + payload.needExtraStack |= op.getUseExtraStack(); + }) + .add([](auto &payload, auto &op) { + payload.needGlobalStack = true; + payload.needExtraStack |= op.getUseExtraStack(); + }) + 
.add([](auto &payload, auto &op) { payload.needGlobalStack = true; }) + .build(); + visitor.visit(payload, module); + + if (payload.needGlobalStack) { + auto ldsStackSize = getWorkgroupSize() * MaxLdsStackEntries; + // Double the LDS size when any operation needs to use the extra stack. + if (payload.needExtraStack) + ldsStackSize = ldsStackSize << 1; + + m_stackTy = ArrayType::get(m_builder->getInt32Ty(), ldsStackSize); + auto ldsStack = new GlobalVariable(module, m_stackTy, false, GlobalValue::ExternalLinkage, nullptr, + RtName::LdsStack, nullptr, GlobalValue::NotThreadLocal, 3); + + ldsStack->setAlignment(MaybeAlign(4)); + m_stack = ldsStack; + } } // ===================================================================================================================== @@ -184,7 +207,7 @@ void LowerGpuRt::visitStackRead(GpurtStackReadOp &inst) { m_builder->SetInsertPoint(&inst); Value *stackIndex = inst.getIndex(); Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3); - if (m_lowerStack) { + if (inst.getUseExtraStack()) { auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries); stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize); } @@ -206,7 +229,7 @@ void LowerGpuRt::visitStackWrite(GpurtStackWriteOp &inst) { Value *stackIndex = inst.getIndex(); Value *stackData = inst.getValue(); Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3); - if (m_lowerStack) { + if (inst.getUseExtraStack()) { auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries); stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize); } @@ -297,8 +320,7 @@ void LowerGpuRt::visitLdsStackStore(GpurtLdsStackStoreOp &inst) { // @param inst : The dialect instruction to process void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &inst) { m_builder->SetInsertPoint(&inst); - auto rtState = m_context->getPipelineContext()->getRayTracingState(); - Value *boxSortHeuristicMode = 
m_builder->getInt32(rtState->boxSortHeuristicMode); + Value *boxSortHeuristicMode = m_builder->getInt32(m_pipelineState->getOptions().rtBoxSortHeuristicMode); inst.replaceAllUsesWith(boxSortHeuristicMode); m_callsToLower.push_back(&inst); m_funcsToLower.insert(inst.getCalledFunction()); @@ -310,8 +332,7 @@ void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &in // @param inst : The dialect instruction to process void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) { m_builder->SetInsertPoint(&inst); - auto rtState = m_context->getPipelineContext()->getRayTracingState(); - Value *staticPipelineFlags = m_builder->getInt32(rtState->staticPipelineFlags); + Value *staticPipelineFlags = m_builder->getInt32(m_pipelineState->getOptions().rtStaticPipelineFlags); inst.replaceAllUsesWith(staticPipelineFlags); m_callsToLower.push_back(&inst); m_funcsToLower.insert(inst.getCalledFunction()); @@ -323,8 +344,7 @@ void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) { // @param inst : The dialect instruction to process void LowerGpuRt::visitGetTriangleCompressionMode(GpurtGetTriangleCompressionModeOp &inst) { m_builder->SetInsertPoint(&inst); - auto rtState = m_context->getPipelineContext()->getRayTracingState(); - Value *triCompressMode = m_builder->getInt32(rtState->triCompressMode); + Value *triCompressMode = m_builder->getInt32(m_pipelineState->getOptions().rtTriCompressMode); inst.replaceAllUsesWith(triCompressMode); m_callsToLower.push_back(&inst); m_funcsToLower.insert(inst.getCalledFunction()); @@ -341,4 +361,4 @@ void LowerGpuRt::visitGetFlattenedGroupThreadId(GpurtGetFlattenedGroupThreadIdOp m_funcsToLower.insert(inst.getCalledFunction()); } -} // namespace Llpc +} // namespace lgc diff --git a/lgc/patch/PassRegistry.inc b/lgc/patch/PassRegistry.inc index 6b1f2da15a..9ede6c656d 100644 --- a/lgc/patch/PassRegistry.inc +++ b/lgc/patch/PassRegistry.inc @@ -81,6 +81,7 @@ LLPC_MODULE_PASS("lgc-lower-debug-printf", 
LowerDebugPrintf) LLPC_FUNCTION_PASS("lgc-combine-cooperative-matrix", CombineCooperativeMatrix) LLPC_MODULE_PASS("lgc-lower-cooperative-matrix", LowerCooperativeMatrix) +LLPC_MODULE_PASS("lgc-lower-gpurt", LowerGpuRt) #undef LLPC_PASS #undef LLPC_LOOP_PASS diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index fc55926a1f..6d70d00780 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -40,6 +40,7 @@ #include "lgc/patch/FragColorExport.h" #include "lgc/patch/LowerCooperativeMatrix.h" #include "lgc/patch/LowerDebugPrintf.h" +#include "lgc/patch/LowerGpuRt.h" #include "lgc/patch/PatchBufferOp.h" #include "lgc/patch/PatchCheckShaderCache.h" #include "lgc/patch/PatchCopyShader.h" @@ -141,6 +142,8 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T "// LLPC pipeline before-patching results\n")); } + passMgr.addPass(LowerGpuRt()); + const auto indirectMode = pipelineState->getOptions().rtIndirectMode; if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy || indirectMode == RayTracingIndirectMode::Continuations) { diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 72331a2fb8..d99e2ff799 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -222,7 +222,6 @@ if(ICD_BUILD_LLPC) lower/llpcSpirvLowerTranslator.cpp lower/llpcSpirvLowerUtil.cpp lower/llpcSpirvProcessGpuRtLibrary.cpp - lower/LowerGpuRt.cpp lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp lower/LowerGLCompatibility.cpp lower/llpcSpirvLowerCooperativeMatrix.cpp diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index 0f0e508baf..e71c97e687 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -342,6 +342,10 @@ Options PipelineContext::computePipelineOptions() const { options.enablePrimGeneratedQuery = getPipelineOptions()->enablePrimGeneratedQuery; options.enableFragColor = getPipelineOptions()->enableFragColor; + options.rtBoxSortHeuristicMode = 
m_rtState.boxSortHeuristicMode; + options.rtStaticPipelineFlags = m_rtState.staticPipelineFlags; + options.rtTriCompressMode = m_rtState.triCompressMode; + return options; } diff --git a/llpc/lower/llpcSpirvLower.cpp b/llpc/lower/llpcSpirvLower.cpp index 1d225ff8b7..9ddccf4fe9 100644 --- a/llpc/lower/llpcSpirvLower.cpp +++ b/llpc/lower/llpcSpirvLower.cpp @@ -59,7 +59,6 @@ // New version of the code (also handles unknown version, which we treat as latest) #include "llvm/IRPrinter/IRPrintingPasses.h" #endif -#include "LowerGpuRt.h" #include "llvm/Support/FileSystem.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" @@ -264,8 +263,6 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager passMgr.addPass(SpirvLowerInstMetaRemove()); if (rayTracing || rayQuery || isInternalRtShader) { - passMgr.addPass(LowerGpuRt()); - FunctionPassManager fpm; fpm.addPass(SROAPass(SROAOptions::PreserveCFG)); fpm.addPass(InstCombinePass(instCombineOpt)); diff --git a/llpc/lower/llpcSpirvLowerRayTracing.cpp b/llpc/lower/llpcSpirvLowerRayTracing.cpp index d0cb0a1782..beccca5b9b 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.cpp +++ b/llpc/lower/llpcSpirvLowerRayTracing.cpp @@ -630,6 +630,8 @@ PreservedAnalyses SpirvLowerRayTracing::run(Module &module, ModuleAnalysisManage .add(&SpirvLowerRayTracing::visitInstanceInclusionMaskOp) .add(&SpirvLowerRayTracing::visitShaderIndexOp) .add(&SpirvLowerRayTracing::visitShaderRecordBufferOp) + .add(&SpirvLowerRayTracing::visitStackReadOp) + .add(&SpirvLowerRayTracing::visitStackWriteOp) .build(); visitor.visit(*this, *m_module); @@ -2575,6 +2577,28 @@ void SpirvLowerRayTracing::visitGetRayStaticId(lgc::GpurtGetRayStaticIdOp &inst) m_funcsToLower.insert(inst.getCalledFunction()); } +// ===================================================================================================================== +// Visits "lgc.gpurt.stack.read" instructions +// +// @param 
inst : The instruction +void SpirvLowerRayTracing::visitStackReadOp(lgc::GpurtStackReadOp &inst) { + // NOTE: If RayQuery is used inside intersection or any-hit shaders, where we already hold a traversal stack for + // TraceRay, perform the stack operations for this RayQuery in an extra stack space. + if ((m_shaderStage == ShaderStageRayTracingIntersect) || (m_shaderStage == ShaderStageRayTracingAnyHit)) + inst.setUseExtraStack(true); +} + +// ===================================================================================================================== +// Visits "lgc.gpurt.stack.write" instructions +// +// @param inst : The instruction +void SpirvLowerRayTracing::visitStackWriteOp(lgc::GpurtStackWriteOp &inst) { + // NOTE: If RayQuery is used inside intersection or any-hit shaders, where we already hold a traversal stack for + // TraceRay, perform the stack operations for this RayQuery in an extra stack space. + if ((m_shaderStage == ShaderStageRayTracingIntersect) || (m_shaderStage == ShaderStageRayTracingAnyHit)) + inst.setUseExtraStack(true); +} + // ===================================================================================================================== // Visits "lgc.gpurt.get.parent.id" instructions // diff --git a/llpc/lower/llpcSpirvLowerRayTracing.h b/llpc/lower/llpcSpirvLowerRayTracing.h index e1f9586153..61bf5681d8 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.h +++ b/llpc/lower/llpcSpirvLowerRayTracing.h @@ -77,6 +77,8 @@ class GpurtSetHitTriangleNodePointerOp; class GpurtGetParentIdOp; class GpurtSetParentIdOp; class GpurtGetRayStaticIdOp; +class GpurtStackReadOp; +class GpurtStackWriteOp; } // namespace lgc namespace Llpc { @@ -245,6 +247,8 @@ class SpirvLowerRayTracing : public SpirvLowerRayQuery { void visitGetParentId(lgc::GpurtGetParentIdOp &inst); void visitSetParentId(lgc::GpurtSetParentIdOp &inst); void visitGetRayStaticId(lgc::GpurtGetRayStaticIdOp &inst); + void visitStackReadOp(lgc::GpurtStackReadOp &inst); + 
void visitStackWriteOp(lgc::GpurtStackWriteOp &inst); void visitDispatchRayIndex(lgc::rt::DispatchRaysIndexOp &inst); void visitDispatchRaysDimensionsOp(lgc::rt::DispatchRaysDimensionsOp &inst); void visitWorldRayOriginOp(lgc::rt::WorldRayOriginOp &inst); diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp index d4f0c98dd2..b792e51f78 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp @@ -211,7 +211,7 @@ void SpirvProcessGpuRtLibrary::createLdsWrite(Function *func) { auto int32ty = m_builder->getInt32Ty(); Value *stackOffset = m_builder->CreateLoad(int32ty, argIt++); Value *stackData = m_builder->CreateLoad(int32ty, argIt); - m_builder->CreateRet(m_builder->create(stackOffset, stackData)); + m_builder->CreateRet(m_builder->create(stackOffset, stackData, false)); } // ===================================================================================================================== @@ -221,7 +221,7 @@ void SpirvProcessGpuRtLibrary::createLdsWrite(Function *func) { void SpirvProcessGpuRtLibrary::createLdsRead(Function *func) { Value *stackIndex = func->arg_begin(); stackIndex = m_builder->CreateLoad(m_builder->getInt32Ty(), stackIndex); - m_builder->CreateRet(m_builder->create(stackIndex)); + m_builder->CreateRet(m_builder->create(stackIndex, false)); } // ===================================================================================================================== diff --git a/shared/continuations/include/lgc/GpurtDialect.td b/shared/continuations/include/lgc/GpurtDialect.td index 1ab827d4ea..6ad1435cea 100644 --- a/shared/continuations/include/lgc/GpurtDialect.td +++ b/shared/continuations/include/lgc/GpurtDialect.td @@ -48,20 +48,24 @@ def GpurtGetStackSizeOp : GpurtOp<"get.stack.size", [Memory<[]>, WillReturn]> { } def GpurtStackReadOp : GpurtOp<"stack.read", [Memory<[(read)]>, WillReturn]> { - let arguments = (ins I32:$index); + let arguments = 
(ins I32:$index, AttrI1:$use_extra_stack); let results = (outs I32:$result); let summary = "read a dword from stack"; let description = [{ Read a dword from lds/(scratch buffer) stack at index position + + Setting use_extra_stack to true means this read will perform on an extra piece of stack to avoid traversal stack conflict. }]; } def GpurtStackWriteOp : GpurtOp<"stack.write", [Memory<[(write)]>, WillReturn]> { - let arguments = (ins I32:$index, I32:$value); + let arguments = (ins I32:$index, I32:$value, AttrI1:$use_extra_stack); let results = (outs I32:$result); let summary = "write a dword to stack"; let description = [{ Write a dword to lds/(scratch buffer) stack at index position + + Setting use_extra_stack to true means this write will perform on an extra piece of stack to avoid traversal stack conflict. }]; }