Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move LowerGpuRt into LGC #2852

Merged
merged 2 commits into from
Nov 30, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lgc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -239,6 +239,7 @@ target_sources(LLVMlgc PRIVATE
patch/RegisterMetadataBuilder.cpp
patch/CombineCooperativeMatrix.cpp
patch/LowerCooperativeMatrix.cpp
patch/LowerGpuRt.cpp
)

# lgc/state
26 changes: 11 additions & 15 deletions llpc/lower/LowerGpuRt.h → lgc/include/lgc/patch/LowerGpuRt.h
Original file line number Diff line number Diff line change
@@ -25,16 +25,18 @@
/**
***********************************************************************************************************************
* @file LowerGpuRt.h
* @brief LLPC header file: contains declaration of Llpc::LowerGpuRt
* @brief LGC header file: contains declaration of lgc::LowerGpuRt
***********************************************************************************************************************
*/
#pragma once

#include "llpcSpirvLower.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/PassManager.h"

namespace lgc {
class Builder;
class PipelineState;

class GpurtGetStackSizeOp;
class GpurtGetStackBaseOp;
class GpurtGetStackStrideOp;
@@ -46,24 +48,17 @@ class GpurtGetBoxSortHeuristicModeOp;
class GpurtGetStaticFlagsOp;
class GpurtGetTriangleCompressionModeOp;
class GpurtGetFlattenedGroupThreadIdOp;
} // namespace lgc

namespace llvm {
class AllocaInst;
}

namespace Llpc {
class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin<LowerGpuRt> {
class LowerGpuRt : public llvm::PassInfoMixin<LowerGpuRt> {
public:
LowerGpuRt();
llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager);

private:
typedef void (LowerGpuRt::*LibraryFuncPtr)(llvm::Function *, unsigned);
const static unsigned MaxLdsStackEntries = 16;
uint32_t getWorkgroupSize() const;
llvm::Value *getThreadIdInGroup() const;
void createGlobalStack();
void createGlobalStack(llvm::Module &module);
void createRayStaticIdValue();
void visitGetStackSize(lgc::GpurtGetStackSizeOp &inst);
void visitGetStackBase(lgc::GpurtGetStackBaseOp &inst);
@@ -76,10 +71,11 @@ class LowerGpuRt : public SpirvLower, public llvm::PassInfoMixin<LowerGpuRt> {
void visitGetStaticFlags(lgc::GpurtGetStaticFlagsOp &inst);
void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst);
void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst);
llvm::Value *m_stack; // Stack array to hold stack value
llvm::Type *m_stackTy; // Stack type
bool m_lowerStack; // If it is lowerStack
llvm::Value *m_stack = nullptr; // Stack array to hold stack value
llvm::Type *m_stackTy = nullptr; // Stack type
PipelineState *m_pipelineState = nullptr; // Pipeline state
llvm::SmallVector<llvm::Instruction *> m_callsToLower; // Call instruction to lower
llvm::SmallSet<llvm::Function *, 4> m_funcsToLower; // Functions to lower
Builder *m_builder = nullptr;
};
} // namespace Llpc
} // namespace lgc
6 changes: 5 additions & 1 deletion lgc/interface/lgc/Pipeline.h
Original file line number Diff line number Diff line change
@@ -127,7 +127,7 @@ static const char SampleShadingMetaName[] = "lgc.sample.shading";
// The front-end should zero-initialize a struct with "= {}" in case future changes add new fields.
// Note: new fields must be added to the end of this structure to maintain test compatibility.
union Options {
unsigned u32All[36];
unsigned u32All[40];
struct {
uint64_t hash[2]; // Pipeline hash to set in ELF PAL metadata
unsigned includeDisassembly; // If set, the disassembly for all compiled shaders will be included
@@ -183,6 +183,10 @@ union Options {
bool enableFragColor; // If enabled, do frag color broadcast
bool useSoftwareVertexBufferDescriptors; // Use software vertex buffer descriptors to structure SRD.
unsigned cpsFlags; // CPS feature flags
unsigned rtBoxSortHeuristicMode; // Ray tracing box sort heuristic mode
unsigned rtStaticPipelineFlags; // Ray tracing static pipeline flags
unsigned rtTriCompressMode; // Ray tracing triangle compression mode
bool useGpurt; // Whether GPURT is used
};
};
static_assert(sizeof(Options) == sizeof(Options::u32All));
110 changes: 64 additions & 46 deletions llpc/lower/LowerGpuRt.cpp → lgc/patch/LowerGpuRt.cpp
Original file line number Diff line number Diff line change
@@ -25,43 +25,42 @@
/**
***********************************************************************************************************************
* @file LowerGpuRt.cpp
* @brief LLPC source file: contains implementation of class Llpc::LowerGpuRt.
* @brief LGC source file: contains implementation of class lgc::LowerGpuRt.
***********************************************************************************************************************
*/
#include "LowerGpuRt.h"
#include "llpcContext.h"
#include "llpcRayTracingContext.h"
#include "lgc/patch/LowerGpuRt.h"
#include "lgc/Builder.h"
#include "lgc/GpurtDialect.h"
#include "lgc/LgcContext.h"
#include "lgc/builder/BuilderImpl.h"
#include "lgc/state/TargetInfo.h"
#include "llvm-dialects/Dialect/Visitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "llpc-gpurt"
#define DEBUG_TYPE "lgc-lower-gpurt"
using namespace lgc;
using namespace llvm;
using namespace Llpc;

namespace RtName {
static const char *LdsStack = "LdsStack";
} // namespace RtName

namespace Llpc {
// =====================================================================================================================
LowerGpuRt::LowerGpuRt() : m_stack(nullptr), m_stackTy(nullptr), m_lowerStack(false) {
}
namespace lgc {
// =====================================================================================================================
// Executes this LGC lowering pass on the specified LLVM module.
//
// @param [in/out] module : LLVM module to be run on
// @param [in/out] analysisManager : Analysis manager to use for this transformation
PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysisManager) {
LLVM_DEBUG(dbgs() << "Run the pass Lower-gpurt\n");
SpirvLower::init(&module);
auto gfxip = m_context->getPipelineContext()->getGfxIpVersion();
// NOTE: rayquery of sect and ahit can reuse lds.
m_lowerStack = (m_entryPoint->getName().startswith("_ahit") || m_entryPoint->getName().startswith("_sect")) &&
(gfxip.major < 11);
createGlobalStack();

PipelineState *pipelineState = analysisManager.getResult<PipelineStateWrapper>(module).getPipelineState();
m_pipelineState = pipelineState;

Builder builderImpl(pipelineState->getContext());
m_builder = &builderImpl;

createGlobalStack(module);

static auto visitor = llvm_dialects::VisitorBuilder<LowerGpuRt>()
.setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration)
@@ -78,7 +77,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi
.add(&LowerGpuRt::visitGetFlattenedGroupThreadId)
.build();

visitor.visit(*this, *m_module);
visitor.visit(*this, module);

for (Instruction *call : m_callsToLower) {
call->dropAllReferences();
@@ -99,14 +98,15 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi
// Get pipeline workgroup size for stack size calculation
unsigned LowerGpuRt::getWorkgroupSize() const {
unsigned workgroupSize = 0;
if (m_context->getPipelineType() == PipelineType::Graphics) {
workgroupSize = m_context->getPipelineContext()->getRayTracingWaveSize();
if (m_pipelineState->isGraphics()) {
// Force 64 for graphics stages
workgroupSize = 64;
} else {
ComputeShaderMode mode = lgc::Pipeline::getComputeShaderMode(*m_module);
ComputeShaderMode mode = m_pipelineState->getShaderModes()->getComputeShaderMode();
workgroupSize = mode.workgroupSizeX * mode.workgroupSizeY * mode.workgroupSizeZ;
}
assert(workgroupSize != 0);
if (m_context->getPipelineContext()->getGfxIpVersion().major >= 11) {
if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) {
// Round up to multiple of 32, as the ds_bvh_stack swizzle as 32 threads
workgroupSize = alignTo(workgroupSize, 32);
}
@@ -117,26 +117,47 @@ unsigned LowerGpuRt::getWorkgroupSize() const {
// Get flat thread id in work group/wave
Value *LowerGpuRt::getThreadIdInGroup() const {
// Todo: for graphics shader, subgroupId * waveSize + subgroupLocalInvocationId()
unsigned builtIn = m_context->getPipelineType() == PipelineType::Graphics ? lgc::BuiltInSubgroupLocalInvocationId
: lgc::BuiltInLocalInvocationIndex;
lgc::InOutInfo inputInfo = {};
return m_builder->CreateReadBuiltInInput(static_cast<lgc::BuiltInKind>(builtIn), inputInfo, nullptr, nullptr, "");
unsigned builtIn = m_pipelineState->isGraphics() ? BuiltInSubgroupLocalInvocationId : BuiltInLocalInvocationIndex;
InOutInfo inputInfo = {};
return m_builder->CreateReadBuiltInInput(static_cast<BuiltInKind>(builtIn), inputInfo, nullptr, nullptr, "");
}

// =====================================================================================================================
// Create global variable for the stack
void LowerGpuRt::createGlobalStack() {
auto ldsStackSize = getWorkgroupSize() * MaxLdsStackEntries;
// Double anyhit and intersection shader lds size, these shader use lower part of stack to read/write value
if (m_lowerStack)
ldsStackSize = ldsStackSize << 1;

m_stackTy = ArrayType::get(m_builder->getInt32Ty(), ldsStackSize);
auto ldsStack = new GlobalVariable(*m_module, m_stackTy, false, GlobalValue::ExternalLinkage, nullptr,
RtName::LdsStack, nullptr, GlobalValue::NotThreadLocal, 3);

ldsStack->setAlignment(MaybeAlign(4));
m_stack = ldsStack;
// Scans the module for GPURT stack operations and creates the global stack variable only when at least one of them
// is present. The stack array is doubled when any stack read/write operation asks for the extra stack.
//
// @param [in/out] module : LLVM module to be run on
void LowerGpuRt::createGlobalStack(Module &module) {
  // Facts collected while visiting the GPURT stack operations in the module.
  struct StackUsage {
    bool needGlobalStack = false; // Some operation accesses the stack at all
    bool needExtraStack = false;  // Some stack read/write operation uses the extra stack
  };

  static auto stackUsageVisitor = llvm_dialects::VisitorBuilder<StackUsage>()
                                      .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration)
                                      .add<GpurtStackWriteOp>([](auto &usage, auto &writeOp) {
                                        usage.needGlobalStack = true;
                                        usage.needExtraStack |= writeOp.getUseExtraStack();
                                      })
                                      .add<GpurtStackReadOp>([](auto &usage, auto &readOp) {
                                        usage.needGlobalStack = true;
                                        usage.needExtraStack |= readOp.getUseExtraStack();
                                      })
                                      .add<GpurtLdsStackInitOp>([](auto &usage, auto &) { usage.needGlobalStack = true; })
                                      .build();

  StackUsage usage;
  stackUsageVisitor.visit(usage, module);

  // Nothing touches the stack: leave m_stack and m_stackTy unset and create no global.
  if (!usage.needGlobalStack)
    return;

  // One slot group of MaxLdsStackEntries dwords per thread of the workgroup.
  auto stackEntryCount = getWorkgroupSize() * MaxLdsStackEntries;
  // Double the LDS size when any operation needs to work on the extra stack.
  if (usage.needExtraStack)
    stackEntryCount *= 2;

  m_stackTy = ArrayType::get(m_builder->getInt32Ty(), stackEntryCount);
  auto *stackVar = new GlobalVariable(module, m_stackTy, /*isConstant=*/false, GlobalValue::ExternalLinkage,
                                      /*Initializer=*/nullptr, RtName::LdsStack, /*InsertBefore=*/nullptr,
                                      GlobalValue::NotThreadLocal, /*AddressSpace=*/3);
  stackVar->setAlignment(MaybeAlign(4));
  m_stack = stackVar;
}

// =====================================================================================================================
@@ -184,7 +205,7 @@ void LowerGpuRt::visitStackRead(GpurtStackReadOp &inst) {
m_builder->SetInsertPoint(&inst);
Value *stackIndex = inst.getIndex();
Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3);
if (m_lowerStack) {
if (inst.getUseExtraStack()) {
auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries);
stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize);
}
@@ -206,7 +227,7 @@ void LowerGpuRt::visitStackWrite(GpurtStackWriteOp &inst) {
Value *stackIndex = inst.getIndex();
Value *stackData = inst.getValue();
Type *stackTy = PointerType::get(m_builder->getInt32Ty(), 3);
if (m_lowerStack) {
if (inst.getUseExtraStack()) {
auto ldsStackSize = m_builder->getInt32(getWorkgroupSize() * MaxLdsStackEntries);
stackIndex = m_builder->CreateAdd(stackIndex, ldsStackSize);
}
@@ -297,8 +318,7 @@ void LowerGpuRt::visitLdsStackStore(GpurtLdsStackStoreOp &inst) {
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *boxSortHeuristicMode = m_builder->getInt32(rtState->boxSortHeuristicMode);
Value *boxSortHeuristicMode = m_builder->getInt32(m_pipelineState->getOptions().rtBoxSortHeuristicMode);
inst.replaceAllUsesWith(boxSortHeuristicMode);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
@@ -310,8 +330,7 @@ void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &in
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *staticPipelineFlags = m_builder->getInt32(rtState->staticPipelineFlags);
Value *staticPipelineFlags = m_builder->getInt32(m_pipelineState->getOptions().rtStaticPipelineFlags);
inst.replaceAllUsesWith(staticPipelineFlags);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
@@ -323,8 +342,7 @@ void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) {
// @param inst : The dialect instruction to process
void LowerGpuRt::visitGetTriangleCompressionMode(GpurtGetTriangleCompressionModeOp &inst) {
m_builder->SetInsertPoint(&inst);
auto rtState = m_context->getPipelineContext()->getRayTracingState();
Value *triCompressMode = m_builder->getInt32(rtState->triCompressMode);
Value *triCompressMode = m_builder->getInt32(m_pipelineState->getOptions().rtTriCompressMode);
inst.replaceAllUsesWith(triCompressMode);
m_callsToLower.push_back(&inst);
m_funcsToLower.insert(inst.getCalledFunction());
@@ -341,4 +359,4 @@ void LowerGpuRt::visitGetFlattenedGroupThreadId(GpurtGetFlattenedGroupThreadIdOp
m_funcsToLower.insert(inst.getCalledFunction());
}

} // namespace Llpc
} // namespace lgc
1 change: 1 addition & 0 deletions lgc/patch/PassRegistry.inc
Original file line number Diff line number Diff line change
@@ -81,6 +81,7 @@ LLPC_MODULE_PASS("lgc-lower-debug-printf", LowerDebugPrintf)

LLPC_FUNCTION_PASS("lgc-combine-cooperative-matrix", CombineCooperativeMatrix)
LLPC_MODULE_PASS("lgc-lower-cooperative-matrix", LowerCooperativeMatrix)
LLPC_MODULE_PASS("lgc-lower-gpurt", LowerGpuRt)

#undef LLPC_PASS
#undef LLPC_LOOP_PASS
9 changes: 9 additions & 0 deletions lgc/patch/Patch.cpp
Original file line number Diff line number Diff line change
@@ -40,6 +40,7 @@
#include "lgc/patch/FragColorExport.h"
#include "lgc/patch/LowerCooperativeMatrix.h"
#include "lgc/patch/LowerDebugPrintf.h"
#include "lgc/patch/LowerGpuRt.h"
#include "lgc/patch/PatchBufferOp.h"
#include "lgc/patch/PatchCheckShaderCache.h"
#include "lgc/patch/PatchCopyShader.h"
@@ -132,6 +133,14 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T
if (patchTimer)
LgcContext::createAndAddStartStopTimer(passMgr, patchTimer, true);

if (pipelineState->getOptions().useGpurt) {
// NOTE: Lower GPURT operations and run InstCombinePass before the builder replayer, because some ops are going to
// be turned into constant values, so that we can eliminate unused `@lgc.create.load.buffer.desc` before getting
// into the replayer. Otherwise, unnecessary `writes_uavs` and `uses_uav` may be set.
passMgr.addPass(LowerGpuRt());
passMgr.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
}

// We're using BuilderRecorder; replay the Builder calls now
passMgr.addPass(BuilderReplayer());

1 change: 0 additions & 1 deletion llpc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -222,7 +222,6 @@ if(ICD_BUILD_LLPC)
lower/llpcSpirvLowerTranslator.cpp
lower/llpcSpirvLowerUtil.cpp
lower/llpcSpirvProcessGpuRtLibrary.cpp
lower/LowerGpuRt.cpp
lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp
lower/LowerGLCompatibility.cpp
lower/llpcSpirvLowerCooperativeMatrix.cpp
12 changes: 12 additions & 0 deletions llpc/context/llpcCompiler.cpp
Original file line number Diff line number Diff line change
@@ -1663,6 +1663,7 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef<const Pipeline

if (numStagesWithRayQuery) {
std::unique_ptr<Module> gpurtShaderLibrary = createGpurtShaderLibrary(context);
setUseGpurt(&*pipeline);
if (!gpurtShaderLibrary)
return Result::ErrorInvalidShader;

@@ -2677,6 +2678,16 @@ Result Compiler::generatePipeline(Context *context, unsigned moduleIndex, std::u
return Result::Success;
}

// =====================================================================================================================
// Marks the given pipeline as using the GPURT library by setting the `useGpurt` pipeline option.
//
// @param pipeline : The pipeline object
void Compiler::setUseGpurt(lgc::Pipeline *pipeline) {
  // The options are modified on a local copy and then written back through setOptions().
  auto updatedOptions = pipeline->getOptions();
  updatedOptions.useGpurt = true;
  pipeline->setOptions(updatedOptions);
}

// =====================================================================================================================
// Build single ray tracing pipeline ELF package.
//
@@ -2888,6 +2899,7 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext,
std::unique_ptr<Module> gpurtShaderLibrary;
if (needGpurtShaderLibrary) {
gpurtShaderLibrary = createGpurtShaderLibrary(mainContext);
setUseGpurt(&*pipeline);
if (!gpurtShaderLibrary)
return Result::ErrorInvalidShader;
}
2 changes: 2 additions & 0 deletions llpc/context/llpcCompiler.h
Original file line number Diff line number Diff line change
@@ -199,6 +199,8 @@ class Compiler : public ICompiler {
Result generatePipeline(Context *context, unsigned moduleIndex, std::unique_ptr<llvm::Module> module,
ElfPackage &pipelineElf, lgc::Pipeline *pipeline, TimerProfiler &timerProfiler);

void setUseGpurt(lgc::Pipeline *pipeline);

std::vector<std::string> m_options; // Compilation options
MetroHash::Hash m_optionHash; // Hash code of compilation options
GfxIpVersion m_gfxIp; // Graphics IP version info
4 changes: 4 additions & 0 deletions llpc/context/llpcPipelineContext.cpp
Original file line number Diff line number Diff line change
@@ -342,6 +342,10 @@ Options PipelineContext::computePipelineOptions() const {
options.enablePrimGeneratedQuery = getPipelineOptions()->enablePrimGeneratedQuery;
options.enableFragColor = getPipelineOptions()->enableFragColor;

options.rtBoxSortHeuristicMode = m_rtState.boxSortHeuristicMode;
options.rtStaticPipelineFlags = m_rtState.staticPipelineFlags;
options.rtTriCompressMode = m_rtState.triCompressMode;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to default-initialize useGpurt here as well?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The = {} initialization at the top should take care of that, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. My question is based on the fact that we initialize other members with a constant value here as well, so it is merely a question about coding style.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need to, because it is always false (unknown) here.


return options;
}

Loading