Skip to content

Commit

Permalink
PatchBufferOp issues when lowering load ptr(7), ptr(7)
Browse files Browse the repository at this point in the history
  • Loading branch information
ruimzhao committed Dec 12, 2023
1 parent 84105a3 commit ccac591
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 4 deletions.
6 changes: 6 additions & 0 deletions compilerutils/lib/TypeLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,12 @@ void TypeLowering::visitInsert(llvm::InsertValueInst &insert) {
//
// @param load : the instruction
void TypeLowering::visitLoad(LoadInst &load) {
// If the pointer operand's address space is ADDR_SPACE_BUFFER_FAT_POINTER, the load is lowered
// in postVisitLoad instead. Note that this may affect other passes, such as CPS, that do not have a postVisitLoad.
if (load.getPointerAddressSpace() == 7) {
return;
}

auto types = convertType(load.getType());
if (types.size() == 1 && types[0] == load.getType())
return;
Expand Down
23 changes: 19 additions & 4 deletions lgc/patch/PatchBufferOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,9 @@ void BufferOpLowering::visitLoadInst(LoadInst &loadInst) {
void BufferOpLowering::postVisitLoadInst(LoadInst &loadInst) {
Value *const newLoad = replaceLoadStore(loadInst);

if (newLoad == nullptr)
return;

// Record the load instruction so we remember to delete it later.
m_typeLowering.eraseInstruction(&loadInst);

Expand Down Expand Up @@ -1503,10 +1506,22 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) {
assert(newInst);

if (type->isPointerTy()) {
newInst = m_builder.CreateBitCast(newInst, m_builder.getIntNTy(bytesToHandle * 8));
copyMetadata(newInst, &inst);
newInst = m_builder.CreateIntToPtr(newInst, type);
copyMetadata(newInst, &inst);
if (type->getPointerAddressSpace() == ADDR_SPACE_BUFFER_FAT_POINTER) {
assert(cast<FixedVectorType>(newInst->getType())->getNumElements() == 5);
Value *bufferDesc = m_builder.CreateShuffleVector(newInst, {0, 1, 2, 3});
Value *baseIndex = m_builder.CreateExtractElement(newInst, 4);
baseIndex = m_builder.CreateIntToPtr(baseIndex, PointerType::get(type->getContext(), ADDR_SPACE_CONST_32BIT));

SmallVector<Value *> newFatPointer = {bufferDesc, baseIndex};
copyMetadata(baseIndex, &inst);
m_typeLowering.replaceInstruction(&inst, newFatPointer);
return nullptr;
} else {
newInst = m_builder.CreateBitCast(newInst, m_builder.getIntNTy(bytesToHandle * 8));
copyMetadata(newInst, &inst);
newInst = m_builder.CreateIntToPtr(newInst, type);
copyMetadata(newInst, &inst);
}
} else {
newInst = m_builder.CreateBitCast(newInst, type);
copyMetadata(newInst, &inst);
Expand Down
83 changes: 83 additions & 0 deletions lgc/test/Transforms/PatchBufferOp/fatptrLoadfatptr.lgc
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc
; RUN: lgc -o - -passes='require<lgc-pipeline-state>,function(lgc-patch-buffer-op)' %s | FileCheck --check-prefixes=CHECK %s

; Test input: loads a fat buffer pointer (ptr addrspace(7)) through another fat buffer pointer,
; then uses the loaded pointer for an i8 load whose result is stored through a second buffer pointer.
; The assertion lines below are autogenerated (see the NOTE at the top of the file); regenerate them
; with the update script rather than editing them by hand.
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata5, i32 inreg noundef %dummyInit2, i32 inreg noundef %dummyInit3, i32 inreg noundef %dummyInit4, i32 inreg noundef %dummyInit5, i32 inreg noundef %dummyInit6, i32 inreg noundef %dummyInit7, i32 inreg noundef %dummyInit8, i32 inreg noundef %dummyInit9, i32 inreg noundef %dummyInit10, i32 inreg noundef %dummyInit11, i32 inreg noundef %dummyInit12, i32 inreg noundef %dummyInit13, i32 inreg noundef %dummyInit14, i32 inreg noundef %dummyInit15, i32 inreg noundef %MultiDispatchInfo, i32 noundef %LocalInvocationId) #0 !lgc.shaderstage !12 {
; CHECK-LABEL: @_amdgpu_cs_main(
; CHECK-NEXT: .entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc()
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[USERDATA5:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4)
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i64 32
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP6]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> [[TMP5]], i32 0, i32 0, i32 0)
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[TMP5]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <5 x i32> poison, i32 [[TMP10]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i64 1
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <5 x i32> [[TMP11]], i32 [[TMP12]], i64 1
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <5 x i32> [[TMP13]], i32 [[TMP14]], i64 2
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i64 3
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <5 x i32> [[TMP15]], i32 [[TMP16]], i64 3
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP9]] to <1 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <1 x i32> [[TMP18]], i64 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <5 x i32> [[TMP17]], i32 [[TMP19]], i64 4
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <5 x i32> [[TMP20]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <5 x i32> [[TMP20]], i64 4
; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(6)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(6) [[TMP23]] to i32
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP26:%.*]] = and <2 x i32> [[TMP25]], <i32 -1, i32 65535>
; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64
; CHECK-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr addrspace(1)
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2
; CHECK-NEXT: [[TMP30:%.*]] = icmp ult i32 [[TMP24]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP24]], i32 0
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP28]], i32 [[TMP31]]
; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr addrspace(1) [[TMP32]], align 1
; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[TMP33]] to <1 x i8>
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <1 x i8> [[TMP34]], i64 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <1 x i8> poison, i8 [[TMP35]], i64 0
; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i8> [[TMP36]] to i8
; CHECK-NEXT: call void @llvm.amdgcn.raw.buffer.store.i8(i8 [[TMP37]], <4 x i32> [[TMP7]], i32 0, i32 0, i32 0)
; CHECK-NEXT: ret void
;
.entry:
; Form a 64-bit address from the upper 32 bits of the PC and %userdata5, and treat it as a ptr addrspace(4).
%0 = call i64 @llvm.amdgcn.s.getpc()
%1 = and i64 %0, -4294967296
%2 = zext i32 %userdata5 to i64
%3 = or i64 %1, %2
%4 = inttoptr i64 %3 to ptr addrspace(4)
; Load a <4 x i32> descriptor at offset 0 and convert it to a ptr addrspace(7).
%5 = load <4 x i32>, ptr addrspace(4) %4, align 16
%6 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %5)
; Load a second <4 x i32> descriptor at byte offset 32 and convert it to a ptr addrspace(7).
%7 = getelementptr i8, ptr addrspace(4) %4, i64 32
%8 = load <4 x i32>, ptr addrspace(4) %7, align 16
%9 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %8)
; Case under test: the loaded value is itself a ptr addrspace(7), read through a ptr addrspace(7).
%10 = load ptr addrspace(7), ptr addrspace(7) %6, align 32
; Read an i8 through the loaded pointer and store it through the second buffer pointer.
%11 = load i8, ptr addrspace(7) %10, align 1
store i8 %11, ptr addrspace(7) %9, align 1
ret void
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.amdgcn.s.getpc() #1

; Function Attrs: nounwind willreturn memory(none)
declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %0) #2

!0 = !{i32 1, i32 1, i32 1}
!1 = !{!"Vulkan"}
!2 = !{i32 -949757337, i32 1284858466, i32 103664779, i32 -2019744539, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16777216}
!3 = !{i32 1000766198, i32 254420182, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!4 = !{!"StreamOutTableVaPtr", i32 11, i32 2, i32 3, i32 1, i32 0}
!5 = !{!"DescriptorTableVaPtr", i32 7, i32 128, i32 4, i32 1, i32 1}
!6 = !{!"DescriptorBuffer", i32 6, i32 128, i32 0, i32 4, i64 4294967295, i32 6, i32 4}
!7 = !{!"DescriptorTableVaPtr", i32 7, i32 128, i32 5, i32 1, i32 3}
!8 = !{!"DescriptorBuffer", i32 6, i32 128, i32 0, i32 4, i64 0, i32 0, i32 4}
!9 = !{!"DescriptorBuffer", i32 6, i32 128, i32 4, i32 4, i64 0, i32 1, i32 4}
!10 = !{!"DescriptorBuffer", i32 6, i32 128, i32 8, i32 4, i64 0, i32 2, i32 4}
!11 = !{!"\82\B0amdpal.pipelines\91\83\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CFF\10\C1l\F8\85\C5N\CF4l\CC\81\90#\194\AD.llpc_version\A470.1\AEamdpal.version\92\03\00"}
!12 = !{i32 7}

0 comments on commit ccac591

Please sign in to comment.