-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PatchBufferOp issues when lowering load ptr(7), ptr(7)
- Loading branch information
ruimzhao
committed
Dec 12, 2023
1 parent
84105a3
commit ccac591
Showing
3 changed files
with
108 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc | ||
; RUN: lgc -o - -passes='require<lgc-pipeline-state>,function(lgc-patch-buffer-op)' %s | FileCheck --check-prefixes=CHECK %s | ||
|
||
; Test input for lgc-patch-buffer-op: lowering of a load whose result is itself a buffer pointer | ||
; (`load ptr addrspace(7), ptr addrspace(7)`). | ||
; The shader reads a buffer pointer out of one buffer, loads one byte through that loaded pointer, | ||
; and stores the byte into a second buffer. | ||
; The autogenerated assertions that follow the signature expect: | ||
;   - the pointer load to become a v4i32 raw buffer load at offset 0 plus an i32 raw buffer load at | ||
;     offset 16, reassembled into a <5 x i32>; | ||
;   - the byte load through the loaded pointer to become a load from addrspace(1), whose byte offset | ||
;     (element 4 of the <5 x i32>) is replaced by 0 unless it is unsigned-less-than element 2; | ||
;   - the byte store to become a raw.buffer.store.i8 on the second loaded <4 x i32>. | ||
; NOTE(review): element 2 is presumably the buffer size field of the descriptor - confirm. | ||
define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata5, i32 inreg noundef %dummyInit2, i32 inreg noundef %dummyInit3, i32 inreg noundef %dummyInit4, i32 inreg noundef %dummyInit5, i32 inreg noundef %dummyInit6, i32 inreg noundef %dummyInit7, i32 inreg noundef %dummyInit8, i32 inreg noundef %dummyInit9, i32 inreg noundef %dummyInit10, i32 inreg noundef %dummyInit11, i32 inreg noundef %dummyInit12, i32 inreg noundef %dummyInit13, i32 inreg noundef %dummyInit14, i32 inreg noundef %dummyInit15, i32 inreg noundef %MultiDispatchInfo, i32 noundef %LocalInvocationId) #0 !lgc.shaderstage !12 { | ||
; CHECK-LABEL: @_amdgpu_cs_main( | ||
; CHECK-NEXT: .entry: | ||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() | ||
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 | ||
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[USERDATA5:%.*]] to i64 | ||
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] | ||
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) | ||
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16 | ||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i64 32 | ||
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP6]], align 16 | ||
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> [[TMP5]], i32 0, i32 0, i32 0) | ||
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[TMP5]], i32 16, i32 0, i32 0) | ||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0 | ||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <5 x i32> poison, i32 [[TMP10]], i64 0 | ||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i64 1 | ||
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <5 x i32> [[TMP11]], i32 [[TMP12]], i64 1 | ||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 | ||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <5 x i32> [[TMP13]], i32 [[TMP14]], i64 2 | ||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i64 3 | ||
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <5 x i32> [[TMP15]], i32 [[TMP16]], i64 3 | ||
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP9]] to <1 x i32> | ||
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <1 x i32> [[TMP18]], i64 0 | ||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <5 x i32> [[TMP17]], i32 [[TMP19]], i64 4 | ||
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <5 x i32> [[TMP20]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <5 x i32> [[TMP20]], i64 4 | ||
; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(6) | ||
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(6) [[TMP23]] to i32 | ||
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <2 x i32> <i32 0, i32 1> | ||
; CHECK-NEXT: [[TMP26:%.*]] = and <2 x i32> [[TMP25]], <i32 -1, i32 65535> | ||
; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 | ||
; CHECK-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr addrspace(1) | ||
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2 | ||
; CHECK-NEXT: [[TMP30:%.*]] = icmp ult i32 [[TMP24]], [[TMP29]] | ||
; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP24]], i32 0 | ||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP28]], i32 [[TMP31]] | ||
; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr addrspace(1) [[TMP32]], align 1 | ||
; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[TMP33]] to <1 x i8> | ||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <1 x i8> [[TMP34]], i64 0 | ||
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <1 x i8> poison, i8 [[TMP35]], i64 0 | ||
; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i8> [[TMP36]] to i8 | ||
; CHECK-NEXT: call void @llvm.amdgcn.raw.buffer.store.i8(i8 [[TMP37]], <4 x i32> [[TMP7]], i32 0, i32 0, i32 0) | ||
; CHECK-NEXT: ret void | ||
; | ||
.entry: | ||
; Form a 64-bit address: upper 32 bits taken from the current PC, lower 32 bits from %userdata5. | ||
%0 = call i64 @llvm.amdgcn.s.getpc() | ||
%1 = and i64 %0, -4294967296 | ||
%2 = zext i32 %userdata5 to i64 | ||
%3 = or i64 %1, %2 | ||
%4 = inttoptr i64 %3 to ptr addrspace(4) | ||
; First <4 x i32> (byte offset 0), turned into the buffer pointer that the pointer load reads from. | ||
%5 = load <4 x i32>, ptr addrspace(4) %4, align 16 | ||
%6 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %5) | ||
; Second <4 x i32> (byte offset 32), turned into the buffer pointer that receives the final store. | ||
%7 = getelementptr i8, ptr addrspace(4) %4, i64 32 | ||
%8 = load <4 x i32>, ptr addrspace(4) %7, align 16 | ||
%9 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %8) | ||
; Case under test: the value loaded from the first buffer is itself a ptr addrspace(7). | ||
%10 = load ptr addrspace(7), ptr addrspace(7) %6, align 32 | ||
; Dereference the loaded pointer for one byte and copy that byte into the second buffer. | ||
%11 = load i8, ptr addrspace(7) %10, align 1 | ||
store i8 %11, ptr addrspace(7) %9, align 1 | ||
ret void | ||
} | ||
|
||
; Returns an i64; @_amdgpu_cs_main keeps only its upper 32 bits and uses them as the high half of | ||
; the address it loads the <4 x i32> values from. | ||
; NOTE(review): attribute group #1 is not defined in the visible part of this file. | ||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) | ||
declare i64 @llvm.amdgcn.s.getpc() #1 | ||
|
||
; Takes a <4 x i32> and returns a ptr addrspace(7). @_amdgpu_cs_main calls it on both loaded | ||
; <4 x i32> values; the autogenerated assertions for that function contain no call to it, i.e. the | ||
; pass under test is expected to remove these calls. | ||
; NOTE(review): attribute group #2 is not defined in the visible part of this file. | ||
; Function Attrs: nounwind willreturn memory(none) | ||
declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %0) #2 | ||
|
||
; Metadata nodes. !12 is the node attached to @_amdgpu_cs_main as !lgc.shaderstage. | ||
; NOTE(review): nothing visible in this excerpt references !0-!11 (no named metadata is shown). | ||
; Going by their string tags, !4-!10 look like descriptor/user-data node descriptions and !11 like | ||
; an encoded PAL metadata blob - confirm against the complete test file. | ||
!0 = !{i32 1, i32 1, i32 1} | ||
!1 = !{!"Vulkan"} | ||
!2 = !{i32 -949757337, i32 1284858466, i32 103664779, i32 -2019744539, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16777216} | ||
!3 = !{i32 1000766198, i32 254420182, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} | ||
!4 = !{!"StreamOutTableVaPtr", i32 11, i32 2, i32 3, i32 1, i32 0} | ||
!5 = !{!"DescriptorTableVaPtr", i32 7, i32 128, i32 4, i32 1, i32 1} | ||
!6 = !{!"DescriptorBuffer", i32 6, i32 128, i32 0, i32 4, i64 4294967295, i32 6, i32 4} | ||
!7 = !{!"DescriptorTableVaPtr", i32 7, i32 128, i32 5, i32 1, i32 3} | ||
!8 = !{!"DescriptorBuffer", i32 6, i32 128, i32 0, i32 4, i64 0, i32 0, i32 4} | ||
!9 = !{!"DescriptorBuffer", i32 6, i32 128, i32 4, i32 4, i64 0, i32 1, i32 4} | ||
!10 = !{!"DescriptorBuffer", i32 6, i32 128, i32 8, i32 4, i64 0, i32 2, i32 4} | ||
!11 = !{!"\82\B0amdpal.pipelines\91\83\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CFF\10\C1l\F8\85\C5N\CF4l\CC\81\90#\194\AD.llpc_version\A470.1\AEamdpal.version\92\03\00"} | ||
!12 = !{i32 7} |