diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index 94be658658..d0ce05ead1 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -905,7 +905,7 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, wholeVertex = builder.CreateIntrinsic(instId, fetchType, args, {}); if (is64bitFetch) { // If it is 64-bit, we need the second fetch - args[offsetIdx] = builder.CreateAdd(args[2], builder.getInt32(SizeOfVec4)); + args[offsetIdx] = builder.CreateAdd(args[offsetIdx], builder.getInt32(SizeOfVec4)); auto secondFetch = builder.CreateIntrinsic(instId, fetchType, args, {}); wholeVertex = builder.CreateShuffleVector(wholeVertex, secondFetch, ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); } diff --git a/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe b/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe index 43262fbbb7..65cfab5c36 100644 --- a/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe +++ b/llpc/test/shaderdb/extensions/PipelineVsFs_TestFetchSingleInput.pipe @@ -13,9 +13,9 @@ ; - there is no VGPR input for the vertex input that the fetch shader generates. ; SHADERTEST: define amdgpu_vs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, <4 x float> } @_amdgpu_vs_main(i32 inreg noundef %0, i32 inreg noundef %1, i32 inreg noundef %VertexBufferTable, i32 inreg noundef %BaseVertex, i32 inreg noundef %BaseInstance, i32 inreg noundef %2, i32 inreg noundef %3, i32 inreg noundef %4, i32 inreg noundef %5, i32 inreg noundef %6, i32 inreg noundef %7, i32 inreg noundef %8, i32 inreg noundef %9, i32 inreg noundef %10, i32 inreg noundef %11, float noundef %VertexId, float noundef %12, float noundef %13, float noundef %InstanceId) ; Check that the attribute is loaded. -; SHADERTEST: [[f0:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 0, i32 0, i32 immarg 22, i32 immarg 0) -; SHADERTEST: [[f1:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 4, i32 0, i32 immarg 22, i32 immarg 0) -; SHADERTEST: [[f2:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 8, i32 0, i32 immarg 22, i32 immarg 0) +; SHADERTEST: [[f0:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 0, i32 0, i32 22, i32 0) +; SHADERTEST: [[f1:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 4, i32 0, i32 22, i32 0) +; SHADERTEST: [[f2:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[addr:%[0-9]*]], i32 %VertexIndex, i32 8, i32 0, i32 22, i32 0) ; SHADERTEST: [[vectmp0:%.*]] = insertelement <4 x i32> , i32 [[f0]], i{{32|64}} 0 ; SHADERTEST: [[vectmp1:%.*]] = insertelement <4 x i32> [[vectmp0]], i32 [[f1]], i{{32|64}} 1 ; SHADERTEST: [[vecf:%.*]] = insertelement <4 x i32> [[vectmp1]], i32 [[f2]], i{{32|64}} 2 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe index ab8f6ace19..a55bca8982 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe @@ -87,7 +87,7 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) ; SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP4]], i64 0 ; SHADERTEST-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load !13 -; SHADERTEST-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> [[TMP6]], i32 [[VERTEXINDEX]], i32 16, i32 0, i32 immarg 64, i32 immarg 0) #[[ATTR9:[0-9]+]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> [[TMP6]], i32 [[VERTEXINDEX]], i32 16, i32 0, i32 64, i32 0) ; SHADERTEST-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> , i32 0 ; SHADERTEST-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> , i32 1 ; SHADERTEST-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> , i32 2 @@ -99,14 +99,14 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: [[VERTEX1_0:%.*]] = bitcast <2 x i32> [[TMP15]] to <2 x float> ; SHADERTEST-NEXT: [[TMP16:%.*]] = getelementptr <4 x i32>, ptr addrspace(4) [[TMP4]], i64 0 ; SHADERTEST-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP16]], align 16, !invariant.load !13 -; SHADERTEST-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 immarg 22, i32 immarg 0) #[[ATTR9]] -; SHADERTEST-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i32 0 -; SHADERTEST-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 immarg 22, i32 immarg 0) #[[ATTR9]] -; SHADERTEST-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP20]], i32 1 -; SHADERTEST-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 immarg 22, i32 immarg 0) #[[ATTR9]] -; SHADERTEST-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP22]], i32 2 -; SHADERTEST-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 immarg 22, i32 immarg 0) #[[ATTR9]] -; SHADERTEST-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP24]], i32 3 +; SHADERTEST-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0) +; SHADERTEST-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 +; SHADERTEST-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0) +; SHADERTEST-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP20]], i64 1 +; SHADERTEST-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0) +; SHADERTEST-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP22]], i64 2 +; SHADERTEST-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP17]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0) +; SHADERTEST-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP24]], i64 3 ; SHADERTEST-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> , i32 0 ; SHADERTEST-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> , i32 1 ; SHADERTEST-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> , i32 2 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe index 3eb9f8f455..651d23fd09 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe @@ -5,7 +5,7 @@ ; BEGIN_SHADERTEST ; RUN: amdllpc %gfxip -v %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info -; SHADERTEST: call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 0, i32 0, i32 immarg 2, i32 immarg 0) +; SHADERTEST: call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 0, i32 0, i32 2, i32 0) ; SHADERTEST-LABEL: _amdgpu_vs_main: ; SHADERTEST: tbuffer_load_format_x v{{[0-9]*}}, v{{[0-9]*}}, s[{{[0-9]*:[0-9]*}}], 0 format:[BUF_FMT_8_SNORM] idxen ; SHADERTEST: AMDLLPC SUCCESS