diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 5a94b8befd..dd580ec9fe 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -501,6 +501,26 @@ class BitCastOperator } }; +// HLSL CHANGE: Add this helper class from upstream. +class AddrSpaceCastOperator + : public ConcreteOperator<Operator, Instruction::AddrSpaceCast> { + friend class AddrSpaceCastInst; + friend class ConstantExpr; + +public: + Value *getPointerOperand() { return getOperand(0); } + + const Value *getPointerOperand() const { return getOperand(0); } + + unsigned getSrcAddressSpace() const { + return getPointerOperand()->getType()->getPointerAddressSpace(); + } + + unsigned getDestAddressSpace() const { + return getType()->getPointerAddressSpace(); + } +}; + } // End llvm namespace #endif diff --git a/lib/DXIL/DxilUtilDbgInfoAndMisc.cpp b/lib/DXIL/DxilUtilDbgInfoAndMisc.cpp index ab4a952b51..f6e2424f8b 100644 --- a/lib/DXIL/DxilUtilDbgInfoAndMisc.cpp +++ b/lib/DXIL/DxilUtilDbgInfoAndMisc.cpp @@ -37,7 +37,18 @@ using namespace hlsl; namespace { -Value *MergeGEP(GEPOperator *SrcGEP, GEPOperator *GEP) { +// Attempt to merge the two GEPs into a single GEP. +// +// If `AsCast` is non-null the merged GEP will be wrapped +// in an addrspacecast before replacing users. This allows +// merging GEPs of the form +// +// gep(addrspacecast(gep(p0, gep_args0) to p1*), gep_args1) +// into +// addrspacecast(gep(p0, gep_args0+gep_args1) to p1*) +// +Value *MergeGEP(GEPOperator *SrcGEP, GEPOperator *GEP, + AddrSpaceCastOperator *AsCast) { IRBuilder<> Builder(GEP->getContext()); StringRef Name = ""; if (Instruction *I = dyn_cast<Instruction>(GEP)) { @@ -75,7 +86,7 @@ Value *MergeGEP(GEPOperator *SrcGEP, GEPOperator *GEP) { } // Update the GEP in place if possible. 
- if (SrcGEP->getNumOperands() == 2) { + if (SrcGEP->getNumOperands() == 2 && !AsCast) { GEP->setOperand(0, SrcGEP->getOperand(0)); GEP->setOperand(1, Sum); return GEP; @@ -94,12 +105,64 @@ Value *MergeGEP(GEPOperator *SrcGEP, GEPOperator *GEP) { DXASSERT(!Indices.empty(), "must merge"); Value *newGEP = Builder.CreateInBoundsGEP(nullptr, SrcGEP->getOperand(0), Indices, Name); + + // Wrap the new gep in an addrspacecast if needed. + if (AsCast) + newGEP = Builder.CreateAddrSpaceCast( + newGEP, PointerType::get(GEP->getType()->getPointerElementType(), + AsCast->getDestAddressSpace())); GEP->replaceAllUsesWith(newGEP); if (Instruction *I = dyn_cast<Instruction>(GEP)) I->eraseFromParent(); return newGEP; } +// Examine the gep and try to merge it when the input pointer is +// itself a gep. We handle two forms here: +// +// gep(gep(p)) +// gep(addrspacecast(gep(p))) +// +// If the gep was merged successfully then return the updated value, otherwise +// return nullptr. +// +// When the gep is successfully merged we will delete the gep and also try to +// delete the nested gep and addrspacecast. +static Value *TryMegeWithNestedGEP(GEPOperator *GEP) { + // Sentinel value to return when we fail to merge. + Value *FailedToMerge = nullptr; + + Value *Ptr = GEP->getPointerOperand(); + GEPOperator *prevGEP = dyn_cast<GEPOperator>(Ptr); + AddrSpaceCastOperator *AsCast = nullptr; + + // If there is no directly nested gep try looking through an addrspacecast to + // find one. + if (!prevGEP) { + AsCast = dyn_cast<AddrSpaceCastOperator>(Ptr); + if (AsCast) + prevGEP = dyn_cast<GEPOperator>(AsCast->getPointerOperand()); + } + + // Not a nested gep expression. + if (!prevGEP) + return FailedToMerge; + + // Try merging the two geps. + Value *newGEP = MergeGEP(prevGEP, GEP, AsCast); + if (!newGEP) + return FailedToMerge; + + // Delete the nested gep and addrspacecast if no more users. 
+ if (AsCast && AsCast->user_empty() && isa<Instruction>(AsCast)) + cast<Instruction>(AsCast)->eraseFromParent(); + + if (prevGEP->user_empty() && isa<Instruction>(prevGEP)) + cast<Instruction>(prevGEP)->eraseFromParent(); + + return newGEP; +} + } // namespace namespace hlsl { @@ -130,23 +193,14 @@ bool MergeGepUse(Value *V) { // merge any GEP users of the untranslated bitcast addUsersToWorklist(V); } + } else if (isa<AddrSpaceCastOperator>(V)) { + addUsersToWorklist(V); } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (GEPOperator *prevGEP = - dyn_cast<GEPOperator>(GEP->getPointerOperand())) { - // merge the 2 GEPs, returns nullptr if couldn't merge - if (Value *newGEP = MergeGEP(prevGEP, GEP)) { - changed = true; - worklist.push_back(newGEP); - // delete prevGEP if no more users - if (prevGEP->user_empty() && isa<Instruction>(prevGEP)) { - cast<Instruction>(prevGEP)->eraseFromParent(); - } - } else { - addUsersToWorklist(GEP); - } + if (Value *newGEP = TryMegeWithNestedGEP(GEP)) { + changed = true; + worklist.push_back(newGEP); } else { - // nothing to merge yet, add GEP users - addUsersToWorklist(V); + addUsersToWorklist(GEP); } } } diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index 2e57ccfa28..feeb23a5da 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -161,6 +161,7 @@ bool LowerTypePass::runOnModule(Module &M) { HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, NewGV); } // Replace users. + GV->removeDeadConstantUsers(); lowerUseWithNewValue(GV, NewGV); // Remove GV. GV->removeDeadConstantUsers(); diff --git a/test/HLSL/passes/multi_dim_one_dim/gep_addrspacecast_gep.ll b/test/HLSL/passes/multi_dim_one_dim/gep_addrspacecast_gep.ll new file mode 100644 index 0000000000..4eb5302388 --- /dev/null +++ b/test/HLSL/passes/multi_dim_one_dim/gep_addrspacecast_gep.ll @@ -0,0 +1,198 @@ +; RUN: opt -S -multi-dim-one-dim %s | FileCheck %s +; +; Tests for the pass that changes multi-dimension global variable accesses into +; a flattened one-dimensional access. 
The tests focus on the case where the geps +; need to be merged but are separated by an addrspacecast operation. This was +; causing the pass to fail because it could not merge the gep through the +; addrspace cast. + +; Naming convention: gep0_addrspacecast_gep1 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +@ArrayOfArray = addrspace(3) global [256 x [9 x float]] undef, align 4 +@ArrayOfArrayOfArray = addrspace(3) global [256 x [9 x [3 x float]]] undef, align 4 + +; Test that we can merge the geps when all parts are instructions. +; CHECK-LABEL: @merge_gep_instr_instr_instr +; CHECK: load float, float* addrspacecast (float addrspace(3)* getelementptr inbounds ([2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 1) to float*) +define void @merge_gep_instr_instr_instr() { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 0 + %asc = addrspacecast [9 x float] addrspace(3)* %gep0 to [9 x float]* + %gep1 = getelementptr inbounds [9 x float], [9 x float]* %asc, i32 0, i32 1 + %load = load float, float* %gep1 + ret void +} + +; Test that we can merge the geps when the inner gep are constants. +; CHECK-LABEL: @merge_gep_instr_instr_const +; CHECK: load float, float* addrspacecast (float addrspace(3)* getelementptr inbounds ([2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 1) to float*) +define void @merge_gep_instr_instr_const() { +entry: + %asc = addrspacecast [9 x float] addrspace(3)* getelementptr inbounds ([256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 0) to [9 x float]* + %gep1 = getelementptr inbounds [9 x float], [9 x float]* %asc, i32 0, i32 1 + %load = load float, float* %gep1 + ret void +} + +; Test that we can merge the geps when the addrspace and inner gep are constants. 
+; CHECK-LABEL: @merge_gep_instr_const_const +; CHECK: load float, float* addrspacecast (float addrspace(3)* getelementptr inbounds ([2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 1) to float*) +define void @merge_gep_instr_const_const() { +entry: + %gep1 = getelementptr inbounds [9 x float], [9 x float]* addrspacecast ([9 x float] addrspace(3)* getelementptr inbounds ([256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 0) to [9 x float]*), i32 0, i32 1 + %load = load float, float* %gep1 + ret void +} + +; Test that we can merge the geps when all parts are constants. +; CHECK-LABEL: @merge_gep_const_const +; CHECK: load float, float* addrspacecast (float addrspace(3)* getelementptr inbounds ([2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 1) to float*) +define void @merge_gep_const_const_const() { +entry: + %load = load float, float* getelementptr inbounds ([9 x float], [9 x float]* addrspacecast ([9 x float] addrspace(3)* getelementptr inbounds ([256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 0) to [9 x float]*), i32 0, i32 1) + ret void +} + +; Test that we compute the correct index when the outer array has +; a non-zero constant index. +; CHECK-LABEL: @merge_gep_const_outer_array_index +; CHECK: load float, float* addrspacecast (float addrspace(3)* getelementptr inbounds ([2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 66) to float*) +define void @merge_gep_const_outer_array_index() { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 7 + %asc = addrspacecast [9 x float] addrspace(3)* %gep0 to [9 x float]* + %gep1 = getelementptr inbounds [9 x float], [9 x float]* %asc, i32 0, i32 3 + %load = load float, float* %gep1 + ret void +} + +; Test that we compute the correct index when the outer array has +; a non-constant index. 
+; CHECK-LABEL: @merge_gep_dynamic_outer_array_index +; CHECK: %0 = mul i32 %idx, 9 +; CHECK: %1 = add i32 3, %0 +; CHECK: %2 = getelementptr [2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 %1 +; CHECK: %3 = addrspacecast float addrspace(3)* %2 to float* +; CHECK: load float, float* %3 +define void @merge_gep_dynamic_outer_array_index(i32 %idx) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 %idx + %asc = addrspacecast [9 x float] addrspace(3)* %gep0 to [9 x float]* + %gep1 = getelementptr inbounds [9 x float], [9 x float]* %asc, i32 0, i32 3 + %load = load float, float* %gep1 + ret void +} + +; Test that we compute the correct index when the both arrays have +; a non-constant index. +; CHECK-LABEL: @merge_gep_dynamic_array_index +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = getelementptr [2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 %1 +; CHECK: %3 = addrspacecast float addrspace(3)* %2 to float* +; CHECK: load float, float* %3 +define void @merge_gep_dynamic_array_index(i32 %idx0, i32 %idx1) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 %idx0 + %asc = addrspacecast [9 x float] addrspace(3)* %gep0 to [9 x float]* + %gep1 = getelementptr inbounds [9 x float], [9 x float]* %asc, i32 0, i32 %idx1 + %load = load float, float* %gep1 + ret void +} + +; Test that we compute the correct index when there are multiple +; geps after the addrspacecast. This also exercises the case +; where one of the outer geps ends in an array which hits +; an early return in MergeGEP. 
+; CHECK-LABEL: @merge_gep_multi_level_end_in_sequential_with_addrspace +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = getelementptr [2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 %1 +; CHECK: %3 = addrspacecast float addrspace(3)* %2 to float* +; CHECK: load float, float* %3 +define void @merge_gep_multi_level_end_in_sequential_with_addrspace(i32 %idx0, i32 %idx1) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0 + %asc = addrspacecast [256 x [9 x float]] addrspace(3)* %gep0 to [256 x [9 x float]]* + %gep1 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]]* %asc, i32 0, i32 %idx0 + %gep2 = getelementptr inbounds [9 x float], [9 x float]* %gep1, i32 0, i32 %idx1 + %load = load float, float* %gep2 + ret void +} + +; Test that we compute the correct index when there are three levels of geps. +; This also exercises the case where one of the outer geps ends in an +; array which hits an early return in MergeGEP. +; CHECK-LABEL: @merge_gep_multi_level_end_in_sequential +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = getelementptr [2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 %1 +; CHECK: load float, float addrspace(3)* %2 +define void @merge_gep_multi_level_end_in_sequential(i32 %idx0, i32 %idx1) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0 + %gep1 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* %gep0, i32 0, i32 %idx0 + %gep2 = getelementptr inbounds [9 x float], [9 x float] addrspace(3)* %gep1, i32 0, i32 %idx1 + %load = load float, float addrspace(3)* %gep2 + ret void +} + +; Test that we compute the correct index when the global has 3 levels of +; nested arrays and an addrspacecast. 
+; CHECK-LABEL: @merge_gep_multi_level_with_addrspace +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = mul i32 %1, 3 +; CHECK: %3 = add i32 %idx2, %2 +; CHECK: %4 = getelementptr [6912 x float], [6912 x float] addrspace(3)* @ArrayOfArrayOfArray.1dim, i32 0, i32 %3 +; CHECK: %5 = addrspacecast float addrspace(3)* %4 to float* +; CHECK: load float, float* %5 +define void @merge_gep_multi_level_with_addrspace(i32 %idx0, i32 %idx1, i32 %idx2) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x [3 x float]]], [256 x [9 x [3 x float]]] addrspace(3)* @ArrayOfArrayOfArray, i32 0, i32 %idx0 + %asc = addrspacecast [9 x [3 x float]] addrspace(3)* %gep0 to [9 x [3 x float]]* + %gep1 = getelementptr inbounds [9 x [3 x float]], [9 x [3 x float]]* %asc, i32 0, i32 %idx1 + %gep2 = getelementptr inbounds [3 x float], [3 x float]* %gep1, i32 0, i32 %idx2 + %load = load float, float* %gep2 + ret void +} + +; Test that we compute the correct index when the global has 3 levels of +; nested arrays. +; CHECK-LABEL: @merge_gep_multi_level +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = mul i32 %1, 3 +; CHECK: %3 = add i32 %idx2, %2 +; CHECK: %4 = getelementptr [6912 x float], [6912 x float] addrspace(3)* @ArrayOfArrayOfArray.1dim, i32 0, i32 %3 +; CHECK: load float, float addrspace(3)* %4 +define void @merge_gep_multi_level(i32 %idx0, i32 %idx1, i32 %idx2) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x [3 x float]]], [256 x [9 x [3 x float]]] addrspace(3)* @ArrayOfArrayOfArray, i32 0, i32 %idx0 + %gep1 = getelementptr inbounds [9 x [3 x float]], [9 x [3 x float]] addrspace(3)* %gep0, i32 0, i32 %idx1 + %gep2 = getelementptr inbounds [3 x float], [3 x float] addrspace(3)* %gep1, i32 0, i32 %idx2 + %load = load float, float addrspace(3)* %gep2 + ret void +} + +; Test that we compute the correct index when the addrspacecast includes both a +; change in address space and a change in the underlying type. 
I did not see +; this pattern in IR generated from hlsl, but we can handle this case so I am +; adding a test for it anyway. +; CHECK-LABEL: addrspace_cast_new_type +; CHECK: %0 = mul i32 %idx0, 9 +; CHECK: %1 = add i32 %idx1, %0 +; CHECK: %2 = getelementptr [2304 x float], [2304 x float] addrspace(3)* @ArrayOfArray.1dim, i32 0, i32 %1 +; CHECK: %3 = addrspacecast float addrspace(3)* %2 to i32* +; CHECK: load i32, i32* %3 +define void @addrspace_cast_new_type(i32 %idx0, i32 %idx1) { +entry: + %gep0 = getelementptr inbounds [256 x [9 x float]], [256 x [9 x float]] addrspace(3)* @ArrayOfArray, i32 0, i32 %idx0 + %asc = addrspacecast [9 x float] addrspace(3)* %gep0 to [3 x i32]* + %gep1 = getelementptr inbounds [3 x i32], [3 x i32]* %asc, i32 0, i32 %idx1 + %load = load i32, i32* %gep1 + ret void +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/ByteAddressBuffer/store_groupshared_struct_with_array.hlsl b/tools/clang/test/CodeGenDXIL/ByteAddressBuffer/store_groupshared_struct_with_array.hlsl new file mode 100644 index 0000000000..5affc5e673 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/ByteAddressBuffer/store_groupshared_struct_with_array.hlsl @@ -0,0 +1,57 @@ +// This is a regression test for a shader that crashed the compiler. The crash +// occurred because we failed to flatten the multi-level gep into a single-level +// gep which caused a crash in the multi-dimension global flattening pass. +// +// The shader is compiled with three different index values (two different +// constants and a dynamic index value). Each of these crashed in different +// places in the compiler as I was developing a fix. Originally, the /Od +// version crashed with index 0 or 1 and the /O3 version crashed with +// index 1. 
+ +// RUN: dxc /Tcs_6_0 /DINDEX=0 %s | FileCheck %s +// RUN: dxc /Tcs_6_0 /Od /DINDEX=0 %s | FileCheck %s +// RUN: dxc /Tcs_6_0 /DINDEX=1 %s | FileCheck %s +// RUN: dxc /Tcs_6_0 /Od /DINDEX=1 %s | FileCheck %s +// RUN: dxc /Tcs_6_0 /DINDEX=idx %s | FileCheck %s +// RUN: dxc /Tcs_6_0 /Od /DINDEX=idx %s | FileCheck %s + + +// CHECK: @main +// CHECK-DAG: [[ELEM_0:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_0]] + +// CHECK-DAG: [[ELEM_1:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_1]] + +// CHECK-DAG: [[ELEM_2:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_2]] + +// CHECK-DAG: [[ELEM_3:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_3]] + +// CHECK-DAG: [[ELEM_4:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_4]] + +// CHECK-DAG: [[ELEM_5:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_5]] + +// CHECK-DAG: [[ELEM_6:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_6]] + +// CHECK-DAG: [[ELEM_7:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_7]] + +// CHECK-DAG: [[ELEM_8:%[0-9]+]] = load float, float addrspace(3)* +// CHECK-DAG: call void @dx.op.bufferStore.f32({{.*}}, float [[ELEM_8]] + +struct Vec9 { float data[9]; }; +groupshared Vec9 A[256]; + +RWByteAddressBuffer buf; + +[numthreads(1,1,1)] +[RootSignature("DescriptorTable(SRV(t0, numDescriptors=10), UAV(u0,numDescriptors=10))")] +void main(uint idx : SV_GroupID) +{ + buf.Store(idx, A[INDEX]); +}