Skip to content

Commit

Permalink
[mlir][xegpu] Support boundary checks only for block instructions (#119380)
Browse files Browse the repository at this point in the history

Constrains Vector lowering to apply boundary checks only to data
transfers operating on block shapes.

This further aligns lowering with the current Xe instructions'
restrictions.
  • Loading branch information
adam-smnk authored Dec 13, 2024
1 parent 06789cc commit 4c597d4
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 7 deletions.
19 changes: 14 additions & 5 deletions mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
xferOp, "Buffer must be contiguous in the innermost dimension");

unsigned vecRank = vecTy.getRank();
if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
return rewriter.notifyMatchFailure(
xferOp, "Boundary check is available only for block instructions.");

AffineMap map = xferOp.getPermutationMap();
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
Expand Down Expand Up @@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
return failure();

// Boundary check is available only for block instructions.
bool boundaryCheck = vecTy.getRank() > 1;

auto descType = xegpu::TensorDescType::get(
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
/*boundary_check=*/true, xegpu::MemorySpace::Global);
boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());

Expand Down Expand Up @@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
return failure();

auto descType =
xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
/*array_length=*/1, /*boundary_check=*/true,
xegpu::MemorySpace::Global);
// Boundary check is available only for block instructions.
bool boundaryCheck = vecTy.getRank() > 1;

auto descType = xegpu::TensorDescType::get(
vecTy.getShape(), vecTy.getElementType(),
/*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
// CHECK-SAME: boundary_check = true
// CHECK-SAME: boundary_check = false
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
// CHECK: return %[[VEC]]

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
// CHECK-SAME: boundary_check = true
// CHECK-SAME: boundary_check = false
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>

// -----
Expand Down
13 changes: 13 additions & 0 deletions mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,

// -----

// Negative test: a 1D transfer_read with an out-of-bounds dimension must not
// lower to XeGPU, because boundary checks are only available for block (2D+)
// instructions.
func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
    %offset: index) -> vector<8xf32> {
  %pad = arith.constant 0.0 : f32
  %loaded = vector.transfer_read %source[%offset, %offset, %offset], %pad
      {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
  return %loaded : vector<8xf32>
}

// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
// CHECK: vector.transfer_read

// -----

func.func @no_load_masked(%source : memref<4xf32>,
%offset : index) -> vector<4xf32> {
%c0 = arith.constant 0.0 : f32
Expand Down
13 changes: 13 additions & 0 deletions mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,

// CHECK-LABEL: @no_store_unsupported_map(
// CHECK: vector.transfer_write

// -----

// Negative test: a 1D transfer_write with an out-of-bounds dimension must not
// lower to XeGPU, because boundary checks are only available for block (2D+)
// instructions.
func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
    %source: memref<8x16x32xf32>, %offset: index) {
  vector.transfer_write %vec, %source[%offset, %offset, %offset]
      {in_bounds = [false]} : vector<8xf32>, memref<8x16x32xf32>
  return
}

// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
// CHECK: vector.transfer_write

0 comments on commit 4c597d4

Please sign in to comment.