From b020a93a109c8d5fdc97321983f142ca73a8a51d Mon Sep 17 00:00:00 2001 From: Muhammad Asif Manzoor Date: Mon, 24 Feb 2025 13:15:56 -0500 Subject: [PATCH] Add support for stablehlo.reduce op for logical or operator (#2160) TTNN does not support reduction for the logical or operator, so stablehlo.reduce for the stablehlo.or operator is decomposed into a reduction sum op along the given dimension(s). If the ttnn.sum output is zero then the reduce_or output is false; otherwise the output is true. ### Ticket https://github.com/tenstorrent/tt-mlir/issues/1143 ### Problem description Add support for the reduction operation for the logical or operator. ### What's changed - `ttir.reduce_or` op is added in the TTIR dialect. - `ttir.reduce_or` op is decomposed/converted to `ttir.sum` op, as tt-metal does not support the reduce-or operation. - StableHLO conversion for the reduce or op. ### Checklist - [X] New tests provide coverage for changes --- include/ttmlir/Dialect/TTIR/IR/TTIROps.td | 22 ++++++++++ .../StableHLOToTTIRPatterns.cpp | 17 ++++++-- .../TTIRToTTIRDecomposition.cpp | 25 +++++++++++ .../TTIRToTTIRDecompositionPass.cpp | 1 + lib/Dialect/TTIR/IR/TTIROps.cpp | 16 +++++++ .../reduction/reduce_or_op.mlir | 39 +++++++++++++++++ .../TTIR/reduction/reduce_or.mlir | 41 ++++++++++++++++++ .../TTNN/reduction/simple_reduce_or.mlir | 39 +++++++++++++++++ .../n150/reduction/reduce_or_op.mlir | 19 ++++++++ .../Silicon/TTNN/n150/simple_reduce_or.mlir | 43 +++++++++++++++++++ 10 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 test/ttmlir/Conversion/StableHLOToTTIR/reduction/reduce_or_op.mlir create mode 100644 test/ttmlir/Decomposition/TTIR/reduction/reduce_or.mlir create mode 100644 test/ttmlir/Dialect/TTNN/reduction/simple_reduce_or.mlir create mode 100644 test/ttmlir/Silicon/StableHLO/n150/reduction/reduce_or_op.mlir create mode 100644 test/ttmlir/Silicon/TTNN/n150/simple_reduce_or.mlir diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index 
8f83056edb..bfcc7a5ce3 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -806,6 +806,28 @@ def TTIR_ReduceAndOp : TTIR_ReductionOp<"reduce_and"> { }]; } +def TTIR_ReduceOrOp : TTIR_ReductionOp<"reduce_or"> { + let summary = "Or reduction op."; + let description = [{ + Reduces a given tensor using logical or operator along the given dimension(s). + + Example: + input: [[True, False, False, False], + [True, True, False, True], + [False, False, False, True], + [False, False, False, False]] + + // Reduction along dim 0 + output: [True, True, False, True] + + // Reduction along dim 1 + output: [True, True, True, False] + + // Reduction for both dimensions (entire tensor) + output: [True] + }]; +} + def TTIR_ProdOp : TTIR_ReductionOp<"prod"> { let summary = "Product reduction op."; let description = [{ diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp index 334e228303..d3518a3bc4 100644 --- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp +++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp @@ -99,6 +99,10 @@ class StableHLOToTTIRReduceOpConversionPattern return matchAndRewriteInternal( srcOp, adaptor, rewriter); } + if (mlir::isa(innerOp)) { + return matchAndRewriteInternal(srcOp, adaptor, + rewriter); + } if (isArgMax(srcOp, adaptor, rewriter)) { return matchAndRewriteInternalArgMax(srcOp, adaptor, rewriter); } @@ -129,16 +133,23 @@ class StableHLOToTTIRReduceOpConversionPattern } mlir::Operation &innerOp = srcOp.getBody().front().front(); - if (mlir::isa(innerOp)) { + if (mlir::isa(innerOp) || + mlir::isa(innerOp)) { bool allOperandsAreBoolean = std::all_of( srcOp->operand_begin(), srcOp->operand_end(), [](auto operand) { return mlir::cast(operand.getType()) .getElementTypeBitWidth() == 1; }); + // Stablehlo (unlike other dialects) has single op for both logical and + // bitwise operation. 
Data type is used to distinguish between logical and + // bitwise operation. If the datatype is boolean then it is a logical + // operation; otherwise it is a bitwise operation. This check ensures that + // the inputs are boolean as tt-metal only supports logical operations. if (!allOperandsAreBoolean) { return rewriter.notifyMatchFailure( - srcOp, "stablehlo.reduce for stablehlo.and operator is only " - "supported for logical and."); + srcOp, + "stablehlo.reduce for stablehlo.and/stablehlo.or operator is only " + "supported for logical operator."); } } diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp index fe9a5cb865..d2249affb9 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp +++ b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp @@ -1344,6 +1344,30 @@ struct ArgMaxOpKeepDimConversionPattern }; } // namespace +// TTNN does not support reduction operation for logical or. So this reduction +// is performed by decomposing/converting into reduction sum (ttnn.sum op). +// If ttnn.sum output is zero then reduce_or output is false; otherwise the +// output is true. 
+namespace { +struct ReductionOrPattern : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ttir::ReduceOrOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + RankedTensorType reduceOutputType = mlir::cast( + getTypeConverter()->convertType(op.getResult().getType())); + + ttmlir::utils::replaceOpWithNewDPSOp( + rewriter, op, reduceOutputType, adaptor.getInput(), op.getKeepDim(), + op.getDimArgAttr()); + + return success(); + } +}; +} // namespace + void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, RewritePatternSet &patterns, TypeConverter &typeConverter) { @@ -1356,6 +1380,7 @@ void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); + patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); } diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp index 0ef642bbed..59befd3192 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp +++ b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp @@ -54,6 +54,7 @@ struct TTIRToTTIRDecompositionPass target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); + target.addIllegalOp(); // These are the ops that must satisfy some conditions after this pass target.addDynamicallyLegalOp([&](ttir::ArangeOp op) { diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp index 798af3c1cc..64087a92a6 100644 --- a/lib/Dialect/TTIR/IR/TTIROps.cpp +++ b/lib/Dialect/TTIR/IR/TTIROps.cpp @@ -2563,6 +2563,22 @@ ::mlir::LogicalResult mlir::tt::ttir::ReduceAndOp::verify() { return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); } +//===----------------------------------------------------------------------===// +// 
ReduceOrOp +//===----------------------------------------------------------------------===// + +// ReduceOrOp kernel builder. +void mlir::tt::ttir::ReduceOrOp::buildGenericRegion( + ::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { + // NOLINTNEXTLINE + createReduceOp(opBuilder, block, getLoc(), "or"); +} + +// ReduceOrOp verification. +::mlir::LogicalResult mlir::tt::ttir::ReduceOrOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); +} + //===----------------------------------------------------------------------===// // Reduce ArgMaxOp //===----------------------------------------------------------------------===// diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/reduction/reduce_or_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/reduction/reduce_or_op.mlir new file mode 100644 index 0000000000..c4b63543c3 --- /dev/null +++ b/test/ttmlir/Conversion/StableHLOToTTIR/reduction/reduce_or_op.mlir @@ -0,0 +1,39 @@ +// REQUIRES: stablehlo +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s +module @jit_reduce_or attributes {} { + func.func public @test_reduce_or_4to3dim(%arg0: tensor<128x10x32x4xi1>, %cst_0: tensor) -> tensor<128x10x32xi1> { + // CHECK-LABEL: func.func public @test_reduce_or_4to3dim + // CHECK: tensor.empty + // CHECK: "ttir.reduce_or" + // CHECK-SAME: dim_arg = [3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xbf16> + // CHECK-SAME: -> tensor<128x10x32xbf16> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.or across dimensions = [3] : (tensor<128x10x32x4xi1>, tensor) -> tensor<128x10x32xi1> + return %0 : tensor<128x10x32xi1> + } + + func.func public @test_reduce_or_3to2dim(%arg0: tensor<128x10x4xi1>, %cst_0: tensor) -> tensor<128x4xi1> { + // CHECK-LABEL: func.func public @test_reduce_or_3to2dim + // CHECK: tensor.empty + // CHECK: "ttir.reduce_or" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: 
tensor<128x10x4xbf16> + // CHECK-SAME: -> tensor<128x4xbf16> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.or across dimensions = [1] : (tensor<128x10x4xi1>, tensor) -> tensor<128x4xi1> + return %0 : tensor<128x4xi1> + } + + func.func public @test_reduce_or_2to1dim(%arg0: tensor<128x10xi1>, %cst_0: tensor) -> tensor<10xi1> { + // CHECK-LABEL: func.func public @test_reduce_or_2to1dim + // CHECK: tensor.empty + // CHECK: "ttir.reduce_or" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xbf16> + // CHECK-SAME: -> tensor<10xbf16> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.or across dimensions = [0] : (tensor<128x10xi1>, tensor) -> tensor<10xi1> + return %0 : tensor<10xi1> + } +} diff --git a/test/ttmlir/Decomposition/TTIR/reduction/reduce_or.mlir b/test/ttmlir/Decomposition/TTIR/reduction/reduce_or.mlir new file mode 100644 index 0000000000..1225427c19 --- /dev/null +++ b/test/ttmlir/Decomposition/TTIR/reduction/reduce_or.mlir @@ -0,0 +1,41 @@ +// RUN: ttmlir-opt --ttir-to-ttir-decomposition %s | FileCheck %s +module attributes {} { + func.func public @test_reduce_or_4to3dim(%arg0: tensor<128x10x32x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x10x32xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_4to3dim + // CHECK: %[[SUM:[0-9]+]] = "ttir.sum" + // CHECK-SAME: dim_arg = [3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xbf16> + // CHECK-SAME: -> tensor<128x10x32xbf16> + // CHECK: return %[[SUM]] + %0 = tensor.empty() : tensor<128x10x32xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [3 : i32], keep_dim = false}> : (tensor<128x10x32x4xbf16>, tensor<128x10x32xbf16>) -> tensor<128x10x32xbf16> + return %1 : tensor<128x10x32xbf16> + } + + func.func public @test_reduce_or_3to2dim(%arg0: tensor<128x10x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x4xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_3to2dim + // CHECK: %[[SUM:[0-9]+]] = 
"ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xbf16> + // CHECK-SAME: -> tensor<128x4xbf16> + // CHECK: return %[[SUM]] + %0 = tensor.empty() : tensor<128x4xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10x4xbf16>, tensor<128x4xbf16>) -> tensor<128x4xbf16> + return %1 : tensor<128x4xbf16> + } + + func.func public @test_reduce_or_2to1dim(%arg0: tensor<128x10xbf16>, %arg1: tensor<1xbf16>) -> tensor<10xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_2to1dim + // CHECK: %[[SUM:[0-9]+]] = "ttir.sum" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xbf16> + // CHECK-SAME: -> tensor<10xbf16> + // CHECK: return %[[SUM]] + %0 = tensor.empty() : tensor<10xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [0 : i32], keep_dim = false}> : (tensor<128x10xbf16>, tensor<10xbf16>) -> tensor<10xbf16> + return %1 : tensor<10xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/reduction/simple_reduce_or.mlir b/test/ttmlir/Dialect/TTNN/reduction/simple_reduce_or.mlir new file mode 100644 index 0000000000..1d269ac676 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/reduction/simple_reduce_or.mlir @@ -0,0 +1,39 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s + +module attributes {} { + func.func public @test_reduce_or_4to3dim(%arg0: tensor<128x10x32x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x10x32xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_4to3dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xbf16, + // CHECK-SAME: -> tensor<128x10x32xbf16, + %0 = tensor.empty() : tensor<128x10x32xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [3 : i32], keep_dim = false}> : (tensor<128x10x32x4xbf16>, tensor<128x10x32xbf16>) -> tensor<128x10x32xbf16> + return %1 : 
tensor<128x10x32xbf16> + } + + func.func public @test_reduce_or_3to2dim(%arg0: tensor<128x10x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x4xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_3to2dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xbf16, + // CHECK-SAME: -> tensor<128x4xbf16, + %0 = tensor.empty() : tensor<128x4xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10x4xbf16>, tensor<128x4xbf16>) -> tensor<128x4xbf16> + return %1 : tensor<128x4xbf16> + } + + func.func public @test_reduce_or_2to1dim(%arg0: tensor<128x10xbf16>, %arg1: tensor<1xbf16>) -> tensor<10xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_2to1dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xbf16, + // CHECK-SAME: -> tensor<10xbf16, + %0 = tensor.empty() : tensor<10xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [0 : i32], keep_dim = false}> : (tensor<128x10xbf16>, tensor<10xbf16>) -> tensor<10xbf16> + return %1 : tensor<10xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/n150/reduction/reduce_or_op.mlir b/test/ttmlir/Silicon/StableHLO/n150/reduction/reduce_or_op.mlir new file mode 100644 index 0000000000..4584f323f2 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/n150/reduction/reduce_or_op.mlir @@ -0,0 +1,19 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline \ +// RUN: --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s + +module @jit_reduce_add attributes {} { + func.func public @test_reduce_or_4to3dim(%arg0: tensor<128x10x32x4xi1>, %cst_0: tensor) -> tensor<128x10x32xi1> { + // CHECK-LABEL: func.func 
public @test_reduce_or_4to3dim + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: -> tensor<128x10x32xbf16, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.or across dimensions = [3] : (tensor<128x10x32x4xi1>, tensor) -> tensor<128x10x32xi1> + return %0 : tensor<128x10x32xi1> + } +} diff --git a/test/ttmlir/Silicon/TTNN/n150/simple_reduce_or.mlir b/test/ttmlir/Silicon/TTNN/n150/simple_reduce_or.mlir new file mode 100644 index 0000000000..e754cfac84 --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/n150/simple_reduce_or.mlir @@ -0,0 +1,43 @@ +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn + +module attributes {} { + func.func public @test_reduce_or_4to2dim(%arg0: tensor<128x10x32x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x32xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_4to2dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xbf16, + // CHECK-SAME: -> tensor<128x32xbf16, + %0 = tensor.empty() : tensor<128x32xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [1: i32, 3 : i32], keep_dim = false}> : (tensor<128x10x32x4xbf16>, tensor<128x32xbf16>) -> tensor<128x32xbf16> + return %1 : tensor<128x32xbf16> + } + + func.func public @test_reduce_or_3to2dim(%arg0: tensor<128x10x4xbf16>, %arg1: tensor<1xbf16>) -> tensor<128x4xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_3to2dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xbf16, + // CHECK-SAME: -> tensor<128x4xbf16, + %0 = tensor.empty() : tensor<128x4xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [1 : i32], 
keep_dim = false}> : (tensor<128x10x4xbf16>, tensor<128x4xbf16>) -> tensor<128x4xbf16> + return %1 : tensor<128x4xbf16> + } + + func.func public @test_reduce_or_2to1dim(%arg0: tensor<128x10xbf16>, %arg1: tensor<1xbf16>) -> tensor<10xbf16> { + // CHECK-LABEL: func.func public @test_reduce_or_2to1dim + // CHECK: %[[SUM:[0-9]+]] = "ttnn.sum" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xbf16, + // CHECK-SAME: -> tensor<10xbf16, + %0 = tensor.empty() : tensor<10xbf16> + %1 = "ttir.reduce_or"(%arg0, %0) <{dim_arg = [0 : i32], keep_dim = false}> : (tensor<128x10xbf16>, tensor<10xbf16>) -> tensor<10xbf16> + return %1 : tensor<10xbf16> + } +}