Skip to content

Commit

Permalink
[RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVen…
Browse files Browse the repository at this point in the history
…dorXCVmem (llvm#120533)

For a simple matmult kernel this heuristic reduces the length of the
critical basic block from 15 to 12 instructions, resulting in a 20%
speedup.

**Without heuristic:**

```
       13688: 001b838b      cv.lb   t2, (s7), 0x1
       1368c: 09cdbcab      cv.lb   s9, t3(s11)
       13690: 089db62b      cv.lb   a2, s1(s11)
       13694: 092dbdab      cv.lb   s11, s2(s11)
       13698: 001d028b      cv.lb   t0, (s10), 0x1
       1369c: 00f282b3      add     t0, t0, a5
       136a0: 9072b52b      cv.mac  a0, t0, t2
       136a4: 9192bfab      cv.mac  t6, t0, s9
       136a8: 90c2beab      cv.mac  t4, t0, a2
       136ac: 91b2bf2b      cv.mac  t5, t0, s11
       136b0: fffc0c13      addi    s8, s8, -0x1
       136b4: 018e0633      add     a2, t3, s8
       136b8: 91b2b0ab      cv.mac  ra, t0, s11
       136bc: 000b8d93      mv      s11, s7
       136c0: fc0614e3      bnez    a2, 0x13688 <muriscv_nn_vec_mat_mult_t_s8+0x2f0>

       #instrs = 15
```

**With heuristic:**

```
        7bc0: 001c860b      cv.lb   a2, (s9), 0x1
        7bc4: 001e0d0b      cv.lb   s10, (t3), 0x1
        7bc8: 001e808b      cv.lb   ra, (t4), 0x1
        7bcc: 0015038b      cv.lb   t2, (a0), 0x1
        7bd0: 001c028b      cv.lb   t0, (s8), 0x1
        7bd4: 00f282b3      add     t0, t0, a5
        7bd8: 90c2bfab      cv.mac  t6, t0, a2
        7bdc: 91a2b92b      cv.mac  s2, t0, s10
        7be0: 9012b5ab      cv.mac  a1, t0, ra
        7be4: 9072b9ab      cv.mac  s3, t0, t2
        7be8: 9072b72b      cv.mac  a4, t0, t2
        7bec: fc851ae3      bne     a0, s0, 0x7bc0 <muriscv_nn_vec_mat_mult_t_s8+0x338>

        #instrs = 12

        improvement = 1 - 12/15 = 0.2 = 20%
```
  • Loading branch information
PhilippvK authored Dec 31, 2024
1 parent e50ec3e commit f590963
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 0 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}

/// Report the addressing mode this target prefers for loop \p L.
///
/// Post-indexed addressing is requested only when the subtarget has the
/// XCVmem vendor extension and is not 64-bit. Every other configuration
/// defers to the BasicTTIImplBase implementation.
TTI::AddressingModeKind
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
                                         ScalarEvolution *SE) const {
  // Guard clause: anything that is not "XCVmem on RV32" takes the generic
  // answer from the base class.
  if (!ST->hasVendorXCVmem() || ST->is64Bit())
    return BasicTTIImplBase::getPreferredAddressingMode(L, SE);

  return TTI::AMK_PostIndexed;
}

bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) {
// RISC-V specific here are "instruction number 1st priority".
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
llvm_unreachable("unknown register class");
}

/// Returns TTI::AMK_PostIndexed for non-64-bit subtargets that have the
/// XCVmem vendor extension; otherwise defers to the BasicTTIImplBase
/// implementation for \p L and \p SE.
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const;

unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
if (Vector)
return RISCVRegisterClass::VRRC;
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=riscv32 -mattr=+m,+xcvmem -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK

; Regression test for the XCVmem addressing-mode preference on riscv32.
; The loop advances two byte pointers (%2 from %b, %3 from %b + %e) by one
; each iteration and returns the zero-extended byte last loaded through %3.
; The value loaded through %2 (%4) is never used.
; The assertions below expect the surviving load to be emitted as a single
; post-incrementing `cv.lbu a1, (a3), 1` inside the loop.
define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) {
; CHECK-LABEL: test_heuristic:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: add a3, a0, a1
; CHECK-NEXT: andi a2, a2, 1
; CHECK-NEXT: .LBB0_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cv.lbu a1, (a3), 1
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: beqz a2, .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ret
entry:
%1 = getelementptr i8, ptr %b, i32 %e
br label %loop

loop: ; preds = %loop, %entry
%2 = phi ptr [ %b, %entry ], [ %7, %loop ]
%3 = phi ptr [ %1, %entry ], [ %8, %loop ]
%4 = load i8, ptr %2, align 1
%5 = load i8, ptr %3, align 1
%6 = zext i8 %5 to i32
%7 = getelementptr i8, ptr %2, i32 1
%8 = getelementptr i8, ptr %3, i32 1
br i1 %0, label %exit, label %loop

exit: ; preds = %loop
ret i32 %6
}

0 comments on commit f590963

Please sign in to comment.