Skip to content

Commit

Permalink
[NVPTX] Support address offsets added with disjoint or (llvm#122042)
Browse files Browse the repository at this point in the history
Sometimes DAGCombiner gets a little too clever and converts an add of a
small constant offset to a highly aligned pointer into a 'disjoint or'.
When looking for address operands, handle this case as well.
  • Loading branch information
AlexMaclean authored Jan 8, 2025
1 parent 8948340 commit 560b72c
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 37 deletions.
10 changes: 8 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
Expand Down Expand Up @@ -2449,6 +2450,11 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
return true;
}

// isAddLike - Return true if V can be treated as an addition when matching
// address operands: either a plain ISD::ADD, or an ISD::OR that carries the
// 'disjoint' flag.
static inline bool isAddLike(const SDValue V) {
  const unsigned Opc = V.getOpcode();
  if (Opc == ISD::ADD)
    return true;
  // An OR only qualifies when it is explicitly marked disjoint.
  return Opc == ISD::OR && V->getFlags().hasDisjoint();
}

// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
Expand All @@ -2475,7 +2481,7 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (Addr.getOpcode() == ISD::ADD) {
if (isAddLike(Addr)) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
Expand Down Expand Up @@ -2512,7 +2518,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Addr.getOpcode() == ISD::TargetGlobalAddress)
return false; // direct calls.

if (Addr.getOpcode() == ISD::ADD) {
if (isAddLike(Addr)) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
return false;
}
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"

@a = external global ptr align 16

; Loads an i32 from the address formed by 'or disjoint' of the global @a's
; address with the constant 8. The expected PTX folds that constant into the
; load's address operand ([%rd2+8]) instead of emitting a separate or.b64.
define i32 @test_disjoint_or_addr(i16 %a) {
; CHECK-LABEL: test_disjoint_or_addr(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u64 %rd1, a;
; CHECK-NEXT: cvta.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.u32 %r1, [%rd2+8];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
%a1 = ptrtoint ptr @a to i64
%a2 = or disjoint i64 %a1, 8
%a3 = inttoptr i64 %a2 to ptr
%v = load i32, ptr %a3
ret i32 %v
}
29 changes: 14 additions & 15 deletions llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
; PTX-NEXT: .reg .b32 %r<11>;
; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
Expand All @@ -38,23 +38,22 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
; PTX-NEXT: add.u64 %rd2, %SP, 0;
; PTX-NEXT: or.b64 %rd3, %rd2, 8;
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
; PTX-NEXT: st.u64 [%rd3], %rd4;
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
; PTX-NEXT: st.u64 [%SP], %rd5;
; PTX-NEXT: mov.u64 %rd6, gi;
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
; PTX-NEXT: selp.b64 %rd8, %rd2, %rd7, %p1;
; PTX-NEXT: add.s64 %rd9, %rd8, %rd1;
; PTX-NEXT: ld.u8 %r1, [%rd9];
; PTX-NEXT: ld.u8 %r2, [%rd9+1];
; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8];
; PTX-NEXT: st.u64 [%SP+8], %rd2;
; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0];
; PTX-NEXT: st.u64 [%SP], %rd3;
; PTX-NEXT: mov.u64 %rd4, gi;
; PTX-NEXT: cvta.global.u64 %rd5, %rd4;
; PTX-NEXT: add.u64 %rd6, %SP, 0;
; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1;
; PTX-NEXT: add.s64 %rd8, %rd7, %rd1;
; PTX-NEXT: ld.u8 %r1, [%rd8];
; PTX-NEXT: ld.u8 %r2, [%rd8+1];
; PTX-NEXT: shl.b32 %r3, %r2, 8;
; PTX-NEXT: or.b32 %r4, %r3, %r1;
; PTX-NEXT: ld.u8 %r5, [%rd9+2];
; PTX-NEXT: ld.u8 %r5, [%rd8+2];
; PTX-NEXT: shl.b32 %r6, %r5, 16;
; PTX-NEXT: ld.u8 %r7, [%rd9+3];
; PTX-NEXT: ld.u8 %r7, [%rd8+3];
; PTX-NEXT: shl.b32 %r8, %r7, 24;
; PTX-NEXT: or.b32 %r9, %r8, %r6;
; PTX-NEXT: or.b32 %r10, %r9, %r4;
Expand Down
35 changes: 15 additions & 20 deletions llvm/test/CodeGen/NVPTX/variadics-backend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
Expand All @@ -163,24 +163,20 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
; CHECK-PTX-NEXT: or.b64 %rd4, %rd3, 4;
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4];
; CHECK-PTX-NEXT: or.b64 %rd5, %rd3, 5;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd3, 7;
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd6];
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7];
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5];
; CHECK-PTX-NEXT: or.b64 %rd7, %rd3, 6;
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7];
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5];
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6];
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8];
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5;
; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-PTX-NEXT: ret;
entry:
Expand Down Expand Up @@ -219,7 +215,7 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
Expand All @@ -240,17 +236,16 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4;
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
; CHECK-PTX-NEXT: st.u8 [%rd6], %rs9;
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
Expand Down

0 comments on commit 560b72c

Please sign in to comment.