Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S390x: add preliminary support for SystemZ #4810

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions dmd/argtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@ namespace dmd
TypeTuple *toArgTypes_sysv_x64(Type *t);
// in argtypes_aarch64.d
TypeTuple *toArgTypes_aarch64(Type *t);
// in argtypes_s390x.d
TypeTuple *toArgTypes_s390x(Type *t);
bool isHFVA(Type *t, int maxNumElements = 4, Type **rewriteType = nullptr);
}
74 changes: 74 additions & 0 deletions dmd/argtypes_s390x.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/**
* Break down a D type into basic (register) types for the IBM Z ELF ABI.
*
* Copyright: Copyright (C) 2024-2025 by The D Language Foundation, All Rights Reserved
* Authors: Martin Kinkelin
* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/argtypes_s390x.d, _argtypes_s390x.d)
* Documentation: https://dlang.org/phobos/dmd_argtypes_s390x.html
* Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/argtypes_s390x.d
*/

module dmd.argtypes_s390x;

import dmd.astenums;
import dmd.mtype;
import dmd.typesem;

/****************************************************
 * Breaks a type down into 'simpler' types that can be passed to a function
 * in registers, and returned in registers.
 * This is the implementation for the IBM Z ELF ABI,
 * based on https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf.
 *
 * Non-aggregate types are returned unchanged. A struct with a single
 * floating-point field is represented by that field's type. Every other
 * aggregate is represented by an integer type of the same size (1, 2, 4 or
 * 8 bytes) or by a static array of 64-bit integers covering its size.
 * Params:
 *      t = type to break down
 * Returns:
 *      tuple of types, each element can be passed in a register.
 *      `null` is returned for zero-sized types (e.g. `void`).
 */
TypeTuple toArgTypes_s390x(Type t)
{
    if (t == Type.terror)
        return new TypeTuple(t);

    const size = cast(size_t) t.size();
    if (size == 0)
        return null;

    // TODO: implement the rest of the va args passing

    Type tb = t.toBasetype();
    const isAggregate = tb.ty == Tstruct || tb.ty == Tsarray || tb.ty == Tarray
        || tb.ty == Tdelegate || tb.iscomplex();
    if (!isAggregate)
        return new TypeTuple(t);

    // Unwrap single-float struct per ABI requirements. The base type is
    // inspected (not `t` itself) so that the check agrees with `isAggregate`
    // above, e.g. for an enum whose base type is such a struct.
    if (auto tstruct = tb.isTypeStruct())
    {
        if (tstruct.sym.fields.length == 1)
        {
            Type fieldType = tstruct.sym.fields[0].type.toBasetype();
            if (fieldType.isfloating())
                return new TypeTuple(fieldType);
        }
    }

    // pass remaining aggregates in 1 or 2 GP registers
    static Type getGPType(size_t size)
    {
        switch (size)
        {
        case 1: return Type.tint8;
        case 2: return Type.tint16;
        case 4: return Type.tint32;
        case 8: return Type.tint64;
        default:
            // round up to a whole number of 64-bit slots
            return Type.tint64.sarrayOf((size + 7) / 8);
        }
    }

    return new TypeTuple(getGPType(size));
}
9 changes: 9 additions & 0 deletions dmd/cxxfrontend.d
Original file line number Diff line number Diff line change
Expand Up @@ -698,4 +698,13 @@ version (IN_LLVM)
import dmd.argtypes_x86;
return dmd.argtypes_x86.toArgTypes_x86(t);
}

/***********************************************************
* argtypes_s390x.d
*/
TypeTuple toArgTypes_s390x(Type t)
{
    // Forward to the D implementation. The module is imported under a local
    // alias so the call cannot be confused with this wrapper of the same name.
    import impl = dmd.argtypes_s390x;
    return impl.toArgTypes_s390x(t);
}
}
2 changes: 2 additions & 0 deletions gen/abi/abi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ TargetABI *TargetABI::getTarget() {
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return getWasmTargetABI();
case llvm::Triple::systemz:
return getSystemZTargetABI();
default:
warning(Loc(),
"unknown target ABI, falling back to generic implementation. C/C++ "
Expand Down
243 changes: 243 additions & 0 deletions gen/abi/systemz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
//===-- systemz.cpp -------------------------------------------------------===//
//
// LDC - the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// The ABI implementation used for 64 bit big-endian IBM Z targets.
//
// The IBM s390x ELF ABI can be found here:
// https://github.com/IBM/s390x-abi
//===----------------------------------------------------------------------===//

#include "dmd/identifier.h"
#include "dmd/nspace.h"
#include "gen/abi/abi.h"
#include "gen/abi/generic.h"
#include "gen/dvalue.h"
#include "gen/irstate.h"
#include "gen/llvmhelpers.h"
#include "gen/tollvm.h"

using namespace dmd;

struct SimpleHardfloatRewrite : ABIRewrite {
// Returns the D type of the only field of struct type `ty` if that field is a
// floating-point type (per Type::isfloating()). Returns nullptr if `ty` is not
// a struct or if the field is not floating-point.
// Only the struct's direct field is inspected; a nested single-field struct is
// not unwrapped any further.
Type *getFirstFieldType(Type *ty) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the C ABI, this is applied recursively: a struct with a single element that is a struct with a single element of floating-point type is also passed like a float, etc.

if (auto ts = ty->toBasetype()->isTypeStruct()) {
// a struct reaching this point must have exactly one field
assert(ts->sym->fields.size() == 1);
auto *subField = ts->sym->fields[0];
if (subField->type->isfloating()) {
return subField->type;
}
// single field, but not a floating-point one
return nullptr;
}
// not a struct at all
return nullptr;
}

// Turns a single-float struct argument into the bare floating-point value:
// loads the field from the start of the struct's storage.
LLValue *put(DValue *dv, bool, bool) override {
  LLType *const fieldLLType = DtoType(getFirstFieldType(dv->type));
  // the struct must live in memory so that its field can be loaded
  assert(dv->isLVal());
  LLValue *const fieldAddress = DtoGEP1(fieldLLType, DtoLVal(dv), 0U);
  return DtoLoad(fieldLLType, fieldAddress, ".HardfloatRewrite_arg");
}

// Inverse of "put": rebuilds the struct of D type `dty` from the bare value
// `v` and dumps it into a fresh stack slot.
LLValue *getLVal(Type *dty, LLValue *v) override {
  LLValue *const emptyAggregate = llvm::UndefValue::get(DtoType(dty));
  // `v` becomes element 0 of the aggregate
  LLValue *const rebuilt = DtoInsertValue(emptyAggregate, v, 0);
  return DtoAllocaDump(rebuilt, dty, ".HardfloatRewrite_param_storage");
}

// The rewritten LLVM type is the LLVM type of the struct's floating-point field.
LLType *type(Type *ty) override {
  Type *const fieldType = getFirstFieldType(ty);
  return DtoType(fieldType);
}

// Returns true if `ty` is a struct whose only field is a `float` or `double`;
// such a struct is passed like the plain floating-point value.
// Complex and imaginary fields also satisfy Type::isfloating(), but they are
// not float-like for the ABI and must not take this path, so the field's base
// type is compared against the two real floating-point types explicitly.
bool shouldApplyRewrite(Type *ty) {
  auto ts = ty->toBasetype()->isTypeStruct();
  if (!ts || ts->sym->fields.size() != 1) {
    return false;
  }
  const TY fieldTy = ts->sym->fields[0]->type->toBasetype()->ty;
  return fieldTy == TY::Tfloat32 || fieldTy == TY::Tfloat64;
}
};

// Bitcast-style rewrite for small aggregates: a type of exactly 1, 2, 4 or
// 8 bytes is represented by the LLVM integer type of the same width.
struct StructSimpleFlattenRewrite : BaseBitcastABIRewrite {
// Returns the LLVM type to rewrite `ty` to: i8/i16/i32/i64 for sizes 1/2/4/8,
// otherwise the regular LLVM type of `ty` (i.e. no change).
LLType *type(Type *ty) override {
const size_t type_size = size(ty);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the C ABI, a struct containing just a single float or double member is passed like a plain float or double, i.e. possibly in a floating-point register. I don't see this being handled anywhere here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fixed now.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I forgot to mention one similar case, sorry: a struct containing just a single element of vector type (or recursively another such struct), is passed like a plain vector type (i.e. in vector registers).

// "A struct or a union of 1, 2, 4, or 8 bytes"
switch (type_size) {
case 1:
return LLType::getInt8Ty(gIR->context());
case 2:
return LLType::getInt16Ty(gIR->context());
case 4:
return LLType::getInt32Ty(gIR->context());
case 8:
return LLType::getInt64Ty(gIR->context());
default:
// any other size: keep the type as it is
return DtoType(ty);
}
}
};

struct SystemZTargetABI : TargetABI {
IndirectByvalRewrite indirectByvalRewrite{};
StructSimpleFlattenRewrite structSimpleFlattenRewrite{};
SimpleHardfloatRewrite simpleHardfloatRewrite{};

explicit SystemZTargetABI() {}

// Returns true if `t` is a struct named `__va_list_tag` whose parent is a
// C++ namespace called `std`.
bool isSystemZVaList(Type *t) {
  auto ts = t->isTypeStruct();
  if (!ts) {
    return false;
  }

  auto sd = ts->sym;
  if (strcmp(sd->ident->toChars(), "__va_list_tag") != 0) {
    return false;
  }

  auto ns = sd->parent->isNspace();
  return ns && strcmp(ns->toChars(), "std") == 0;
}

// Decides whether the function's return value is returned through a hidden
// pointer argument: never for ref returns, always for structs and for vectors
// larger than 16 bytes, otherwise whatever shouldPassByVal() says.
bool returnInArg(TypeFunction *tf, bool) override {
  if (tf->isref()) {
    return false;
  }

  Type *rt = tf->next->toBasetype();
  const bool isStruct = rt->ty == TY::Tstruct;
  if (isStruct || (rt->isTypeVector() && size(rt) > 16)) {
    return true;
  }

  return shouldPassByVal(tf->next);
}

bool passByVal(TypeFunction *, Type *t) override {
// LLVM's byval attribute is not compatible with the SystemZ ABI
// due to how SystemZ's stack is setup
return false;
}

// Returns true if a value of type `t` has to be passed indirectly (callers
// apply IndirectByvalRewrite in that case), false if it can be passed
// directly.
bool shouldPassByVal(Type *t) {
  // "A struct or a union of 1, 2, 4, or 8 bytes" is passed in a register.
  // Only these exact sizes qualify; a struct of 3, 5, 6 or 7 bytes must fall
  // through to the in-memory check at the end of this function.
  if (t->ty == TY::Tstruct) {
    const auto structSize = size(t);
    if (structSize == 1 || structSize == 2 || structSize == 4 ||
        structSize == 8) {
      return false;
    }
  }
  // "A struct or union of any other size, a complex type, an __int128, a long
  // double, a _Decimal128, or a vector whose size exceeds 16 bytes"
  if (size(t) > 16 || t->iscomplex() || t->isimaginary()) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused by the size <=8 check above vs. the size > 16 check here. What about structs with sizes in between the two? They should be passed by reference - I'm not sure if this is what this code does.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any struct objects that are between 8 to 16 bytes would be passed by reference (will be determined by DtoIsInMemoryOnly).

    return true;
  }
  if (t->ty == TY::Tint128 || t->ty == TY::Tcomplex80) {
    return true;
  }

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vector types of size up to 16 should be passed in vector registers, but only when compiling for an architecture that supports vector registers in the first place (i.e. z13 and above). Older machines use another ABI where vector types are always passed via reference. Do you intend to support both ABIs, or do you plan to simply require z13 or later (either in general, or whenever vector types are used)? That may be a reasonable choice at this point, but I guess you should make sure that the machine type / features are set up accordingly for the LLVM back-end.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vector types of size up to 16 should be passed in vector registers, but only when compiling for an architecture that supports vector registers in the first place (i.e. z13 and above). Older machines use another ABI where vector types are always passed via reference. Do you intend to support both ABIs, or do you plan to simply require z13 or later (either in general, or whenever vector types are used)? That may be a reasonable choice at this point, but I guess you should make sure that the machine type / features are set up accordingly for the LLVM back-end.

I don't think in D, and there is an easy way to construct TY::Tint128 (you can use core.int128.Cent, but that type is { i64, i64 } in D ABI). Even if we lower it to int128 and pass by reference, LLVM will still correctly pass it using vector registers (see https://godbolt.org/z/a8xEEfezz).
For TY::Tcomplex80, this lowers to fp128 and will automatically be handled by LLVM according to the -mcpu values passed to LLVM.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vector types of size up to 16 should be passed in vector registers, but only when compiling for an architecture that supports vector registers in the first place (i.e. z13 and above). Older machines use another ABI where vector types are always passed via reference. Do you intend to support both ABIs, or do you plan to simply require z13 or later (either in general, or whenever vector types are used)? That may be a reasonable choice at this point, but I guess you should make sure that the machine type / features are set up accordingly for the LLVM back-end.

I don't think in D, and there is an easy way to construct TY::Tint128 (you can use core.int128.Cent, but that type is { i64, i64 } in D ABI). Even if we lower it to int128 and pass by reference, LLVM will still correctly pass it using vector registers (see https://godbolt.org/z/a8xEEfezz).

int128 is always passed via reference, also in your godbolt example (it uses vector registers temporarily to set up the value, but the actual argument is passed at 160(%r15), with %r2 pointing to that address.

For TY::Tcomplex80, this lowers to fp128 and will automatically be handled by LLVM according to the -mcpu values passed to LLVM.

fp128 is always passed in a pair of floating-point registers, no matter what -mcpu.

What I was refering to applies solely to actual vector types (of size up to 16). Those are passed via reference on pre-z13 machines, and in vector registers on z13 and later.

  // everything else: defer to the generic in-memory-only classification
  return DtoIsInMemoryOnly(t);
}

// Applies rewriteArgument() to the return value and to every parameter of
// `fty`, skipping anything that is already passed by reference.
void rewriteFunctionType(IrFuncTy &fty) override {
  const auto rewriteUnlessByRef = [&](IrFuncTyArg &candidate) {
    if (!candidate.byref) {
      rewriteArgument(fty, candidate);
    }
  };

  rewriteUnlessByRef(*fty.ret);
  for (auto param : fty.args) {
    rewriteUnlessByRef(*param);
  }
}

// Rewrites a single argument (or the return value) according to the ABI:
//  * va_list structs are passed implicitly by reference,
//  * non-PODs and everything shouldPassByVal() flags are passed indirectly,
//  * integers narrower than 64 bits are sign-/zero-extended,
//  * single-float structs are passed as the bare floating-point value,
//  * other structs of up to 8 bytes are flattened to an integer.
void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override {
  Type *ty = arg.type->toBasetype();

  // compiler magic: pass va_list args implicitly by reference.
  // This has to be checked before the indirect rewrite below: the native
  // va_list struct is larger than 16 bytes (see getValistType), so
  // shouldPassByVal() would always claim it first.
  if (isSystemZVaList(ty)) {
    arg.byref = true;
    arg.ltype = arg.ltype->getPointerTo();
    return;
  }

  if (!isPOD(arg.type) || shouldPassByVal(arg.type)) {
    // non-PODs should be passed in memory
    indirectByvalRewrite.applyTo(arg);
    return;
  }

  // integer types less than 64-bits should be extended to 64 bits
  if (ty->isintegral() &&
      !(ty->ty == TY::Tstruct || ty->ty == TY::Tsarray ||
        ty->ty == TY::Tvector) &&
      size(ty) < 8) {
    arg.attrs.addAttribute(ty->isunsigned() ? LLAttribute::ZExt
                                            : LLAttribute::SExt);
  }

  if (ty->isTypeStruct()) {
    if (simpleHardfloatRewrite.shouldApplyRewrite(ty)) {
      simpleHardfloatRewrite.applyTo(arg);
    } else if (size(ty) <= 8) {
      structSimpleFlattenRewrite.applyToIfNotObsolete(arg);
    }
  }
}

// Returns the D type used for va_list: a pointer to `__va_list_tag`.
Type *vaListType() override {
  // The actual va_list type is needed for correct mangling. Referring to it
  // via a plain TypeIdentifier is a bit wonky but works, as long as the name
  // is actually available in the scope (this is what DMD does, so if a
  // better solution is found there, this should be adapted).
  Identifier *const tagName = Identifier::idPool("__va_list_tag");
  return dmd::pointerTo(TypeIdentifier::create(Loc(), tagName));
}

/**
* The SystemZ ABI (like AMD64) uses a special native va_list type -
* a 32-byte struct passed by reference.
* In druntime, the struct is aliased as object.__va_list_tag; the type that is
* actually used, core.stdc.stdarg.va_list, is a __va_list_tag* pointer though,
* in order to achieve byref semantics. This requires a little bit of compiler
* magic in the following implementations.
*/

// Builds the LLVM struct type mirroring the native `__va_list_tag`:
// two 64-bit integers followed by two pointers.
LLType *getValistType() {
  LLType *const i64Type = LLType::getInt64Ty(gIR->context());
  LLType *const ptrType = getOpaquePtrType();

  // struct __va_list_tag {
  //   long __gpr;
  //   long __fpr;
  //   void *__overflow_arg_area;
  //   void *__reg_save_area;
  // }
  const std::vector<LLType *> members{i64Type, i64Type, ptrType, ptrType};

  return LLStructType::get(gIR->context(), members);
}

// Prepares `ap` for va_start and returns the pointer to hand to LLVM's
// va_start intrinsic.
LLValue *prepareVaStart(DLValue *ap) override {
  // The user only created a __va_list_tag* pointer (ap) on the stack before
  // invoking va_start, so the actual __va_list_tag struct is allocated here...
  LLValue *const tagStorage =
      DtoRawAlloca(getValistType(), 0, "__va_list_mem");
  // ...and `ap` is set to its address.
  DtoStore(tagStorage, DtoLVal(ap));
  // LLVM's va_start intrinsic receives an opaque pointer to the struct.
  return tagStorage;
}

// Implements va_copy: gives `dest` its own __va_list_tag struct holding a
// bitwise copy of the struct `src` points to.
void vaCopy(DLValue *dest, DValue *src) override {
  LLType *const tagType = getValistType();

  // As for va_start, a new __va_list_tag struct is allocated on the stack and
  // `dest` is set to its address.
  LLValue *const copyStorage = DtoRawAlloca(tagType, 0, "__va_list_mem");
  DtoStore(copyStorage, DtoLVal(dest));

  // `src` is a __va_list_tag* pointer to the source struct; fill the new
  // struct with a bitcopy of it.
  LLValue *const sourceTag = DtoRVal(src);
  DtoMemCpy(tagType, copyStorage, sourceTag);
}

// Returns the pointer to hand to LLVM's va_arg intrinsic.
LLValue *prepareVaArg(DLValue *ap) override {
  // `ap` holds a pointer to the actual __va_list_tag struct; its value is
  // the opaque pointer that the intrinsic expects.
  LLValue *const tagPointer = DtoRVal(ap);
  return tagPointer;
}
};

// Factory used by abi.cpp: creates a new SystemZ target ABI object.
TargetABI *getSystemZTargetABI() {
  TargetABI *const abi = new SystemZTargetABI();
  return abi;
}
2 changes: 2 additions & 0 deletions gen/abi/targets.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ TargetABI *getX86TargetABI();
TargetABI *getLoongArch64TargetABI();

TargetABI *getWasmTargetABI();

TargetABI *getSystemZTargetABI();
21 changes: 18 additions & 3 deletions gen/ctfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ void CTFloat::toAPFloat(const real_t src, APFloat &dst) {
CTFloatUnion u;
u.fp = src;

#ifdef __FLOAT_WORD_ORDER
#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__
std::swap(u.bits[0], u.bits[1]);
#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__
#endif // __FLOAT_WORD_ORDER

const unsigned sizeInBits = APFloat::getSizeInBits(*apSemantics);
const APInt bits = APInt(sizeInBits, numUint64Parts, u.bits);

Expand All @@ -97,11 +103,20 @@ real_t CTFloat::fromAPFloat(const APFloat &src_) {
src.convert(*apSemantics, APFloat::rmNearestTiesToEven, &ignored);
}

#if LDC_LLVM_VER >= 2001 && defined(HAS_IEE754_FLOAT128)
return src.convertToQuad();
#else
const APInt bits = src.bitcastToAPInt();

CTFloatUnion u;
memcpy(u.bits, bits.getRawData(), bits.getBitWidth() / 8);
CTFloatUnion u{};
memcpy(u.bits, bits.getRawData(),
std::min(static_cast<size_t>(bits.getNumWords()) * 8, sizeof(u.bits)));
#ifdef __FLOAT_WORD_ORDER
#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__
std::swap(u.bits[0], u.bits[1]);
#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__
#endif // __FLOAT_WORD_ORDER
return u.fp;
#endif
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
Loading
Loading