diff --git a/dmd/argtypes.h b/dmd/argtypes.h index 8d018ea324b..d2e0800a57d 100644 --- a/dmd/argtypes.h +++ b/dmd/argtypes.h @@ -21,5 +21,7 @@ namespace dmd TypeTuple *toArgTypes_sysv_x64(Type *t); // in argtypes_aarch64.d TypeTuple *toArgTypes_aarch64(Type *t); + // in argtypes_s390x.d + TypeTuple *toArgTypes_s390x(Type *t); bool isHFVA(Type *t, int maxNumElements = 4, Type **rewriteType = nullptr); } diff --git a/dmd/argtypes_s390x.d b/dmd/argtypes_s390x.d new file mode 100644 index 00000000000..03a4b1fe451 --- /dev/null +++ b/dmd/argtypes_s390x.d @@ -0,0 +1,74 @@ +/** + * Break down a D type into basic (register) types for the IBM Z ELF ABI. + * + * Copyright: Copyright (C) 2024-2025 by The D Language Foundation, All Rights Reserved + * Authors: Martin Kinkelin + * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/argtypes_s390x.d, _argtypes_s390x.d) + * Documentation: https://dlang.org/phobos/dmd_argtypes_s390x.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/argtypes_s390x.d + */ + +module dmd.argtypes_s390x; + +import dmd.astenums; +import dmd.mtype; +import dmd.typesem; + +/**************************************************** + * This breaks a type down into 'simpler' types that can be passed to a function + * in registers, and returned in registers. + * This is the implementation for the IBM Z ELF ABI, + * based on https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf. + * Params: + * t = type to break down + * Returns: + * tuple of types, each element can be passed in a register. + * A tuple of zero length means the type cannot be passed/returned in registers. + * null indicates a `void`. + */ +TypeTuple toArgTypes_s390x(Type t) +{ + if (t == Type.terror) + return new TypeTuple(t); + + const size = cast(size_t) t.size(); + if (size == 0) + return null; + + // TODO + // Implement the rest of the va args passing + //... 
+ Type tb = t.toBasetype(); + const isAggregate = tb.ty == Tstruct || tb.ty == Tsarray || tb.ty == Tarray || tb.ty == Tdelegate || tb.iscomplex(); + if (!isAggregate) + return new TypeTuple(t); + // unwrap single-float struct per ABI requirements + if (auto tstruct = t.isTypeStruct()) + { + if (tstruct.sym.fields.length == 1) + { + Type fieldType = tstruct.sym.fields[0].type.toBasetype(); + if (fieldType.isfloating()) + { + return new TypeTuple(fieldType); + } + } + } + + // pass remaining aggregates in 1 or 2 GP registers + static Type getGPType(size_t size) + { + switch (size) + { + case 1: return Type.tint8; + case 2: return Type.tint16; + case 4: return Type.tint32; + case 8: return Type.tint64; + default: + import dmd.typesem : sarrayOf; + return Type.tint64.sarrayOf((size + 7) / 8); + } + } + return new TypeTuple(getGPType(size)); +} \ No newline at end of file diff --git a/dmd/cxxfrontend.d b/dmd/cxxfrontend.d index 47a443e3787..f4a6528e82e 100644 --- a/dmd/cxxfrontend.d +++ b/dmd/cxxfrontend.d @@ -698,4 +698,13 @@ version (IN_LLVM) import dmd.argtypes_x86; return dmd.argtypes_x86.toArgTypes_x86(t); } + + /*********************************************************** + * argtypes_s390x.d + */ + TypeTuple toArgTypes_s390x(Type t) + { + import dmd.argtypes_s390x; + return dmd.argtypes_s390x.toArgTypes_s390x(t); + } } diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index d0bbf9bfb36..6d119212f01 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -281,6 +281,8 @@ TargetABI *TargetABI::getTarget() { case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWasmTargetABI(); + case llvm::Triple::systemz: + return getSystemZTargetABI(); default: warning(Loc(), "unknown target ABI, falling back to generic implementation. 
C/C++ " diff --git a/gen/abi/systemz.cpp b/gen/abi/systemz.cpp new file mode 100644 index 00000000000..c1a7d72560f --- /dev/null +++ b/gen/abi/systemz.cpp @@ -0,0 +1,243 @@ +//===-- abi-systemz.cpp +//-----------------------------------------------------===// +// +// LDC - the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// +// +// The ABI implementation used for 64 bit big-endian IBM Z targets. +// +// The IBM s390x ELF ABI can be found here: +// https://github.com/IBM/s390x-abi +//===----------------------------------------------------------------------===// + +#include "dmd/identifier.h" +#include "dmd/nspace.h" +#include "gen/abi/abi.h" +#include "gen/abi/generic.h" +#include "gen/dvalue.h" +#include "gen/irstate.h" +#include "gen/llvmhelpers.h" +#include "gen/tollvm.h" + +using namespace dmd; + +struct SimpleHardfloatRewrite : ABIRewrite { + Type *getFirstFieldType(Type *ty) { + if (auto ts = ty->toBasetype()->isTypeStruct()) { + assert(ts->sym->fields.size() == 1); + auto *subField = ts->sym->fields[0]; + if (subField->type->isfloating()) { + return subField->type; + } + return nullptr; + } + return nullptr; + } + + LLValue *put(DValue *dv, bool, bool) override { + const auto flat = getFirstFieldType(dv->type); + LLType *asType = DtoType(flat); + assert(dv->isLVal()); + LLValue *flatGEP = DtoGEP1(asType, DtoLVal(dv), 0U); + LLValue *flatValue = DtoLoad(asType, flatGEP, ".HardfloatRewrite_arg"); + return flatValue; + } + + LLValue *getLVal(Type *dty, LLValue *v) override { + // inverse operation of method "put" + LLValue *insertedValue = DtoInsertValue(llvm::UndefValue::get(DtoType(dty)), v, 0); + return DtoAllocaDump(insertedValue, dty, ".HardfloatRewrite_param_storage"); + } + + LLType *type(Type *ty) override { return DtoType(getFirstFieldType(ty)); } + + bool shouldApplyRewrite(Type *ty) { + if (auto 
ts = ty->toBasetype()->isTypeStruct()) { + return ts->sym->fields.size() == 1 && + ts->sym->fields[0]->type->isfloating(); + } + return false; + } +}; + +struct StructSimpleFlattenRewrite : BaseBitcastABIRewrite { + LLType *type(Type *ty) override { + const size_t type_size = size(ty); + // "A struct or a union of 1, 2, 4, or 8 bytes" + switch (type_size) { + case 1: + return LLType::getInt8Ty(gIR->context()); + case 2: + return LLType::getInt16Ty(gIR->context()); + case 4: + return LLType::getInt32Ty(gIR->context()); + case 8: + return LLType::getInt64Ty(gIR->context()); + default: + return DtoType(ty); + } + } +}; + +struct SystemZTargetABI : TargetABI { + IndirectByvalRewrite indirectByvalRewrite{}; + StructSimpleFlattenRewrite structSimpleFlattenRewrite{}; + SimpleHardfloatRewrite simpleHardfloatRewrite{}; + + explicit SystemZTargetABI() {} + + bool isSystemZVaList(Type *t) { + // look for a __va_list struct in a `std` C++ namespace + if (auto ts = t->isTypeStruct()) { + auto sd = ts->sym; + if (strcmp(sd->ident->toChars(), "__va_list_tag") == 0) { + if (auto ns = sd->parent->isNspace()) { + return strcmp(ns->toChars(), "std") == 0; + } + } + } + + return false; + } + + bool returnInArg(TypeFunction *tf, bool) override { + if (tf->isref()) { + return false; + } + Type *rt = tf->next->toBasetype(); + if (rt->ty == TY::Tstruct) { + return true; + } + if (rt->isTypeVector() && size(rt) > 16) { + return true; + } + return shouldPassByVal(tf->next); + } + + bool passByVal(TypeFunction *, Type *t) override { + // LLVM's byval attribute is not compatible with the SystemZ ABI + // due to how SystemZ's stack is setup + return false; + } + + bool shouldPassByVal(Type *t) { + if (t->ty == TY::Tstruct && size(t) <= 8) { + return false; + } + // "A struct or union of any other size, a complex type, an __int128, a long + // double, a _Decimal128, or a vector whose size exceeds 16 bytes" + if (size(t) > 16 || t->iscomplex() || t->isimaginary()) { + return true; + } + if 
(t->ty == TY::Tint128 || t->ty == TY::Tcomplex80) { + return true; + } + return DtoIsInMemoryOnly(t); + } + + void rewriteFunctionType(IrFuncTy &fty) override { + if (!fty.ret->byref) { + rewriteArgument(fty, *fty.ret); + } + + for (auto arg : fty.args) { + if (!arg->byref) { + rewriteArgument(fty, *arg); + } + } + } + + void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override { + if (!isPOD(arg.type) || shouldPassByVal(arg.type)) { + // non-PODs should be passed in memory + indirectByvalRewrite.applyTo(arg); + return; + } + Type *ty = arg.type->toBasetype(); + // compiler magic: pass va_list args implicitly by reference + if (isSystemZVaList(ty)) { + arg.byref = true; + arg.ltype = arg.ltype->getPointerTo(); + return; + } + // integer types less than 64-bits should be extended to 64 bits + if (ty->isintegral() && + !(ty->ty == TY::Tstruct || ty->ty == TY::Tsarray || + ty->ty == TY::Tvector) && + size(ty) < 8) { + arg.attrs.addAttribute(ty->isunsigned() ? LLAttribute::ZExt + : LLAttribute::SExt); + } + if (ty->isTypeStruct()) { + if (simpleHardfloatRewrite.shouldApplyRewrite(ty)) { + simpleHardfloatRewrite.applyTo(arg); + } else if (size(ty) <= 8) { + structSimpleFlattenRewrite.applyToIfNotObsolete(arg); + } + } + } + + Type *vaListType() override { + // We need to pass the actual va_list type for correct mangling. Simply + // using TypeIdentifier here is a bit wonky but works, as long as the name + // is actually available in the scope (this is what DMD does, so if a + // better solution is found there, this should be adapted). + return dmd::pointerTo( + TypeIdentifier::create(Loc(), Identifier::idPool("__va_list_tag"))); + } + + /** + * The SystemZ ABI (like AMD64) uses a special native va_list type - + * a 32-bytes struct passed by reference. + * In druntime, the struct is aliased as object.__va_list_tag; the actually + * used core.stdc.stdarg.va_list type is a __va_list_tag* pointer though to + * achieve byref semantics. 
This requires a little bit of compiler magic in
+   * the following implementations.
+   */
+
+  LLType *getValistType() {
+    LLType *longType = LLType::getInt64Ty(gIR->context());
+    LLType *pointerType = getOpaquePtrType();
+
+    std::vector<LLType *> parts; // struct __va_list_tag {
+    parts.push_back(longType);    // long __gpr;
+    parts.push_back(longType);    // long __fpr;
+    parts.push_back(pointerType); // void *__overflow_arg_area;
+    parts.push_back(pointerType); // void *__reg_save_area; }
+
+    return LLStructType::get(gIR->context(), parts);
+  }
+
+  LLValue *prepareVaStart(DLValue *ap) override {
+    // Since the user only created a __va_list_tag* pointer (ap) on the stack
+    // before invoking va_start, we first need to allocate the actual
+    // __va_list_tag struct and set `ap` to its address.
+    LLValue *valistmem = DtoRawAlloca(getValistType(), 0, "__va_list_mem");
+    DtoStore(valistmem, DtoLVal(ap));
+    // Pass an opaque pointer to the actual struct to LLVM's va_start intrinsic.
+    return valistmem;
+  }
+
+  void vaCopy(DLValue *dest, DValue *src) override {
+    // Analog to va_start, we first need to allocate a new __va_list_tag struct
+    // on the stack and set `dest` to its address.
+    LLValue *valistmem = DtoRawAlloca(getValistType(), 0, "__va_list_mem");
+    DtoStore(valistmem, DtoLVal(dest));
+    // Then fill the new struct with a bitcopy of the source struct.
+    // `src` is a __va_list_tag* pointer to the source struct.
+    DtoMemCpy(getValistType(), valistmem, DtoRVal(src));
+  }
+
+  LLValue *prepareVaArg(DLValue *ap) override {
+    // Pass an opaque pointer to the actual __va_list_tag struct to LLVM's
+    // va_arg intrinsic.
+ return DtoRVal(ap); + } +}; + +// The public getter for abi.cpp +TargetABI *getSystemZTargetABI() { return new SystemZTargetABI(); } diff --git a/gen/abi/targets.h b/gen/abi/targets.h index 49098fe2579..0825d0062ea 100644 --- a/gen/abi/targets.h +++ b/gen/abi/targets.h @@ -40,3 +40,5 @@ TargetABI *getX86TargetABI(); TargetABI *getLoongArch64TargetABI(); TargetABI *getWasmTargetABI(); + +TargetABI *getSystemZTargetABI(); diff --git a/gen/ctfloat.cpp b/gen/ctfloat.cpp index ef033c018df..88fa314fe57 100644 --- a/gen/ctfloat.cpp +++ b/gen/ctfloat.cpp @@ -82,6 +82,12 @@ void CTFloat::toAPFloat(const real_t src, APFloat &dst) { CTFloatUnion u; u.fp = src; +#ifdef __FLOAT_WORD_ORDER +#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ + std::swap(u.bits[0], u.bits[1]); +#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ +#endif // __FLOAT_WORD_ORDER + const unsigned sizeInBits = APFloat::getSizeInBits(*apSemantics); const APInt bits = APInt(sizeInBits, numUint64Parts, u.bits); @@ -97,11 +103,20 @@ real_t CTFloat::fromAPFloat(const APFloat &src_) { src.convert(*apSemantics, APFloat::rmNearestTiesToEven, &ignored); } +#if LDC_LLVM_VER >= 2001 && defined(HAS_IEE754_FLOAT128) + return src.convertToQuad(); +#else const APInt bits = src.bitcastToAPInt(); - - CTFloatUnion u; - memcpy(u.bits, bits.getRawData(), bits.getBitWidth() / 8); + CTFloatUnion u{}; + memcpy(u.bits, bits.getRawData(), + std::min(static_cast(bits.getNumWords()) * 8, sizeof(u.bits))); +#ifdef __FLOAT_WORD_ORDER +#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ + std::swap(u.bits[0], u.bits[1]); +#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ +#endif // __FLOAT_WORD_ORDER return u.fp; +#endif } //////////////////////////////////////////////////////////////////////////////// diff --git a/gen/target.cpp b/gen/target.cpp index 85bcf9f3740..aa36e9e3186 100644 --- a/gen/target.cpp +++ b/gen/target.cpp @@ -54,6 +54,7 @@ llvm::Type *getRealType(const llvm::Triple &triple) { case Triple::riscv32: case 
Triple::riscv64: + case Triple::systemz: #if LDC_LLVM_VER >= 1600 case Triple::loongarch32: case Triple::loongarch64: @@ -66,7 +67,7 @@ llvm::Type *getRealType(const llvm::Triple &triple) { default: // 64-bit double precision for all other targets - // FIXME: PowerPC, SystemZ, ... + // FIXME: PowerPC ... return LLType::getDoubleTy(ctx); } } @@ -237,10 +238,13 @@ Type *Target::va_listType(const Loc &loc, Scope *sc) { const char *TargetCPP::typeMangle(Type *t) { if (t->ty == TY::Tfloat80) { const auto &triple = *global.params.targetTriple; - // `long double` on Android/x64 is __float128 and mangled as `g` - bool isAndroidX64 = triple.getEnvironment() == llvm::Triple::Android && - triple.getArch() == llvm::Triple::x86_64; - return isAndroidX64 ? "g" : "e"; + // `long double` on Android/x64 and IBM SystemZ is __float128 and mangled as + // `g` instead of `e`. + const auto targetArch = triple.getArch(); + const bool isAndroidX64 = + triple.getEnvironment() == llvm::Triple::Android && + targetArch == llvm::Triple::x86_64; + return (isAndroidX64 || targetArch == llvm::Triple::systemz) ? "g" : "e"; } return nullptr; } @@ -254,6 +258,8 @@ TypeTuple *Target::toArgTypes(Type *t) { return toArgTypes_sysv_x64(t); if (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be) return toArgTypes_aarch64(t); + if (arch == llvm::Triple::systemz) + return toArgTypes_s390x(t); return nullptr; } diff --git a/ir/irstruct.cpp b/ir/irstruct.cpp index 1b0f4003c0d..85b68f1b422 100644 --- a/ir/irstruct.cpp +++ b/ir/irstruct.cpp @@ -107,7 +107,8 @@ LLConstant *IrStruct::getTypeInfoInit() { const bool withArgTypes = (arch == llvm::Triple::x86_64 && !triple.isOSWindows()) || (!triple.isOSDarwin() && // Apple uses a simpler scheme - (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be)); + (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be)) || + (arch == llvm::Triple::systemz); const unsigned expectedFields = 11 + (withArgTypes ? 
2 : 0);
   const unsigned actualFields =
       structTypeInfoDecl->fields.length -
diff --git a/runtime/druntime/src/__importc_builtins.di b/runtime/druntime/src/__importc_builtins.di
index fc17bce4a22..d50c643c9d9 100644
--- a/runtime/druntime/src/__importc_builtins.di
+++ b/runtime/druntime/src/__importc_builtins.di
@@ -51,6 +51,10 @@ version (LDC)
         else version (AArch64)
             public import core.internal.vararg.aarch64 : __va_list;
     }
+    else version (SystemZ)
+    {
+        public import core.internal.vararg.s390x : __va_list;
+    }
 }
 else version (Posix)
 {
diff --git a/runtime/druntime/src/core/internal/vararg/s390x.d b/runtime/druntime/src/core/internal/vararg/s390x.d
new file mode 100644
index 00000000000..ad02a94379d
--- /dev/null
+++ b/runtime/druntime/src/core/internal/vararg/s390x.d
@@ -0,0 +1,167 @@
+module core.internal.vararg.s390x;
+
+version (SystemZ): import core.stdc.stdarg : alignUp;
+
+nothrow:
+
+// Layout of this struct must match __gnuc_va_list for C ABI compatibility
+struct __va_list_tag
+{
+    long __gpr = 0; // no regs
+    long __fpr = 0; // no fp regs
+    void* __overflow_arg_area;
+    void* __reg_save_area;
+}
+
+alias __va_list = __va_list_tag;
+
+/**
+ * Making it an array of 1 causes va_list to be passed as a pointer in
+ * function argument lists
+ */
+alias va_list = __va_list*;
+
+/// Compile-time `va_arg` extraction for s390x
+T va_arg(T)(va_list ap)
+{
+    static if (is(T U == __argTypes))
+    {
+        static if (U.length == 0 || U[0].sizeof > 8 || is(U[0] == __vector))
+        {
+            // Always passed in memory (varying vectors are passed in parameter area)
+            auto p = cast(T*) ap.__overflow_arg_area;
+            // advance by the 8-byte-aligned slot size (not `value + alignment`)
+            ap.__overflow_arg_area += T.sizeof.alignUp;
+            return *p;
+        }
+        else static if (U.length == 1)
+        {
+            // Arg is passed in one register
+            alias T1 = U[0];
+            static if (is(T1 == double) || is(T1 == float))
+            {
+                // Maybe passed in $fr registers (f0/f2/f4/f6 => at most 4 FP register args)
+                if (ap.__fpr < 4)
+                {
+                    // Passed in $fr registers (FPR region starts at +0x80)
+                    auto p = cast(T*)(ap.__reg_save_area + 128 + ap.__fpr * 8);
+                    ap.__fpr++;
+                    return *p;
+                }
+                else
+                {
+                    // overflow arguments
+                    auto p = cast(T*) ap.__overflow_arg_area;
+                    // no matter the actual size of the fp variable
+                    // parameter slot is always 8-byte-wide (f32 is extended to f64)
+                    ap.__overflow_arg_area += 8;
+                    return *p;
+                }
+            }
+            else
+            {
+                // Maybe passed in $r (GPR) registers (r2..r6 => at most 5 GP register args)
+                if (ap.__gpr < 5)
+                {
+                    // GPR region starts at +0x10; NOTE(review): sub-8-byte values are right-adjusted in the slot on big-endian — confirm read offset
+                    auto p = cast(T*)(ap.__reg_save_area + 16 + ap.__gpr * 8);
+                    ap.__gpr++;
+                    return *p;
+                }
+                else
+                {
+                    // overflow arguments
+                    auto p = cast(T*) ap.__overflow_arg_area;
+                    // no matter the actual size of the gpr variable
+                    // parameter slot is always 8-byte-wide (after ABI adjustments)
+                    ap.__overflow_arg_area += 8;
+                    return *p;
+                }
+            }
+        }
+        else
+        {
+            static assert(false);
+        }
+    }
+    else
+    {
+        static assert(false, "not a valid argument type for va_arg");
+    }
+}
+
+/// Runtime `va_arg` extraction for s390x
+void va_arg()(va_list ap, TypeInfo ti, void* parmn)
+{
+    TypeInfo arg1, arg2;
+    if (TypeInfo_Struct ti_struct = cast(TypeInfo_Struct) ti)
+    {
+        // handle single-float element struct
+        const rtFields = ti_struct.offTi();
+        if (rtFields && rtFields.length == 1)
+        {
+            TypeInfo field1TypeInfo = rtFields[0].ti;
+            if (field1TypeInfo is typeid(float) || field1TypeInfo is typeid(double))
+            {
+                auto toffset = rtFields[0].offset;
+                // extract the single FP field recursively into its offset
+                // (the original referenced an undefined `p` with wrong slice bounds)
+                va_arg(ap, field1TypeInfo, parmn + toffset);
+                return;
+            }
+        }
+    }
+
+    if (!ti.argTypes(arg1, arg2))
+    {
+        TypeInfo_Vector v1 = arg1 ?
cast(TypeInfo_Vector) arg1 : null;
+        if (arg1 && (arg1.tsize <= 8 && !v1))
+        {
+            auto tsize = arg1.tsize;
+            // Maybe passed in $r (GPR) registers (r2..r6 => at most 5)
+            if (ap.__gpr < 5)
+            {
+                // GPR region starts at +0x10; NOTE(review): sub-8-byte values are right-adjusted in the slot on big-endian — confirm copy offset
+                auto p = ap.__reg_save_area + 16 + ap.__gpr * 8;
+                ap.__gpr++;
+                parmn[0..tsize] = p[0..tsize];
+            }
+            else
+            {
+                // overflow arguments
+                auto p = ap.__overflow_arg_area;
+                // no matter the actual size of the gpr variable
+                // parameter slot is always 8-byte-wide (after ABI adjustments)
+                ap.__overflow_arg_area += 8;
+                parmn[0..tsize] = p[0..tsize];
+            }
+        }
+        else if (arg1 && (arg1 is typeid(float) || arg1 is typeid(double)))
+        {
+            auto tsize = arg1.tsize; // maybe passed in $fr registers (f0/f2/f4/f6 => at most 4)
+            if (ap.__fpr < 4)
+            {
+                // Passed in $fr registers (FPR region starts at +0x80)
+                auto p = ap.__reg_save_area + 128 + ap.__fpr * 8;
+                ap.__fpr++;
+                parmn[0..tsize] = p[0..tsize];
+            }
+            else
+            {
+                // overflow arguments
+                auto p = ap.__overflow_arg_area;
+                // no matter the actual size of the fp variable
+                // parameter slot is always 8-byte-wide (f32 is extended to f64)
+                ap.__overflow_arg_area += 8;
+                parmn[0..tsize] = p[0..tsize];
+            }
+        }
+        else
+        {
+            assert(false, "unhandled va_arg type!");
+        }
+        assert(!arg2);
+    }
+    else
+    {
+        assert(false, "not a valid argument type for va_arg");
+    }
+}
diff --git a/runtime/druntime/src/core/stdc/stdarg.d b/runtime/druntime/src/core/stdc/stdarg.d
index 0ba1ebe34e3..00a8bdef114 100644
--- a/runtime/druntime/src/core/stdc/stdarg.d
+++ b/runtime/druntime/src/core/stdc/stdarg.d
@@ -72,6 +72,10 @@ else version (ARM_Any)
         static import core.internal.vararg.aarch64;
     }
 }
+else version (SystemZ)
+{
+    static import core.internal.vararg.s390x;
+}
 
 T alignUp(size_t alignment = size_t.sizeof, T)(T base) pure
@@ -137,6 +141,11 @@ else version (RISCV_Any)
     // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
     alias va_list = void*;
 }
+else version (SystemZ)
+{
+    alias va_list = 
core.internal.vararg.s390x.va_list; + public import core.internal.vararg.s390x : __va_list, __va_list_tag; +} else { alias va_list = char*; // incl. unknown platforms @@ -285,6 +294,10 @@ T va_arg(T)(ref va_list ap) ap += T.sizeof.alignUp; return *p; } + else version (SystemZ) + { + return core.internal.vararg.s390x.va_arg!T(ap); + } else static assert(0, "Unsupported platform"); } diff --git a/runtime/druntime/src/core/thread/fiber.d b/runtime/druntime/src/core/thread/fiber.d index 1f2ed97b634..307ecfa055a 100644 --- a/runtime/druntime/src/core/thread/fiber.d +++ b/runtime/druntime/src/core/thread/fiber.d @@ -170,6 +170,14 @@ private version = AlignFiberStackTo16Byte; } } + else version (SystemZ) + { + version (Posix) + { + version = AsmSystemZ_Posix; + version = AsmExternal; + } + } version (Posix) { @@ -570,6 +578,8 @@ version (LDC) version (AArch64) version = CheckFiberMigration; + version (SystemZ) version = CheckFiberMigration; + // Fiber migration across threads is (probably) not possible with ASan fakestack enabled (different parts of the stack // will contain fakestack pointers that were created on different threads...) version (SupportSanitizers) version = CheckFiberMigration; @@ -1898,6 +1908,34 @@ private: push(cast(size_t) &fiber_trampoline); // see threadasm.S for docs pstack += size_t.sizeof; // adjust sp (newp) above lr } + else version (AsmSystemZ_Posix) { + // Unlike a lot of architectures, s390x has a very special way + // to do function calls: by saving registers onto + // "register save area" (which is below the stack frame). + // However, we put fp registers on top of the "register save area" + // because saved fp registers are not part of this area. + // fiber_switchContext expects newp sp to look like this: + // 0: %f15 <-- newp tstack + // -1: %f13 + // -2: %f11 + // ... + // -8: %f6 (not saved) <-- top of register save area + // ... + // -13: %r14 [&fiber_entryPoint] + // ... 
+ // -25: %r2 (not saved) <-- bottom of the register save area + // -26: reserved + // -27: %r0 <-- backchain slot + + version (StackGrowsDown) {} + else + static assert(false, "Only full descending stacks supported on SystemZ"); + + push(cast(size_t) 0x0); // sp + push(cast(size_t) &fiber_entryPoint); // r14 (return address) + pstack -= size_t.sizeof * 22; // skip past space reserved for a lot of stuff + + } else version (AsmAArch64_Posix) { // Like others, FP registers and return address (lr) are kept diff --git a/runtime/druntime/src/core/thread/osthread.d b/runtime/druntime/src/core/thread/osthread.d index 8b07397296a..26236151740 100644 --- a/runtime/druntime/src/core/thread/osthread.d +++ b/runtime/druntime/src/core/thread/osthread.d @@ -44,6 +44,9 @@ version (LDC) { import ldc.sanitizers_optionally_linked; } + + pragma(LDC_intrinsic, "llvm.eh.unwind.init") + void llvm_unwind_init() nothrow @nogc; } @@ -1568,8 +1571,8 @@ in (fn) }} asm pure nothrow @nogc { ("sd $gp, %0") : "=m" (regs[8]); - ("sd $fp, %0") : "=m" (regs[9]); - ("sd $ra, %0") : "=m" (sp); + ("sd $fp, %0") : "=m" (regs[9]); + ("sd $sp, %0") : "=m" (sp); } } else version (MIPS_Any) @@ -1633,9 +1636,21 @@ in (fn) asm pure nothrow @nogc { ( "st.d $fp, %0") : "=m" (regs[17]); } asm pure nothrow @nogc { ( "st.d $sp, %0") : "=m" (sp); } } + else version (SystemZ) + { + // Callee-save registers, according to SystemZ Calling Convention + // https://github.com/IBM/s390x-abi/blob/main/lzsabi.tex + size_t[10] regs = void; + asm pure nothrow @nogc { + // save callee-saved GPRs (%r6 - %r15) + "stmg %%r6, %%r15, %0" : "=m" (regs[0]); + } + sp = cast(void*)regs[9]; + } else { - static assert(false, "Architecture not supported."); + llvm_unwind_init(); + sp = &sp; } } else @@ -1700,6 +1715,7 @@ version (LDC) version (ARM_Any) version = LDC_stackTopAsm; version (PPC_Any) version = LDC_stackTopAsm; version (MIPS_Any) version = LDC_stackTopAsm; + version (SystemZ) version = LDC_stackTopAsm; version 
(LDC_stackTopAsm) { @@ -1719,6 +1735,8 @@ version (LDC) return __asm!(void*)("mr $0, 1", "=r"); else version (MIPS_Any) return __asm!(void*)("move $0, $$sp", "=r"); + else version (SystemZ) + return __asm!(void*)("lgr $0, %r15", "=r"); else static assert(0); } diff --git a/runtime/druntime/src/core/threadasm.S b/runtime/druntime/src/core/threadasm.S index 77d00674974..08675e14650 100644 --- a/runtime/druntime/src/core/threadasm.S +++ b/runtime/druntime/src/core/threadasm.S @@ -705,6 +705,113 @@ CSYM(fiber_trampoline): // fiber_entryPoint never returns bl CSYM(fiber_entryPoint) .cfi_endproc + +#elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +/************************************************************************************ + * SystemZ (S390X) ASM BITS + ************************************************************************************/ + +// Note: Does not support S390 systems, only S390X (IBM Z) systems are covered. +/** + * Performs a context switch. + * + * Parameters: + * r2 - void** - ptr to old stack pointer + * r3 - void* - new stack pointer + * + */ + +.text +.globl fiber_switchContext +.type fiber_switchContext, %function +fiber_switchContext: + .cfi_startproc + .machine "z10" + + /** + * Save the call-saved general purpose registers onto + * the caller register save-area (below the %sp per s390x ABI) + * (This includes return address %r4 and stack pointer %r15) */ + stmg %r6, %r15, 48(%r15) + .cfi_offset 6, -112 + .cfi_offset 7, -104 + .cfi_offset 8, -96 + .cfi_offset 9, -88 + .cfi_offset 10, -80 + .cfi_offset 11, -72 + .cfi_offset 12, -64 + .cfi_offset 13, -56 + .cfi_offset 14, -48 + .cfi_offset 15, -40 + lgr %r1, %r15 + aghi %r15, -64 + /* we need to tell the debugger that the current stack offset is: + 64 (frame size) + 160 (parameter area size) */ + .cfi_def_cfa_offset 224 + /* store the (optional) backchain data */ + stg %r1, 0(%r15) + /* Save callee-saved floating point registers + s390x ABI has a very unique way for 
storing fp registers: + even-pairs first and odd-pairs last */ + std %f8, 0(%r15) + .cfi_offset 24, -224 + std %f10, 8(%r15) + .cfi_offset 25, -216 + std %f12, 16(%r15) + .cfi_offset 26, -208 + std %f14, 24(%r15) + .cfi_offset 27, -200 + std %f9, 32(%r15) + .cfi_offset 28, -192 + std %f11, 40(%r15) + .cfi_offset 29, -184 + std %f13, 48(%r15) + .cfi_offset 30, -176 + std %f15, 56(%r15) + .cfi_offset 31, -168 + + /* Save stack pointer, the stack pointer is adjusted so that + GC won't see the float point registers */ + stg %r15, 0(%r2) + + /* Load the new context pointer as stack pointer. */ + lgr %r15, %r3 + .cfi_def_cfa_offset 224 + + /* Restore call-saved floating point registers. */ + ld %f8, 0(%r15) + .cfi_offset 24, -224 + ld %f10, 8(%r15) + .cfi_offset 25, -216 + ld %f12, 16(%r15) + .cfi_offset 26, -208 + ld %f14, 24(%r15) + .cfi_offset 27, -200 + ld %f9, 32(%r15) + .cfi_offset 28, -192 + ld %f11, 40(%r15) + .cfi_offset 29, -184 + ld %f13, 48(%r15) + .cfi_offset 30, -176 + ld %f15, 56(%r15) + .cfi_offset 31, -168 + lmg %r6, %r14, 112(%r15) + .cfi_offset 6, -112 + .cfi_offset 7, -104 + .cfi_offset 8, -96 + .cfi_offset 9, -88 + .cfi_offset 10, -80 + .cfi_offset 11, -72 + .cfi_offset 12, -64 + .cfi_offset 13, -56 + .cfi_offset 14, -48 + aghi %r15, 64 + .cfi_def_cfa_offset 160 + + .cfi_return_column 14 + br %r14 + .cfi_endproc + #elif defined(__arm__) && (defined(__ARM_EABI__) || defined(__APPLE__)) /************************************************************************************ * ARM ASM BITS diff --git a/runtime/druntime/src/core/vararg.d b/runtime/druntime/src/core/vararg.d index e6dd47d06d3..09b6d62aa0f 100644 --- a/runtime/druntime/src/core/vararg.d +++ b/runtime/druntime/src/core/vararg.d @@ -118,6 +118,11 @@ void va_arg()(ref va_list ap, TypeInfo ti, void* parmn) ap += tsize.alignUp; parmn[0..tsize] = p[0..tsize]; } + else version (SystemZ) + { + static import core.internal.vararg.s390x; + core.internal.vararg.s390x.va_arg(ap, ti, parmn); + } else 
version (PPC_Any) { if (ti.talign >= 8) diff --git a/runtime/druntime/src/object.d b/runtime/druntime/src/object.d index 582467ca300..6c75d91ff5e 100644 --- a/runtime/druntime/src/object.d +++ b/runtime/druntime/src/object.d @@ -103,6 +103,10 @@ version (LDC) // note: there's a copy for importC in __importc_builtins.di else version (AArch64) public import core.internal.vararg.aarch64 : __va_list; } + else version (SystemZ) + { + public import core.internal.vararg.s390x : __va_list; + } } version (D_ObjectiveC) @@ -132,6 +136,10 @@ else version (AArch64) else version (WatchOS) {} else version = WithArgTypes; } +else version (SystemZ) +{ + version = WithArgTypes; +} /** * All D class objects inherit from Object. diff --git a/runtime/druntime/src/rt/dwarfeh.d b/runtime/druntime/src/rt/dwarfeh.d index 27023fac445..e950d7ae059 100644 --- a/runtime/druntime/src/rt/dwarfeh.d +++ b/runtime/druntime/src/rt/dwarfeh.d @@ -99,6 +99,11 @@ else version (LoongArch64) enum eh_exception_regno = 4; enum eh_selector_regno = 5; } +else version (SystemZ) +{ + enum eh_exception_regno = 6; + enum eh_selector_regno = 7; +} else { static assert(0, "Unknown EH register numbers for this architecture"); diff --git a/runtime/druntime/src/rt/sections_elf_shared.d b/runtime/druntime/src/rt/sections_elf_shared.d index ddccf7fbc3f..8db8450c6cd 100644 --- a/runtime/druntime/src/rt/sections_elf_shared.d +++ b/runtime/druntime/src/rt/sections_elf_shared.d @@ -40,6 +40,7 @@ version (MIPS32) version = MIPS_Any; version (MIPS64) version = MIPS_Any; version (RISCV32) version = RISCV_Any; version (RISCV64) version = RISCV_Any; +version (SystemZ) version = IBMZ_Any; // debug = PRINTF; import core.internal.elf.dl; @@ -1182,6 +1183,36 @@ version (LDC) extern(C) void* ___tls_get_addr(tls_index* ti) nothrow @nogc; alias __tls_get_addr = ___tls_get_addr; } + else version (IBMZ_Any) + { + import ldc.intrinsics; + /// __tls_get_offset (available since GLibc 2.3) returns the thread pointer offset + /// of the 
request object. + /// IBM Z does not expose the `__tls_get_addr` function like other architectures. + extern(C) void* __tls_get_offset(size_t offset) nothrow @nogc; + // keep this function internal + private void* __tls_get_addr(tls_index* ti) nothrow @nogc + { + // adapted from GDC's assembler routine: libphobos/libdruntime/config/systemz/get_tls_offset.S + size_t got_offset = cast(size_t)ti; + version (SystemZ) { + // got_offset = &ti - &got (stored in r12) + asm pure nothrow @nogc { + "sgr %0, %%r12" : "=r" (got_offset) : "0" (got_offset); + } + } + else version (S390) + { + asm nothrow @nogc { + "larl %%r12, _GLOBAL_OFFSET_TABLE_" ::: "r12"; + "sr %0, %%r12" : "=r" (got_offset) : "0" (got_offset); + } + } + // the offset is relative to the thread pointer base + // we need to add that to get the final address + return __tls_get_offset(got_offset) + cast(size_t)llvm_thread_pointer(); + } + } else extern(C) void* __tls_get_addr(tls_index* ti) nothrow @nogc; } @@ -1218,6 +1249,8 @@ else version (MIPS_Any) enum TLS_DTV_OFFSET = 0x8000; else version (LoongArch64) enum TLS_DTV_OFFSET = 0x0; +else version (IBMZ_Any) + enum TLS_DTV_OFFSET = 0x0; else static assert( false, "Platform not supported." );