From b977d84f111a1b4b405d339b0226fae3cc808fd0 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Fri, 10 Jan 2025 15:51:08 +0100 Subject: [PATCH] [ARM64_DYNAREC] Reworked a bit DB / 7 opcode --- src/dynarec/arm64/dynarec_arm64_db.c | 106 +++++++++++++-------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c index c16d14e47..a27f54567 100644 --- a/src/dynarec/arm64/dynarec_arm64_db.c +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -324,63 +324,57 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VST64(v1, wback, fixedaddress); } else { - #if 0 - x87_forget(dyn, ninst, x1, x3, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); - if(ed!=x1) { - MOVx_REG(x1, ed); + if(!box64_dynarec_fastround) { + x87_forget(dyn, ninst, x1, x3, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + if(ed!=x1) {MOVx_REG(x1, ed);} + CALL(native_fstp, -1); + } else { + // Painfully long, straight conversion from the C code, shoud be optimized + v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + FMOVxD(x1, v1); + // do special value first + TSTx_mask(x1, 1, 0b00000, 0b111110); //0x7fffffffffffffffL + B_MARK(cNE); + // Zero + LSRx(x3, x1, 63-15); //x3 = sign+exp + MOVZw(x5, 0); // x5 = mantisse + B_MARK3_nocond; + MARK; + // get sign, in main ouput x5 for sign+exp + ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 + LSRx(x5, x5, 63-15); // x5 = sign + // get exp + LSRx(x3, x1, 52); // x3 = exp11 + ANDw_mask(x3, x3, 0, 0b1010); //0x7ff + ANDSx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL + LSLx_IMM(x1, x1, 11); // mantice + CMPSw_U12(x3, 0x7ff); + B_MARK2(cNE); + // NaN and Infinite + ORRw_mask(x3, x5, 0, 0b1110); //x3 = sign | 0x7fff + ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 + B_MARK3_nocond; + MARK2; + // regular / denormals + MOVZw(x4, 16383-1023); //BIAS80 - BIAS64 + CBZw(x3, 4+4*4); // exp11 == 0? + // normals + ADDw_REG(x3, x3, x4); // x3 = exp16 + ORRw_REG(x3, x3, x5); // x3 = sign | exp + ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 x5 = mantisse + B_MARK3_nocond; + // denormals + CLZx(x6, x1); + ADDw_U12(x6, x6, 1); // "one" + SUBw_REG(x3, x4, x6); // x3 = exp16 + ORRw_REG(x3, x3, x5); // x3 = sign | exp16 + LSLx_REG(x5, x1, x6); // x5 = mantisse + MARK3; + STRx_U12(x5, wback, 0); + STRH_U12(x3, wback, 8); } - CALL(native_fstp, -1); - #else - // Painfully long, straight conversion from the C code, shoud be optimized - v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); - FMOVxD(x1, v1); - // do special value first - TSTx_mask(x1, 1, 0b00000, 0b111110); //0x7fffffffffffffffL - B_MARK(cNE); - // Zero - LSRx(x3, x1, 63-15); //x3 = sign+exp - MOVZw(x5, 0); // x5 = mantisse - B_MARK3_nocond; - MARK; - // get sign, in main ouput x5 for sign+exp - ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 - LSRx(x5, x5, 63-15); // x5 = sign - // get exp - LSRx(x3, x1, 52); // x3 = exp11 - ANDw_mask(x3, x3, 0, 0b1010); //0x7ff - MOV32w(x4, 0x7ff); - CMPSw_REG(x3, x4); - B_MARK2(cNE); - // NaN and Infinite - ORRw_mask(x3, x5, 0, 0b1110); //x3 = sign | 0x7fff - TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL - ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000 - ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000 - CSELx(x5, x5, x4, cEQ); // x5 = mantisse - B_MARK3_nocond; - MARK2; - // regular / denormals - ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL - LSLx_IMM(x1, x1, 11); //x1 = mantisse missing "1" - MOVZw(x4, 16383-1023); //BIAS80 - BIAS64 - CBZw(x3, 4+3*4); // exp11 == 0? - // normals - ADDw_REG(x3, x3, x4); // x3 = exp16 - ORRw_REG(x3, x3, x5); // x3 = sign | exp - ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 x5 = mantisse - B_MARK3_nocond; - // denormals - CLZx(x6, x1); - ADDw_U12(x6, x6, 1); // "one" - SUBw_REG(x3, x4, x6); // x3 = exp16 - ORRw_REG(x3, x3, x5); // x3 = sign | exp16 - LSLx_REG(x5, x1, x6); // x5 = mantisse - MARK3; - STRx_U12(x5, wback, 0); - STRH_U12(x3, wback, 8); - #endif } X87_POP_OR_FAIL(dyn, ninst, x3); break;