Skip to content

Commit

Permalink
[ARM64_DYNAREC] Reworked a bit DB / 7 opcode
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Jan 10, 2025
1 parent b7f14ff commit b977d84
Showing 1 changed file with 50 additions and 56 deletions.
106 changes: 50 additions & 56 deletions src/dynarec/arm64/dynarec_arm64_db.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,63 +324,57 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
VST64(v1, wback, fixedaddress);
} else {
#if 0
x87_forget(dyn, ninst, x1, x3, 0);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
if(ed!=x1) {
MOVx_REG(x1, ed);
if(!box64_dynarec_fastround) {
x87_forget(dyn, ninst, x1, x3, 0);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
if(ed!=x1) {MOVx_REG(x1, ed);}
CALL(native_fstp, -1);
} else {
// Painfully long, straight conversion from the C code, should be optimized
v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
FMOVxD(x1, v1);
// do special value first
TSTx_mask(x1, 1, 0b00000, 0b111110); //0x7fffffffffffffffL
B_MARK(cNE);
// Zero
LSRx(x3, x1, 63-15); //x3 = sign+exp
MOVZw(x5, 0); // x5 = mantisse
B_MARK3_nocond;
MARK;
// get sign, in main output x5 for sign+exp
ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
LSRx(x5, x5, 63-15); // x5 = sign
// get exp
LSRx(x3, x1, 52); // x3 = exp11
ANDw_mask(x3, x3, 0, 0b1010); //0x7ff
ANDSx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
LSLx_IMM(x1, x1, 11); // mantice
CMPSw_U12(x3, 0x7ff);
B_MARK2(cNE);
// NaN and Infinity
ORRw_mask(x3, x5, 0, 0b1110); //x3 = sign | 0x7fff
ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
B_MARK3_nocond;
MARK2;
// regular / denormals
MOVZw(x4, 16383-1023); //BIAS80 - BIAS64
CBZw(x3, 4+4*4); // exp11 == 0?
// normals
ADDw_REG(x3, x3, x4); // x3 = exp16
ORRw_REG(x3, x3, x5); // x3 = sign | exp
ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 x5 = mantisse
B_MARK3_nocond;
// denormals
CLZx(x6, x1);
ADDw_U12(x6, x6, 1); // "one"
SUBw_REG(x3, x4, x6); // x3 = exp16
ORRw_REG(x3, x3, x5); // x3 = sign | exp16
LSLx_REG(x5, x1, x6); // x5 = mantisse
MARK3;
STRx_U12(x5, wback, 0);
STRH_U12(x3, wback, 8);
}
CALL(native_fstp, -1);
#else
// Painfully long, straight conversion from the C code, should be optimized
v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
FMOVxD(x1, v1);
// do special value first
TSTx_mask(x1, 1, 0b00000, 0b111110); //0x7fffffffffffffffL
B_MARK(cNE);
// Zero
LSRx(x3, x1, 63-15); //x3 = sign+exp
MOVZw(x5, 0); // x5 = mantisse
B_MARK3_nocond;
MARK;
// get sign, in main output x5 for sign+exp
ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
LSRx(x5, x5, 63-15); // x5 = sign
// get exp
LSRx(x3, x1, 52); // x3 = exp11
ANDw_mask(x3, x3, 0, 0b1010); //0x7ff
MOV32w(x4, 0x7ff);
CMPSw_REG(x3, x4);
B_MARK2(cNE);
// NaN and Infinity
ORRw_mask(x3, x5, 0, 0b1110); //x3 = sign | 0x7fff
TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000
ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
CSELx(x5, x5, x4, cEQ); // x5 = mantisse
B_MARK3_nocond;
MARK2;
// regular / denormals
ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
LSLx_IMM(x1, x1, 11); //x1 = mantisse missing "1"
MOVZw(x4, 16383-1023); //BIAS80 - BIAS64
CBZw(x3, 4+3*4); // exp11 == 0?
// normals
ADDw_REG(x3, x3, x4); // x3 = exp16
ORRw_REG(x3, x3, x5); // x3 = sign | exp
ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 x5 = mantisse
B_MARK3_nocond;
// denormals
CLZx(x6, x1);
ADDw_U12(x6, x6, 1); // "one"
SUBw_REG(x3, x4, x6); // x3 = exp16
ORRw_REG(x3, x3, x5); // x3 = sign | exp16
LSLx_REG(x5, x1, x6); // x5 = mantisse
MARK3;
STRx_U12(x5, wback, 0);
STRH_U12(x3, wback, 8);
#endif
}
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
Expand Down

0 comments on commit b977d84

Please sign in to comment.