Skip to content

Commit

Permalink
internal/subtle: s390x, check if VLM/VSTM issue
Browse files Browse the repository at this point in the history
  • Loading branch information
emmansun authored Aug 28, 2024
1 parent a2295b9 commit f366964
Showing 1 changed file with 41 additions and 7 deletions.
48 changes: 41 additions & 7 deletions internal/subtle/xor_s390x.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,53 @@ TEXT ·xorBytes(SB),NOSPLIT,$0-32
MOVD n+24(FP), R4

MOVD $0, R5
CMPBLT R4, $16, tail
CMPBLT R4, $64, tail

loop16b:
loop_64:
VL 0(R2)(R5*1), V0
VL 0(R3)(R5*1), V1
VX V0, V1, V2
VL 16(R2)(R5*1), V1
VL 32(R2)(R5*1), V2
VL 48(R2)(R5*1), V3
VL 0(R3)(R5*1), V4
VL 16(R3)(R5*1), V5
VL 32(R3)(R5*1), V6
VL 48(R3)(R5*1), V7
VX V0, V4, V4
VX V1, V5, V5
VX V2, V6, V6
VX V3, V7, V7
VST V4, 0(R1)(R5*1)
VST V5, 16(R1)(R5*1)
VST V6, 32(R1)(R5*1)
VST V7, 48(R1)(R5*1)
LAY 64(R5), R5
SUB $64, R4
CMPBGE R4, $64, loop_64

tail:
// Fewer than 64 bytes remain (count in R4). R5 is the running byte offset
// applied to all three base registers: R2 and R3 are the load sources,
// R1 is the store target.
CMPBEQ R4, $0, done
CMPBLT R4, $32, less_than32
// 32-byte step. The second operand's two vectors must land in V2/V3, the
// registers consumed by the VX/VST pairs below. Loading them into V4/V5
// left V2/V3 holding whatever the 64-byte loop last put there (or nothing
// defined if the loop never ran), so the stored result was wrong.
VL 0(R2)(R5*1), V0
VL 16(R2)(R5*1), V1
VL 0(R3)(R5*1), V2
VL 16(R3)(R5*1), V3
VX V0, V2, V2 // V2 = V0 ^ V2
VX V1, V3, V3 // V3 = V1 ^ V3
VST V2, 0(R1)(R5*1)
VST V3, 16(R1)(R5*1)
LAY 32(R5), R5 // advance the offset past the 32 bytes just written
SUB $32, R4 // 32 fewer bytes remain

less_than32:
CMPBLT R4, $16, less_than16
VL 0(R2)(R5*1), V0
VL 0(R3)(R5*1), V1
VX V0, V1, V1
VST V1, 0(R1)(R5*1)
LAY 16(R5), R5
SUB $16, R4
CMPBGE R4, $16, loop16b

tail:
CMPBEQ R4, $0, done
less_than16:
CMPBLT R4, $8, less_than8
MOVD 0(R2)(R5*1), R7
MOVD 0(R3)(R5*1), R8
Expand Down

0 comments on commit f366964

Please sign in to comment.