Skip to content

Commit

Permalink
internal/subtle: port ppc64x from golang
Browse files Browse the repository at this point in the history
  • Loading branch information
emmansun authored Aug 28, 2024
1 parent 7ab7bb6 commit 8be54dd
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 10 deletions.
8 changes: 2 additions & 6 deletions .github/workflows/test_ppc64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,8 @@ jobs:
- name: Check out code
uses: actions/checkout@v4

- name: Test
run: go test -v -short ./internal/bigmod/...
- name: Test internal
run: go test ./internal/...
env:
GOARCH: ${{ matrix.arch }}

- name: Test SM2EC
run: go test -v ./internal/sm2ec/...
env:
GOARCH: ${{ matrix.arch }}
6 changes: 3 additions & 3 deletions .github/workflows/test_s390x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ jobs:
- name: Check out code
uses: actions/checkout@v4

- name: Test SM2EC
run: go test -v ./internal/sm2ec/...
- name: Test internal
run: go test -v ./internal/...
env:
GOARCH: ${{ matrix.arch }}

# - name: Test
# run: go test -v -short ./...
# env:
Expand Down
2 changes: 1 addition & 1 deletion internal/subtle/xor_generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
//go:build purego || !(amd64 || arm64 || s390x)
//go:build purego || !(amd64 || arm64 || s390x || ppc64 || ppc64le)

package subtle

Expand Down
10 changes: 10 additions & 0 deletions internal/subtle/xor_ppc64x.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

package subtle

// xorBytes stores a[i] ^ b[i] into dst[i] for every i in [0, n).
// It has no Go body; the implementation is the ·xorBytes assembly routine
// in xor_ppc64x.s, built under the same (ppc64 || ppc64le) && !purego
// constraint as this file.
//
//go:noescape
func xorBytes(dst, a, b *byte, n int)
142 changes: 142 additions & 0 deletions internal/subtle/xor_ppc64x.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
//
// Writes a[i] ^ b[i] to dst[i] for i in [0, n).
//
// Register use throughout the routine:
//   R3 = dst, R4 = a, R5 = b
//   R6 = number of bytes still to process
//   R8 = byte offset of the next unprocessed element (shared by dst, a and b)
//
// Strategy: XOR 64 bytes per iteration with VSX loads/stores while at least
// 64 bytes remain, then step down through 32- and 16-byte chunks, and finish
// the last (< 16) bytes either with one length-controlled vector load/store
// (GOPPC64_power10) or with 8/4/2/1-byte scalar operations.
TEXT ·xorBytes(SB), NOSPLIT, $0
MOVD dst+0(FP), R3 // R3 = dst
MOVD a+8(FP), R4 // R4 = a
MOVD b+16(FP), R5 // R5 = b
MOVD n+24(FP), R6 // R6 = n

CMPU R6, $64, CR7 // CR7 = unsigned compare of n with 64
MOVD R0, R8 // R8 = index, initialised from R0 (expected to hold 0 - Go ppc64 convention, confirm)
CMPU R6, $8, CR6 // CR6 = unsigned compare of n with 8
BLE CR6, small // n <= 8: go straight to the tail handling
BLT CR7, xor32 // 8 < n < 64: skip the 64-byte loop

// Case for n >= 64 bytes
preloop64:
SRD $6, R6, R7 // R7 = n / 64 = number of 64-byte iterations
MOVD R7, CTR
MOVD $16, R10
MOVD $32, R14
MOVD $48, R15
ANDCC $63, R6, R9 // R9 = n % 64 (trailing bytes); the CR0 result is tested by the BEQLR after the loop
PCALIGN $16
// Case for >= 64 bytes
// Process 64 bytes per iteration
// Load 4 vectors of a and b
// XOR the corresponding vectors
// from a and b and store the result
// R8, R10, R14 and R15 are the offsets of the four 16-byte lanes and are
// each advanced by 64 at the end of every iteration.
loop64:
LXVD2X (R4)(R8), VS32
LXVD2X (R4)(R10), VS34
LXVD2X (R4)(R14), VS36
LXVD2X (R4)(R15), VS38
LXVD2X (R5)(R8), VS33
LXVD2X (R5)(R10), VS35
LXVD2X (R5)(R14), VS37
LXVD2X (R5)(R15), VS39
XXLXOR VS32, VS33, VS32
XXLXOR VS34, VS35, VS34
XXLXOR VS36, VS37, VS36
XXLXOR VS38, VS39, VS38
STXVD2X VS32, (R3)(R8)
STXVD2X VS34, (R3)(R10)
STXVD2X VS36, (R3)(R14)
STXVD2X VS38, (R3)(R15)
ADD $64, R8
ADD $64, R10
ADD $64, R14
ADD $64, R15
BDNZ loop64
BC 12,2,LR // BEQLR: return if n % 64 == 0 (condition left by the ANDCC above; nothing in the loop rewrites it)
MOVD R9, R6 // R6 = trailing byte count, 1..63
CMP R6, $8
BLE small // at most 8 bytes left: tail handling
// Case for 8 < n < 64 bytes
// Process 32 bytes if available
xor32:
CMP R6, $32
BLT xor16
ADD $16, R8, R9 // R9 = offset of the second 16-byte vector
LXVD2X (R4)(R8), VS32
LXVD2X (R4)(R9), VS33
LXVD2X (R5)(R8), VS34
LXVD2X (R5)(R9), VS35
XXLXOR VS32, VS34, VS32
XXLXOR VS33, VS35, VS33
STXVD2X VS32, (R3)(R8)
STXVD2X VS33, (R3)(R9)
ADD $32, R8
ADD $-32, R6
CMP R6, $8
BLE small // at most 8 bytes left: tail handling
// Case for 8 < n < 32 bytes
// Process 16 bytes if available
xor16:
CMP R6, $16
BLT xor8
LXVD2X (R4)(R8), VS32
LXVD2X (R5)(R8), VS33
XXLXOR VS32, VS33, VS32
STXVD2X VS32, (R3)(R8)
ADD $16, R8
ADD $-16, R6
// Fewer than 16 bytes remain from here on.
small:
CMP R6, $0
BC 12,2,LR // BEQLR: nothing left to do
xor8:
#ifdef GOPPC64_power10
// Handle the whole remaining tail with one length-controlled vector
// load/XOR/store. R17 carries the byte count shifted into its top byte,
// which is where LXVL/STXVL read the length from (per Power ISA - confirm).
SLD $56,R6,R17
ADD R4,R8,R18 // R18 = &a[index]
ADD R5,R8,R19 // R19 = &b[index]
ADD R3,R8,R20 // R20 = &dst[index]
LXVL R18,R17,V0
LXVL R19,R17,V1
VXOR V0,V1,V1
STXVL V1,R20,R17
RET
#else
// Scalar tail: peel off 8, then 4, then 2, then 1 byte as available.
CMP R6, $8
BLT xor4
// Case for 8 <= n < 16 bytes
MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
XOR R14, R15, R16 // R16 = a[] ^ b[]
SUB $8, R6 // n = n - 8
MOVD R16, (R3)(R8) // Store to dst
ADD $8, R8
xor4:
CMP R6, $4
BLT xor2
MOVWZ (R4)(R8), R14 // R14 = a[i,...,i+3]
MOVWZ (R5)(R8), R15 // R15 = b[i,...,i+3]
XOR R14, R15, R16
MOVW R16, (R3)(R8) // Store 4 bytes to dst
ADD $4,R8
ADD $-4,R6
xor2:
CMP R6, $2
BLT xor1
MOVHZ (R4)(R8), R14 // R14 = a[i,i+1]
MOVHZ (R5)(R8), R15 // R15 = b[i,i+1]
XOR R14, R15, R16
MOVH R16, (R3)(R8) // Store 2 bytes to dst
ADD $2,R8
ADD $-2,R6
xor1:
CMP R6, $0
BC 12,2,LR // BEQLR: no final odd byte
MOVBZ (R4)(R8), R14 // R14 = a[i]
MOVBZ (R5)(R8), R15 // R15 = b[i]
XOR R14, R15, R16 // R16 = a[i] ^ b[i]
MOVB R16, (R3)(R8) // Store to dst
#endif
// No branch in this routine targets done; the scalar path falls through to it.
done:
RET

1 comment on commit 8be54dd

@emmansun
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.