Skip to content

Commit

Permalink
cipher/hctr: optimization with GCM GF128 method
Browse files Browse the repository at this point in the history
  • Loading branch information
emmansun committed Dec 3, 2023
1 parent 9d467f8 commit e5effb8
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 48 deletions.
16 changes: 16 additions & 0 deletions cipher/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,28 @@ package cipher_test
import (
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"io"
"testing"

smcipher "github.com/emmansun/gmsm/cipher"
"github.com/emmansun/gmsm/sm4"
)

// BenchmarkSM4HCTREncrypt1K measures in-place HCTR encryption of a 1 KiB
// buffer with SM4 as the underlying block cipher. The SM4 key is all zero;
// the tweak and the hash key are drawn from crypto/rand.
func BenchmarkSM4HCTREncrypt1K(b *testing.B) {
	var key [16]byte
	var tweak [32]byte

	c, err := sm4.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	if _, err := io.ReadFull(rand.Reader, tweak[:]); err != nil {
		b.Fatal(err)
	}
	// The first 16 random bytes are the tweak, the last 16 the hash key.
	hctr, err := smcipher.NewHCTR(c, tweak[:16], tweak[16:])
	if err != nil {
		b.Fatal(err)
	}

	buf := make([]byte, 1024)
	b.SetBytes(int64(len(buf)))
	// Exclude cipher and table setup from the timed region.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		hctr.Encrypt(buf, buf)
	}
}

func benchmarkECBEncrypt1K(b *testing.B, block cipher.Block) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
Expand Down
172 changes: 124 additions & 48 deletions cipher/hctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,68 @@ type LengthPreservingMode interface {
Decrypt(dst, src []byte)
}

// hctrFieldElement is one element of GF(2¹²⁸), held as two 64-bit words.
//
// The bits are kept in big endian order so that the layout mirrors the HCTR
// standard and binary.BigEndian can marshal the words directly. Coefficients
// are therefore located as follows:
//
//	x⁰   is v.low >> 63
//	x⁶³  is v.low & 1
//	x⁶⁴  is v.high >> 63
//	x¹²⁷ is v.high & 1
type hctrFieldElement struct {
	low  uint64 // coefficients of x⁰ through x⁶³
	high uint64 // coefficients of x⁶⁴ through x¹²⁷
}

// reverseBits returns the low four bits of i in mirrored order: bit 0 swaps
// with bit 3 and bit 1 swaps with bit 2. Any bits above the low four are
// ignored, so the result is always in the range 0..15.
func reverseBits(i int) int {
	var mirrored int
	for bit := 0; bit < 4; bit++ {
		// Peel bits off the low end of i and push them onto the low end of
		// the result, so the first bit read ends up most significant.
		mirrored = mirrored<<1 | (i>>bit)&1
	}
	return mirrored
}

// hctrAdd returns x + y in GF(2¹²⁸). Neither operand is modified.
func hctrAdd(x, y *hctrFieldElement) hctrFieldElement {
	// In a field of characteristic 2, addition is a word-wise XOR.
	var sum hctrFieldElement
	sum.low = x.low ^ y.low
	sum.high = x.high ^ y.high
	return sum
}

// hctrDouble returns 2*x (x multiplied by the polynomial "x") in GF(2¹²⁸).
// The operand is not modified.
func hctrDouble(x *hctrFieldElement) (double hctrFieldElement) {
	// The x¹²⁷ coefficient lives in the lowest bit of high; remember it before
	// it is shifted out.
	carry := x.high & 1

	// With this bit ordering, multiplying by x is a 128-bit right shift: the
	// lowest bit of low moves into the highest bit of high.
	double = hctrFieldElement{
		low:  x.low >> 1,
		high: x.high>>1 | x.low<<63,
	}

	// A shifted-out x¹²⁷ term conceptually becomes x^128, which exceeds the
	// irreducible polynomial 1+x+x^2+x^7+x^128 and must be reduced. Subtracting
	// the polynomial cancels x^128 and subtracts the remaining four terms; in a
	// characteristic 2 field subtraction, addition and XOR coincide.
	if carry != 0 {
		double.low ^= 0xe100000000000000
	}
	return double
}

// hctrReductionTable is indexed by the four bits that mul shifts out of the
// low end of its accumulator's high word on each 4-bit step. mul XORs the
// selected entry, shifted left by 48 bits, into the accumulator's low word.
// NOTE(review): the name and usage suggest each entry is the reduction of the
// shifted-out bits modulo the field polynomial - confirm against the GCM
// GF(2¹²⁸) reference this code was derived from.
var hctrReductionTable = []uint16{
0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
}

// hctr represents a Variable-Input-Length enciphering mode with a specific block cipher,
// a specific tweak and a hash key. See
// https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.470.5288
// GB/T 17964-2021, chapter 11: counter mode with a universal hash function.
type hctr struct {
// cipher is the underlying block cipher supplied to NewHCTR.
cipher _cipher.Block
// tweak is a copy of the tweak supplied to NewHCTR.
tweak [blockSize]byte
// hkey is a copy of the hash key supplied to NewHCTR.
hkey [blockSize]byte
// productTable contains sixteen multiples of the hash key (0*H through
// 15*H; entry 0 is left as the zero element). They are stored in bit
// reversed order: the multiple i*H lives at index reverseBits(i).
productTable [16]hctrFieldElement
}

// NewHCTR returns a [LengthPreservingMode] which encrypts/decrypts using the given [Block]
Expand All @@ -55,72 +109,95 @@ func NewHCTR(cipher _cipher.Block, tweak, hkey []byte) (LengthPreservingMode, er
}
c := &hctr{}
c.cipher = cipher
copy(c.hkey[:], hkey)
copy(c.tweak[:], tweak)
return c, nil
}

func _mul2(v *[blockSize]byte) {
var carryIn byte
for j := range v {
carryOut := (v[j] << 7) & 0x80
v[j] = (v[j] >> 1) + carryIn
carryIn = carryOut
// We precompute 16 multiples of |key|. However, when we do lookups
// into this table we'll be using bits from a field element and
// therefore the bits will be in the reverse order. So normally one
// would expect, say, 4*key to be in index 4 of the table but due to
// this bit ordering it will actually be in index 0010 (base 2) = 2.
x := hctrFieldElement{
binary.BigEndian.Uint64(hkey[:8]),
binary.BigEndian.Uint64(hkey[8:blockSize]),
}
if carryIn != 0 {
v[0] ^= 0xE1 // 1<<7 | 1<<6 | 1<<5 | 1
c.productTable[reverseBits(1)] = x

for i := 2; i < 16; i += 2 {
c.productTable[reverseBits(i)] = hctrDouble(&c.productTable[reverseBits(i/2)])
c.productTable[reverseBits(i+1)] = hctrAdd(&c.productTable[reverseBits(i)], &x)
}
return c, nil
}

// mul sets y to y*hkey.
func (h *hctr) mul(y *[blockSize]byte) {
var z [blockSize]byte
for _, i := range h.hkey {
for k := 0; k < 8; k++ {
if (i>>(7-k))&1 == 1 {
subtle.XORBytes(z[:], z[:], y[:])
}
_mul2(y)
// mul sets y to y*H, where H is the hash key, fixed during NewHCTR.
func (h *hctr) mul(y *hctrFieldElement) {
var z hctrFieldElement

for i := 0; i < 2; i++ {
word := y.high
if i == 1 {
word = y.low
}

// Multiplication works by multiplying z by 16 and adding in
// one of the precomputed multiples of hash key.
for j := 0; j < 64; j += 4 {
msw := z.high & 0xf
z.high >>= 4
z.high |= z.low << 60
z.low >>= 4
z.low ^= uint64(hctrReductionTable[msw]) << 48

// The values in productTable are ordered for
// little-endian bit positions. See the comment
// in NewHCTR.
t := &h.productTable[word&0xf]

z.low ^= t.low
z.high ^= t.high
word >>= 4
}
}
copy(y[:], z[:])

*y = z
}

// updateBlock absorbs one block into the running hash value y: the first
// eight bytes of block are XORed into y.low and bytes 8..blockSize into
// y.high, both read as big-endian words, and the result is then multiplied by
// the hash key via h.mul. block must hold at least blockSize bytes.
func (h *hctr) updateBlock(block []byte, y *hctrFieldElement) {
	be := binary.BigEndian
	y.low ^= be.Uint64(block)
	y.high ^= be.Uint64(block[8:blockSize])
	h.mul(y)
}

// Universal Hash Function.
// Chapter 3.3 in https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.470.5288.
func (h *hctr) uhash(m []byte, dst *[blockSize]byte) {
for k := 0; k < blockSize; k++ {
dst[k] = 0
}
func (h *hctr) uhash(m []byte, out *[blockSize]byte) {
var y hctrFieldElement
msg := m
// update blocks
for len(msg) >= blockSize {
subtle.XORBytes(dst[:], dst[:], msg[:blockSize])
h.mul(dst)
h.updateBlock(msg, &y)
msg = msg[blockSize:]
}
var v [blockSize]byte
// update partial block & tweak
if len(msg) > 0 {
copy(v[:], msg)
copy(v[len(msg):], h.tweak[:])
subtle.XORBytes(dst[:], dst[:], v[:])
h.mul(dst)
copy(v[:], h.tweak[len(msg):])
var partialBlock [blockSize]byte
copy(partialBlock[:], msg)
copy(partialBlock[len(msg):], h.tweak[:])
h.updateBlock(partialBlock[:], &y)

copy(partialBlock[:], h.tweak[len(msg):])
for i := len(msg); i < blockSize; i++ {
v[i] = 0
}
subtle.XORBytes(dst[:], dst[:], v[:])
h.mul(dst)
for i := 0; i < len(msg); i++ {
v[i] = 0
partialBlock[i] = 0
}
h.updateBlock(partialBlock[:], &y)
} else {
subtle.XORBytes(dst[:], dst[:], h.tweak[:])
h.mul(dst)
h.updateBlock(h.tweak[:], &y)
}
// (|M|)₂
binary.BigEndian.PutUint64(v[8:], uint64(len(m)+blockSize)<<3)
subtle.XORBytes(dst[:], dst[:], v[:])
h.mul(dst)
// update bit string length (|M|)₂
y.high ^= uint64(len(m)+blockSize) * 8
h.mul(&y)
// output result
binary.BigEndian.PutUint64(out[:], y.low)
binary.BigEndian.PutUint64(out[8:], y.high)
}

func (h *hctr) Encrypt(ciphertext, plaintext []byte) {
Expand All @@ -135,7 +212,6 @@ func (h *hctr) Encrypt(ciphertext, plaintext []byte) {
}

var z1, z2 [blockSize]byte

// a) z1 generation
h.uhash(plaintext[blockSize:], &z1)
subtle.XORBytes(z1[:], z1[:], plaintext[:blockSize])
Expand Down

1 comment on commit e5effb8

@emmansun
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will change the LengthPreservingMode interface, making it different from the crypto.Block interface.

Please sign in to comment.