Skip to content

Commit

Permalink
optimize internal parsing function -- avoid unsafe []byte parsing
Browse files Browse the repository at this point in the history
Making xtob take two byte arguments avoids a lot of slicing.  This makes
the Parse function faster.  In addition, because so much slicing is
avoiding, duplicating the parse logic to ParseBytes resulted in the
function being faster than Parse (<1ns).

The BenchmarkParseBytesNative function has been removed (parseBytes was
identical to ParseBytes).  And a new benchmark,
BenchmarkParseBytesUnsafe, has been added to benchmark the old way of
parsing []byte (which is slightly slower than Parse and thus the new
ParseBytes implementation).

    benchmark                         old ns/op     new ns/op     delta
    BenchmarkUUID_MarshalJSON-4       685           667           -2.63%
    BenchmarkUUID_UnmarshalJSON-4     1145          1162          +1.48%
    BenchmarkParse-4                  61.6          56.5          -8.28%
    BenchmarkParseBytes-4             65.7          55.9          -14.92%
    BenchmarkParseBytesCopy-4         121           115           -4.96%
    BenchmarkNew-4                    1665          1643          -1.32%
    BenchmarkUUID_String-4            112           113           +0.89%
    BenchmarkUUID_URN-4               117           119           +1.71%

    benchmark                         old allocs     new allocs     delta
    BenchmarkUUID_MarshalJSON-4       4              4              +0.00%
    BenchmarkUUID_UnmarshalJSON-4     2              2              +0.00%
    BenchmarkParse-4                  0              0              +0.00%
    BenchmarkParseBytes-4             0              0              +0.00%
    BenchmarkParseBytesCopy-4         1              1              +0.00%
    BenchmarkNew-4                    1              1              +0.00%
    BenchmarkUUID_String-4            1              1              +0.00%
    BenchmarkUUID_URN-4               1              1              +0.00%

    benchmark                         old bytes     new bytes     delta
    BenchmarkUUID_MarshalJSON-4       248           248           +0.00%
    BenchmarkUUID_UnmarshalJSON-4     248           248           +0.00%
    BenchmarkParse-4                  0             0             +0.00%
    BenchmarkParseBytes-4             0             0             +0.00%
    BenchmarkParseBytesCopy-4         48            48            +0.00%
    BenchmarkNew-4                    16            16            +0.00%
    BenchmarkUUID_String-4            48            48            +0.00%
    BenchmarkUUID_URN-4               48            48            +0.00%
  • Loading branch information
bmatsuo committed Feb 26, 2016
1 parent a8b7006 commit 7508f98
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 63 deletions.
7 changes: 2 additions & 5 deletions marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@

package uuid

import (
"fmt"
"unsafe"
)
import "fmt"

// MarshalText implements encoding.TextMarshaler.
func (u UUID) MarshalText() ([]byte, error) {
Expand All @@ -20,7 +17,7 @@ func (u UUID) MarshalText() ([]byte, error) {
func (u *UUID) UnmarshalText(data []byte) error {
// See comment in ParseBytes why we do this.
// id, err := ParseBytes(data)
id, err := Parse(*(*string)(unsafe.Pointer(&data)))
id, err := ParseBytes(data)
if err == nil {
*u = id
}
Expand Down
8 changes: 4 additions & 4 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ var xvalues = [256]byte{
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
}

// xtob converts the the first two hex bytes of x into a byte.
func xtob(x string) (byte, bool) {
b1 := xvalues[x[0]]
b2 := xvalues[x[1]]
// xtob converts hex characters x1 and x2 into a byte.
func xtob(x1, x2 byte) (byte, bool) {
b1 := xvalues[x1]
b2 := xvalues[x2]
return (b1 << 4) | b2, b1 != 255 && b2 != 255
}
36 changes: 28 additions & 8 deletions uuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
package uuid

import (
"bytes"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"io"
"strings"
"unsafe"
)

// A UUID is a 128 bit (16 byte) Universal Unique IDentifier as defined in RFC
Expand Down Expand Up @@ -58,7 +58,7 @@ func Parse(s string) (UUID, error) {
14, 16,
19, 21,
24, 26, 28, 30, 32, 34} {
if v, ok := xtob(s[x:]); !ok {
if v, ok := xtob(s[x], s[x+1]); !ok {
return uuid, errors.New("invalid UUID format")
} else {
uuid[i] = v
Expand All @@ -69,12 +69,32 @@ func Parse(s string) (UUID, error) {

// ParseBytes is like Parse, exect it parses a byte slice instead of a string.
func ParseBytes(b []byte) (UUID, error) {
// Parsing a string is actually faster than parsing a byte slice as it
// is cheaper to slice a string. Further, it is not safe to convert
// a string into a byte slice but the opposite direction is. These
// stem from the fact that a byte slice is 3 words while a string
// is only 2 words.
return Parse(*(*string)(unsafe.Pointer(&b)))
var uuid UUID
if len(b) != 36 {
if len(b) != 36+9 {
return uuid, fmt.Errorf("invalid UUID length: %d", len(b))
}
if !bytes.Equal(bytes.ToLower(b[:9]), []byte("urn:uuid:")) {
return uuid, fmt.Errorf("invalid urn prefix: %q", b[:9])
}
b = b[9:]
}
if b[8] != '-' || b[13] != '-' || b[18] != '-' || b[23] != '-' {
return uuid, errors.New("invalid UUID format")
}
for i, x := range [16]int{
0, 2, 4, 6,
9, 11,
14, 16,
19, 21,
24, 26, 28, 30, 32, 34} {
if v, ok := xtob(b[x], b[x+1]); !ok {
return uuid, errors.New("invalid UUID format")
} else {
uuid[i] = v
}
}
return uuid, nil
}

func MustParse(s string) UUID {
Expand Down
57 changes: 11 additions & 46 deletions uuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ package uuid

import (
"bytes"
"errors"
"fmt"
"os"
"strings"
"testing"
"time"
"unsafe"
)

type test struct {
Expand Down Expand Up @@ -466,60 +466,25 @@ func BenchmarkParseBytes(b *testing.B) {
}
}

// parseBytesCopy is to benchmark not using unsafe.
func parseBytesCopy(b []byte) (UUID, error) {
return Parse(string(b))
// parseBytesUnsafe is to benchmark using unsafe.
func parseBytesUnsafe(b []byte) (UUID, error) {
return Parse(*(*string)(unsafe.Pointer(&b)))
}


// xtobb converts the the first two hex bytes of x into a byte.
func xtobb(x []byte) (byte, bool) {
b1 := xvalues[x[0]]
b2 := xvalues[x[1]]
return (b1 << 4) | b2, b1 != 255 && b2 != 255
}

// parseBytes is the same as Parse, but with byte slices. It demonstrates
// that it is faster to convert the byte slice into a string and then parse
// than to parse the byte slice directly.
func parseBytes(s []byte) (UUID, error) {
var uuid UUID
if len(s) != 36 {
if len(s) != 36+9 {
return uuid, fmt.Errorf("invalid UUID length: %d", len(s))
}
if !bytes.Equal(bytes.ToLower(s[:9]), []byte("urn:uuid:")) {
return uuid, fmt.Errorf("invalid urn prefix: %q", s[:9])
}
s = s[9:]
}
if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' {
return uuid, errors.New("invalid UUID format")
}
for i, x := range [16]int{
0, 2, 4, 6,
9, 11,
14, 16,
19, 21,
24, 26, 28, 30, 32, 34} {
if v, ok := xtobb(s[x:]); !ok {
return uuid, errors.New("invalid UUID format")
} else {
uuid[i] = v
}
}
return uuid, nil
}

func BenchmarkParseBytesNative(b *testing.B) {
func BenchmarkParseBytesUnsafe(b *testing.B) {
for i := 0; i < b.N; i++ {
_, err := parseBytes(asBytes)
_, err := parseBytesUnsafe(asBytes)
if err != nil {
b.Fatal(err)
}
}
}

// parseBytesCopy is to benchmark not using unsafe.
func parseBytesCopy(b []byte) (UUID, error) {
return Parse(string(b))
}

func BenchmarkParseBytesCopy(b *testing.B) {
for i := 0; i < b.N; i++ {
_, err := parseBytesCopy(asBytes)
Expand Down

0 comments on commit 7508f98

Please sign in to comment.