Skip to content

Commit

Permalink
tags: Aggressively inline tag serialization for small tag sets (#72)
Browse files Browse the repository at this point in the history
* tags: Aggressively inline tag serialization for four and less tags

This improves runtime by roughly 25% and reduces allocs by roughly 70%.

Benchmark results:

```
benchmark                               old ns/op     new ns/op     delta
BenchmarkSerializeTags_Four-8           341           262           -23.17%

benchmark                               old allocs     new allocs     delta
BenchmarkSerializeTags_Four-8           3              1              -66.67%

benchmark                               old bytes     new bytes     delta
BenchmarkSerializeTags_Four-8           224           64            -71.43%
```

* tags: test against reference implementation

* tags: remove commented out code

* tags: Benchmark serializeTags for 1..10 tags

The goal here is to highlight the improvements made for serializing
small tag sets (1..4).

Benchmark results:

benchmark                               old ns/op     new ns/op     delta
BenchmarkSerializeTags_One-8            97.3          92.7          -4.73%
BenchmarkSerializeTags_Two-8            158           145           -8.23%
BenchmarkSerializeTags_Three-8          308           205           -33.44%
BenchmarkSerializeTags_Four-8           367           272           -25.89%
BenchmarkSerializeTags_Five-8           448           437           -2.46%
BenchmarkSerializeTags_Six-8            518           498           -3.86%
BenchmarkSerializeTags_Seven-8          611           597           -2.29%
BenchmarkSerializeTags_Eight-8          682           649           -4.84%
BenchmarkSerializeTags_Nine-8           833           777           -6.72%
BenchmarkSerializeTags_Ten-8            949           874           -7.90%

benchmark                               old allocs     new allocs     delta
BenchmarkSerializeTags_One-8            1              1              +0.00%
BenchmarkSerializeTags_Two-8            1              1              +0.00%
BenchmarkSerializeTags_Three-8          3              1              -66.67%
BenchmarkSerializeTags_Four-8           3              1              -66.67%
BenchmarkSerializeTags_Five-8           3              3              +0.00%
BenchmarkSerializeTags_Six-8            3              3              +0.00%
BenchmarkSerializeTags_Seven-8          3              3              +0.00%
BenchmarkSerializeTags_Eight-8          3              3              +0.00%
BenchmarkSerializeTags_Nine-8           3              3              +0.00%
BenchmarkSerializeTags_Ten-8            3              3              +0.00%

benchmark                               old bytes     new bytes     delta
BenchmarkSerializeTags_One-8            32            32            +0.00%
BenchmarkSerializeTags_Two-8            32            32            +0.00%
BenchmarkSerializeTags_Three-8          176           48            -72.73%
BenchmarkSerializeTags_Four-8           224           64            -71.43%
BenchmarkSerializeTags_Five-8           272           272           +0.00%
BenchmarkSerializeTags_Six-8            304           304           +0.00%
BenchmarkSerializeTags_Seven-8          352           352           +0.00%
BenchmarkSerializeTags_Eight-8          400           400           +0.00%
BenchmarkSerializeTags_Nine-8           448           448           +0.00%
BenchmarkSerializeTags_Ten-8            480           480           +0.00%

* tags: remove BenchmarkSerializeTags as it is now redundant

It is the same as BenchmarkSerializeTags_Five()

* tags: use B.Run() for BenchmarkSerializeTags 1..10

* tags: test the network sort used for small tag sets
  • Loading branch information
charlievieth authored Mar 20, 2019
1 parent 295858e commit 1759516
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 35 deletions.
61 changes: 53 additions & 8 deletions tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ func serializeTags(name string, tags map[string]string) string {
const prefix = ".__"
const sep = "="

// switch len(tags) {
switch len(tags) {
case 0:
return name
Expand All @@ -30,16 +29,62 @@ func serializeTags(name string, tags map[string]string) string {
}
panic("unreachable")
case 2:
var a, b tagPair
var t0, t1 tagPair
for k, v := range tags {
b = a
a = tagPair{k, replaceChars(v)}
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if a.dimension > b.dimension {
a, b = b, a
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
return name + prefix + a.dimension + sep + a.value +
prefix + b.dimension + sep + b.value
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value
case 3:
var t0, t1, t2 tagPair
for k, v := range tags {
t2 = t1
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if t1.dimension > t2.dimension {
t1, t2 = t2, t1
}
if t0.dimension > t2.dimension {
t0, t2 = t2, t0
}
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value +
prefix + t2.dimension + sep + t2.value
case 4:
var t0, t1, t2, t3 tagPair
for k, v := range tags {
t3 = t2
t2 = t1
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
if t2.dimension > t3.dimension {
t2, t3 = t3, t2
}
if t0.dimension > t2.dimension {
t0, t2 = t2, t0
}
if t1.dimension > t3.dimension {
t1, t3 = t3, t1
}
if t1.dimension > t2.dimension {
t1, t2 = t2, t1
}
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value +
prefix + t2.dimension + sep + t2.value +
prefix + t3.dimension + sep + t3.value
default:
// n stores the length of the serialized name + tags
n := (len(prefix) + len(sep)) * len(tags)
Expand Down
143 changes: 116 additions & 27 deletions tags_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,38 @@
package stats

import (
"bufio"
"bytes"
crand "crypto/rand"
"encoding/base64"
"fmt"
"math/rand"
"sort"
"testing"
"time"
)

// serializeTagsReference is the trusted, unoptimized implementation of
// serializeTags. It sorts the tag pairs and appends each one to name as
// ".__<dimension>=<value>". Used only by tests to validate the optimized
// implementation.
func serializeTagsReference(name string, tags map[string]string) string {
	const prefix = ".__"
	const sep = "="
	if len(tags) == 0 {
		return name
	}
	// Collect and sort the pairs so output order is deterministic.
	pairs := make([]tagPair, 0, len(tags))
	for key, value := range tags {
		pairs = append(pairs, tagPair{key, replaceChars(value)})
	}
	sort.Sort(tagSet(pairs))

	var buf bytes.Buffer
	buf.WriteString(name)
	for _, p := range pairs {
		buf.WriteString(prefix)
		buf.WriteString(p.dimension)
		buf.WriteString(sep)
		buf.WriteString(p.value)
	}
	return buf.String()
}

func TestSerializeTags(t *testing.T) {
const name = "prefix"
const expected = name + ".__q=r.__zzz=hello"
Expand All @@ -15,6 +44,82 @@ func TestSerializeTags(t *testing.T) {
}
}

// TestSerializeTagsReference verifies that the optimized serializeTags()
// produces byte-identical output to the reference implementation for tag
// set sizes 0 through 99.
func TestSerializeTagsReference(t *testing.T) {
	const name = "prefix"
	buildTags := func(count int) map[string]string {
		tags := make(map[string]string, count)
		for j := 0; j < count; j++ {
			tags[fmt.Sprintf("key%d", j)] = fmt.Sprintf("val%d", j)
		}
		return tags
	}
	for n := 0; n < 100; n++ {
		tags := buildTags(n)
		expected := serializeTagsReference(name, tags)
		serialized := serializeTags(name, tags)
		if serialized != expected {
			t.Errorf("%d Serialized output (%s) didn't match expected output: %s",
				n, serialized, expected)
		}
	}
}

// Test the network sort used when we have 4 or less tags. Since the iteration
// order of maps is random we use random keys in an attempt to get 100% test
// coverage.
func TestSerializeTagsNetworkSort(t *testing.T) {
	const name = "prefix"

	rand.Seed(time.Now().UnixNano())
	buf := bufio.NewReader(crand.Reader)
	seen := make(map[string]bool)

	// randomString returns a random base64 string (from 1..30 source bytes)
	// that has not been returned before; it fails the test if crypto/rand
	// errors or no unique string is found within 100 attempts.
	randomString := func() string {
		for i := 0; i < 100; i++ {
			b := make([]byte, rand.Intn(30)+1)
			if _, err := buf.Read(b); err != nil {
				t.Fatal(err)
			}
			s := base64.StdEncoding.EncodeToString(b)
			if !seen[s] {
				seen[s] = true
				return s
			}
		}
		t.Fatal("Failed to generate a random string")
		return ""
	}

	// makeTags builds a map of n unique random key/value pairs.
	makeTags := func(n int) map[string]string {
		m := make(map[string]string, n)
		for i := 0; i < n; i++ {
			k := randomString()
			v := randomString()
			m[k] = v
		}
		return m
	}

	// we use a network sort when tag length is 4 or less, but test up to 8
	// here in case that value is ever increased
	//
	// BUG FIX: the inner loop previously redeclared `i`, shadowing the outer
	// tag count, so makeTags() was always called with 0..9 and the outer
	// loop never varied the tag-set size. Use a separate index `j` so each
	// size i is actually exercised, and run the outer loop to 8 as the
	// comment above intends.
	for i := 1; i <= 8; i++ {
		// loop to increase the odds of 100% test coverage
		for j := 0; j < 10; j++ {
			tags := makeTags(i)
			expected := serializeTagsReference(name, tags)
			serialized := serializeTags(name, tags)
			if serialized != expected {
				t.Errorf("%d Serialized output (%s) didn't match expected output: %s",
					i, serialized, expected)
			}
		}
	}
}

func TestSerializeWithPerInstanceFlag(t *testing.T) {
const name = "prefix"
const expected = name + ".___f=i.__foo=bar"
Expand Down Expand Up @@ -48,40 +153,24 @@ func TestSerializeTagValuePeriod(t *testing.T) {
}
}

func BenchmarkSerializeTags(b *testing.B) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
"tag2": "val2",
"tag3": "val3",
"tag4": "val4",
"tag5": "val5",
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
}
}

func BenchmarkSerializeTags_One(b *testing.B) {
func benchmarkSerializeTags(b *testing.B, n int) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
tags := make(map[string]string, n)
for i := 0; i < n; i++ {
k := fmt.Sprintf("key%d", i)
v := fmt.Sprintf("val%d", i)
tags[k] = v
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
}
}

func BenchmarkSerializeTags_Two(b *testing.B) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
"tag2": "val2",
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
func BenchmarkSerializeTags(b *testing.B) {
for i := 1; i <= 10; i++ {
b.Run(fmt.Sprintf("%d", i), func(b *testing.B) {
benchmarkSerializeTags(b, i)
})
}
}

0 comments on commit 1759516

Please sign in to comment.