Skip to content

Commit

Permalink
tags: Aggressively inline tag serialization for small tag sets (#72)
Browse files Browse the repository at this point in the history
* tags: Aggressively inline tag serialization for four and less tags

This improves runtime by roughly 25% and reduces allocs by roughly 70%.

Benchmark results:

```
benchmark                               old ns/op     new ns/op     delta
BenchmarkSerializeTags_Four-8           341           262           -23.17%

benchmark                               old allocs     new allocs     delta
BenchmarkSerializeTags_Four-8           3              1              -66.67%

benchmark                               old bytes     new bytes     delta
BenchmarkSerializeTags_Four-8           224           64            -71.43%
```

* tags: test against reference implementation

* tags: remove commented out code

* tags: Benchmark serializeTags for 1..10 tags

The goal here is to highlight the improvements made for serializing
small tag sets (1..4).

Benchmark results:

benchmark                               old ns/op     new ns/op     delta
BenchmarkSerializeTags_One-8            97.3          92.7          -4.73%
BenchmarkSerializeTags_Two-8            158           145           -8.23%
BenchmarkSerializeTags_Three-8          308           205           -33.44%
BenchmarkSerializeTags_Four-8           367           272           -25.89%
BenchmarkSerializeTags_Five-8           448           437           -2.46%
BenchmarkSerializeTags_Six-8            518           498           -3.86%
BenchmarkSerializeTags_Seven-8          611           597           -2.29%
BenchmarkSerializeTags_Eight-8          682           649           -4.84%
BenchmarkSerializeTags_Nine-8           833           777           -6.72%
BenchmarkSerializeTags_Ten-8            949           874           -7.90%

benchmark                               old allocs     new allocs     delta
BenchmarkSerializeTags_One-8            1              1              +0.00%
BenchmarkSerializeTags_Two-8            1              1              +0.00%
BenchmarkSerializeTags_Three-8          3              1              -66.67%
BenchmarkSerializeTags_Four-8           3              1              -66.67%
BenchmarkSerializeTags_Five-8           3              3              +0.00%
BenchmarkSerializeTags_Six-8            3              3              +0.00%
BenchmarkSerializeTags_Seven-8          3              3              +0.00%
BenchmarkSerializeTags_Eight-8          3              3              +0.00%
BenchmarkSerializeTags_Nine-8           3              3              +0.00%
BenchmarkSerializeTags_Ten-8            3              3              +0.00%

benchmark                               old bytes     new bytes     delta
BenchmarkSerializeTags_One-8            32            32            +0.00%
BenchmarkSerializeTags_Two-8            32            32            +0.00%
BenchmarkSerializeTags_Three-8          176           48            -72.73%
BenchmarkSerializeTags_Four-8           224           64            -71.43%
BenchmarkSerializeTags_Five-8           272           272           +0.00%
BenchmarkSerializeTags_Six-8            304           304           +0.00%
BenchmarkSerializeTags_Seven-8          352           352           +0.00%
BenchmarkSerializeTags_Eight-8          400           400           +0.00%
BenchmarkSerializeTags_Nine-8           448           448           +0.00%
BenchmarkSerializeTags_Ten-8            480           480           +0.00%

* tags: remove BenchmarkSerializeTags as it is now redundant

It is the same as BenchmarkSerializeTags_Five()

* tags: use B.Run() for BenchmarkSerializeTags 1..10

* tags: test the network sort used for small tag sets
  • Loading branch information
charlievieth authored Mar 20, 2019
1 parent 295858e commit 1759516
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 35 deletions.
61 changes: 53 additions & 8 deletions tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ func serializeTags(name string, tags map[string]string) string {
const prefix = ".__"
const sep = "="

// switch len(tags) {
switch len(tags) {
case 0:
return name
Expand All @@ -30,16 +29,62 @@ func serializeTags(name string, tags map[string]string) string {
}
panic("unreachable")
case 2:
var a, b tagPair
var t0, t1 tagPair
for k, v := range tags {
b = a
a = tagPair{k, replaceChars(v)}
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if a.dimension > b.dimension {
a, b = b, a
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
return name + prefix + a.dimension + sep + a.value +
prefix + b.dimension + sep + b.value
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value
case 3:
var t0, t1, t2 tagPair
for k, v := range tags {
t2 = t1
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if t1.dimension > t2.dimension {
t1, t2 = t2, t1
}
if t0.dimension > t2.dimension {
t0, t2 = t2, t0
}
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value +
prefix + t2.dimension + sep + t2.value
case 4:
var t0, t1, t2, t3 tagPair
for k, v := range tags {
t3 = t2
t2 = t1
t1 = t0
t0 = tagPair{k, replaceChars(v)}
}
if t0.dimension > t1.dimension {
t0, t1 = t1, t0
}
if t2.dimension > t3.dimension {
t2, t3 = t3, t2
}
if t0.dimension > t2.dimension {
t0, t2 = t2, t0
}
if t1.dimension > t3.dimension {
t1, t3 = t3, t1
}
if t1.dimension > t2.dimension {
t1, t2 = t2, t1
}
return name + prefix + t0.dimension + sep + t0.value +
prefix + t1.dimension + sep + t1.value +
prefix + t2.dimension + sep + t2.value +
prefix + t3.dimension + sep + t3.value
default:
// n stores the length of the serialized name + tags
n := (len(prefix) + len(sep)) * len(tags)
Expand Down
143 changes: 116 additions & 27 deletions tags_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,38 @@
package stats

import (
"bufio"
"bytes"
crand "crypto/rand"
"encoding/base64"
"fmt"
"math/rand"
"sort"
"testing"
"time"
)

// serializeTagsReference is the trusted, unoptimized implementation of
// serializeTags. It sorts the tag pairs and appends each one to name as
// ".__<dimension>=<value>". Used only by tests to validate the optimized
// implementation.
func serializeTagsReference(name string, tags map[string]string) string {
	const prefix = ".__"
	const sep = "="
	if len(tags) == 0 {
		return name
	}
	// Collect and sort the pairs so output order is deterministic.
	pairs := make([]tagPair, 0, len(tags))
	for key, value := range tags {
		pairs = append(pairs, tagPair{key, replaceChars(value)})
	}
	sort.Sort(tagSet(pairs))

	var buf bytes.Buffer
	buf.WriteString(name)
	for _, p := range pairs {
		buf.WriteString(prefix)
		buf.WriteString(p.dimension)
		buf.WriteString(sep)
		buf.WriteString(p.value)
	}
	return buf.String()
}

func TestSerializeTags(t *testing.T) {
const name = "prefix"
const expected = name + ".__q=r.__zzz=hello"
Expand All @@ -15,6 +44,82 @@ func TestSerializeTags(t *testing.T) {
}
}

// TestSerializeTagsReference verifies that the optimized serializeTags()
// produces byte-identical output to the reference implementation for tag
// set sizes 0 through 99.
func TestSerializeTagsReference(t *testing.T) {
	const name = "prefix"
	buildTags := func(count int) map[string]string {
		tags := make(map[string]string, count)
		for j := 0; j < count; j++ {
			tags[fmt.Sprintf("key%d", j)] = fmt.Sprintf("val%d", j)
		}
		return tags
	}
	for n := 0; n < 100; n++ {
		tags := buildTags(n)
		expected := serializeTagsReference(name, tags)
		serialized := serializeTags(name, tags)
		if serialized != expected {
			t.Errorf("%d Serialized output (%s) didn't match expected output: %s",
				n, serialized, expected)
		}
	}
}

// Test the network sort used when we have 4 or less tags. Since the iteration
// order of maps is random we use random keys in an attempt to get 100% test
// coverage.
func TestSerializeTagsNetworkSort(t *testing.T) {
	const name = "prefix"

	rand.Seed(time.Now().UnixNano())
	buf := bufio.NewReader(crand.Reader)
	seen := make(map[string]bool)

	// randomString returns a random base64 string (from 1..30 source bytes)
	// that has not been returned before; it fails the test if crypto/rand
	// errors or no unique string is found within 100 attempts.
	randomString := func() string {
		for i := 0; i < 100; i++ {
			b := make([]byte, rand.Intn(30)+1)
			if _, err := buf.Read(b); err != nil {
				t.Fatal(err)
			}
			s := base64.StdEncoding.EncodeToString(b)
			if !seen[s] {
				seen[s] = true
				return s
			}
		}
		t.Fatal("Failed to generate a random string")
		return ""
	}

	// makeTags builds a map of n unique random key/value pairs.
	makeTags := func(n int) map[string]string {
		m := make(map[string]string, n)
		for i := 0; i < n; i++ {
			k := randomString()
			v := randomString()
			m[k] = v
		}
		return m
	}

	// we use a network sort when tag length is 4 or less, but test up to 8
	// here in case that value is ever increased
	//
	// BUG FIX: the inner loop previously redeclared `i`, shadowing the outer
	// tag count, so makeTags() was always called with 0..9 and the outer
	// loop never varied the tag-set size. Use a separate index `j` so each
	// size i is actually exercised, and run the outer loop to 8 as the
	// comment above intends.
	for i := 1; i <= 8; i++ {
		// loop to increase the odds of 100% test coverage
		for j := 0; j < 10; j++ {
			tags := makeTags(i)
			expected := serializeTagsReference(name, tags)
			serialized := serializeTags(name, tags)
			if serialized != expected {
				t.Errorf("%d Serialized output (%s) didn't match expected output: %s",
					i, serialized, expected)
			}
		}
	}
}

func TestSerializeWithPerInstanceFlag(t *testing.T) {
const name = "prefix"
const expected = name + ".___f=i.__foo=bar"
Expand Down Expand Up @@ -48,40 +153,24 @@ func TestSerializeTagValuePeriod(t *testing.T) {
}
}

func BenchmarkSerializeTags(b *testing.B) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
"tag2": "val2",
"tag3": "val3",
"tag4": "val4",
"tag5": "val5",
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
}
}

func BenchmarkSerializeTags_One(b *testing.B) {
func benchmarkSerializeTags(b *testing.B, n int) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
tags := make(map[string]string, n)
for i := 0; i < n; i++ {
k := fmt.Sprintf("key%d", i)
v := fmt.Sprintf("val%d", i)
tags[k] = v
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
}
}

func BenchmarkSerializeTags_Two(b *testing.B) {
const name = "prefix"
tags := map[string]string{
"tag1": "val1",
"tag2": "val2",
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
serializeTags(name, tags)
func BenchmarkSerializeTags(b *testing.B) {
for i := 1; i <= 10; i++ {
b.Run(fmt.Sprintf("%d", i), func(b *testing.B) {
benchmarkSerializeTags(b, i)
})
}
}

0 comments on commit 1759516

Please sign in to comment.