Skip to content

Commit

Permalink
refactor: Stats & Metrics (#82)
Browse files Browse the repository at this point in the history
- [x] Create the `Stats` structs to output structured metrics
convertible into key-values ready for the backend
- [x] Store truncations in the code because there will be output merged
between runs

---------

Signed-off-by: Eliott Bouhana <[email protected]>
  • Loading branch information
eliottness authored Mar 13, 2024
1 parent 02b711a commit 405eb7a
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 80 deletions.
97 changes: 34 additions & 63 deletions context.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ type Context struct {

// metrics stores the cumulative time spent in various parts of the WAF
metrics metricsStore

// truncations provides details about truncations that occurred while
// encoding address data for WAF execution.
truncations map[TruncationReason][]int
}

// NewContext returns a new WAF context of to the given WAF handle.
Expand Down Expand Up @@ -64,7 +68,7 @@ func NewContextWithBudget(handle *Handle, budget time.Duration) *Context {
return nil
}

timer, err := timer.NewTreeTimer(timer.WithBudget(budget), timer.WithComponents("_dd.appsec.waf.run"))
timer, err := timer.NewTreeTimer(timer.WithBudget(budget), timer.WithComponents(wafRunTag))
if err != nil {
return nil
}
Expand Down Expand Up @@ -111,12 +115,12 @@ func (context *Context) Run(addressData RunAddressData, _ time.Duration) (res Re
return Result{}, errors.ErrTimeout
}

runTimer, err := context.timer.NewNode("_dd.appsec.waf.run",
runTimer, err := context.timer.NewNode(wafRunTag,
timer.WithComponents(
"_dd.appsec.waf.encode.persistent",
"_dd.appsec.waf.encode.ephemeral",
"_dd.appsec.waf.duration_ext",
"_dd.appsec.waf.duration",
wafPersistentEncoderTag,
wafEphemeralEncoderTag,
wafDurationExtTag,
wafDurationTag,
),
)
if err != nil {
Expand All @@ -129,14 +133,14 @@ func (context *Context) Run(addressData RunAddressData, _ time.Duration) (res Re
context.metrics.merge(runTimer.Stats())
}()

persistentData, persistentEncoder, err := encodeOneAddressType(addressData.Persistent, runTimer.MustLeaf("_dd.appsec.waf.encode.persistent"))
persistentData, persistentEncoder, err := context.encodeOneAddressType(addressData.Persistent, runTimer.MustLeaf(wafPersistentEncoderTag))
if err != nil {
return res, err
}

// The WAF releases ephemeral address data at the max of each run call, so we need not keep the Go values live beyond
// that in the same way we need for persistent data. We hence use a separate encoder.
ephemeralData, ephemeralEncoder, err := encodeOneAddressType(addressData.Ephemeral, runTimer.MustLeaf("_dd.appsec.waf.encode.ephemeral"))
ephemeralData, ephemeralEncoder, err := context.encodeOneAddressType(addressData.Ephemeral, runTimer.MustLeaf(wafEphemeralEncoderTag))
if err != nil {
return res, err
}
Expand All @@ -153,16 +157,15 @@ func (context *Context) Run(addressData RunAddressData, _ time.Duration) (res Re
// into C ddwaf_objects. libddwaf's API requires to keep this data for the lifetime of the ddwaf_context.
defer context.cgoRefs.append(persistentEncoder.cgoRefs)

wafExtTimer := runTimer.MustLeaf("_dd.appsec.waf.duration_ext")
wafExtTimer := runTimer.MustLeaf(wafDurationExtTag)
res, err = context.run(persistentData, ephemeralData, wafExtTimer, runTimer.SumRemaining())

runTimer.AddTime("_dd.appsec.waf.duration", res.TimeSpent)
runTimer.AddTime(wafDurationTag, res.TimeSpent)

// Ensure the ephemerals don't get optimized away by the compiler before the WAF had a chance to use them.
unsafe.KeepAlive(ephemeralEncoder.cgoRefs)
unsafe.KeepAlive(persistentEncoder.cgoRefs)

res.Truncations = merge(persistentEncoder.Truncations(), ephemeralEncoder.Truncations())
return
}

Expand Down Expand Up @@ -206,13 +209,20 @@ func merge[K comparable, V any](a, b map[K][]V) (merged map[K][]V) {
// is a nil map, but this behaviour is expected since either persistent or ephemeral addresses are allowed to be null
// one at a time. In this case, EncodeAddresses will return nil contrary to Encode which will return a nil wafObject,
// which is what we need to send to ddwaf_run to signal that the address data is empty.
func encodeOneAddressType(addressData map[string]any, timer timer.Timer) (*bindings.WafObject, encoder, error) {
func (context *Context) encodeOneAddressType(addressData map[string]any, timer timer.Timer) (*bindings.WafObject, encoder, error) {
encoder := newLimitedEncoder(timer)
if addressData == nil {
return nil, encoder, nil
}

data, _ := encoder.EncodeAddresses(addressData)
if len(encoder.truncations) > 0 {
context.mutex.Lock()
defer context.mutex.Unlock()

context.truncations = merge(context.truncations, encoder.truncations)
}

if timer.Exhausted() {
return nil, encoder, errors.ErrTimeout
}
Expand Down Expand Up @@ -291,7 +301,7 @@ func (context *Context) Close() {
// Returned time is in nanoseconds.
// Deprecated: use Timings instead
func (context *Context) TotalRuntime() (uint64, uint64) {
return uint64(context.metrics.get("_dd.appsec.waf.run") * time.Nanosecond), uint64(context.metrics.get("_dd.appsec.waf.duration"))
return uint64(context.metrics.get(wafRunTag)), uint64(context.metrics.get(wafDurationTag))
}

// TotalTimeouts returns the cumulated amount of WAF timeouts across various run calls within the same WAF context.
Expand All @@ -301,58 +311,19 @@ func (context *Context) TotalTimeouts() uint64 {

// Stats returns the cumulative time spent in various parts of the WAF, all in nanoseconds
// and the timeout value used
func (context *Context) Stats() map[string]time.Duration {
return context.metrics.copy()
}

type metricsStore struct {
data map[string]time.Duration
mutex sync.RWMutex
}

func (metrics *metricsStore) add(key string, duration time.Duration) {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
metrics.data = make(map[string]time.Duration, 5)
}

metrics.data[key] += duration
}

func (metrics *metricsStore) get(key string) time.Duration {
metrics.mutex.RLock()
defer metrics.mutex.RUnlock()
return metrics.data[key]
}

func (metrics *metricsStore) copy() map[string]time.Duration {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
return nil
}

copy := make(map[string]time.Duration, len(metrics.data))
for k, v := range metrics.data {
copy[k] = v
}
return copy
}
func (context *Context) Stats() Stats {
context.mutex.Lock()
defer context.mutex.Unlock()

// merge merges the current metrics with new ones
func (metrics *metricsStore) merge(other map[string]time.Duration) {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
metrics.data = make(map[string]time.Duration, 5)
truncations := make(map[TruncationReason][]int, len(context.truncations))
for reason, counts := range context.truncations {
truncations[reason] = make([]int, len(counts))
copy(truncations[reason], counts)
}

for key, val := range other {
prev, ok := metrics.data[key]
if !ok {
prev = 0
}
metrics.data[key] = prev + val
return Stats{
Timers: context.metrics.copy(),
TimeoutCount: context.timeoutCount.Load(),
Truncations: truncations,
}
}
14 changes: 14 additions & 0 deletions encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package waf

import (
"context"
"fmt"
"github.com/DataDog/go-libddwaf/v2/timer"
"math"
"reflect"
Expand Down Expand Up @@ -56,6 +57,19 @@ const (
ObjectTooDeep
)

func (reason TruncationReason) String() string {
switch reason {
case ObjectTooDeep:
return "depth"
case ContainerTooLarge:
return "container-size"
case StringTooLong:
return "string-size"
default:
return fmt.Sprintf("TruncationReason(%v)", int(reason))
}
}

const (
AppsecFieldTag = "ddwaf"
AppsecFieldTagValueIgnore = "ignore"
Expand Down
102 changes: 102 additions & 0 deletions metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

package waf

import (
"fmt"
"sync"
"time"
)

// Stats stores the metrics collected by the WAF.
type Stats struct {
// Timers returns a map of metrics and their durations.
Timers map[string]time.Duration

// Timeout
TimeoutCount uint64

// Truncations provides details about truncations that occurred while
// encoding address data for WAF execution.
Truncations map[TruncationReason][]int
}

const (
wafPersistentEncoderTag = "_dd.appsec.waf.encode.persistent"
wafEphemeralEncoderTag = "_dd.appsec.waf.encode.ephemeral"
wafRunTag = "_dd.appsec.waf.run"
wafDurationTag = "_dd.appsec.waf.duration"
wafDurationExtTag = "_dd.appsec.waf.duration_ext"
wafTimeoutTag = "_dd.appsec.waf.timeouts"
wafTruncationTag = "_dd.appsec.waf.truncations"
)

// Metrics transform the stats returned by the WAF into a map of key value metrics for datadog backend
func (stats *Stats) Metrics() map[string]any {
tags := make(map[string]any, len(stats.Timers)+len(stats.Truncations)+1)
for k, v := range stats.Timers {
tags[k] = uint64(v.Microseconds())
}

tags[wafTimeoutTag] = stats.TimeoutCount
for reason, list := range stats.Truncations {
tags[fmt.Sprintf("%s.%s", wafTruncationTag, reason.String())] = list
}

return tags
}

type metricsStore struct {
data map[string]time.Duration
mutex sync.RWMutex
}

func (metrics *metricsStore) add(key string, duration time.Duration) {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
metrics.data = make(map[string]time.Duration, 5)
}

metrics.data[key] += duration
}

func (metrics *metricsStore) get(key string) time.Duration {
metrics.mutex.RLock()
defer metrics.mutex.RUnlock()
return metrics.data[key]
}

func (metrics *metricsStore) copy() map[string]time.Duration {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
return nil
}

copy := make(map[string]time.Duration, len(metrics.data))
for k, v := range metrics.data {
copy[k] = v
}
return copy
}

// merge merges the current metrics with new ones
func (metrics *metricsStore) merge(other map[string]time.Duration) {
metrics.mutex.Lock()
defer metrics.mutex.Unlock()
if metrics.data == nil {
metrics.data = make(map[string]time.Duration, 5)
}

for key, val := range other {
prev, ok := metrics.data[key]
if !ok {
prev = 0
}
metrics.data[key] = prev + val
}
}
4 changes: 0 additions & 4 deletions waf.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ type Result struct {
// against the provided address data.
Actions []string

// Truncatations provides details about truncations that occurred while
// encoding address data for WAF execution.
Truncations map[TruncationReason][]int

// TimeSpent is the time the WAF self-reported as spent processing the call to ddwaf_run
TimeSpent time.Duration
}
Expand Down
26 changes: 13 additions & 13 deletions waf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,11 +376,11 @@ func TestTimeout(t *testing.T) {
_, err := context.Run(RunAddressData{Persistent: normalValue, Ephemeral: normalValue}, 0)
require.NoError(t, err)
require.NotEmpty(t, context.Stats())
require.NotZero(t, context.Stats()["_dd.appsec.waf.run"])
require.NotZero(t, context.Stats()["_dd.appsec.waf.encode.persistent"])
require.NotZero(t, context.Stats()["_dd.appsec.waf.encode.ephemeral"])
require.NotZero(t, context.Stats()["_dd.appsec.waf.duration_ext"])
require.NotZero(t, context.Stats()["_dd.appsec.waf.duration"])
require.NotZero(t, context.Stats().Timers["_dd.appsec.waf.run"])
require.NotZero(t, context.Stats().Timers["_dd.appsec.waf.encode.persistent"])
require.NotZero(t, context.Stats().Timers["_dd.appsec.waf.encode.ephemeral"])
require.NotZero(t, context.Stats().Timers["_dd.appsec.waf.duration_ext"])
require.NotZero(t, context.Stats().Timers["_dd.appsec.waf.duration"])
})

t.Run("timeout-persistent-encoder", func(t *testing.T) {
Expand All @@ -390,9 +390,9 @@ func TestTimeout(t *testing.T) {

_, err := context.Run(RunAddressData{Persistent: largeValue}, 0)
require.Equal(t, errors.ErrTimeout, err)
require.GreaterOrEqual(t, context.Stats()["_dd.appsec.waf.run"], time.Millisecond)
require.GreaterOrEqual(t, context.Stats()["_dd.appsec.waf.encode.persistent"], time.Millisecond)
require.Equal(t, context.Stats()["_dd.appsec.waf.encode.ephemeral"], time.Duration(0))
require.GreaterOrEqual(t, context.Stats().Timers["_dd.appsec.waf.run"], time.Millisecond)
require.GreaterOrEqual(t, context.Stats().Timers["_dd.appsec.waf.encode.persistent"], time.Millisecond)
require.Equal(t, context.Stats().Timers["_dd.appsec.waf.encode.ephemeral"], time.Duration(0))
})

t.Run("timeout-ephemeral-encoder", func(t *testing.T) {
Expand All @@ -402,9 +402,9 @@ func TestTimeout(t *testing.T) {

_, err := context.Run(RunAddressData{Ephemeral: largeValue}, 0)
require.Equal(t, errors.ErrTimeout, err)
require.GreaterOrEqual(t, context.Stats()["_dd.appsec.waf.run"], time.Millisecond)
require.Equal(t, context.Stats()["_dd.appsec.waf.encode.persistent"], time.Duration(0))
require.GreaterOrEqual(t, context.Stats()["_dd.appsec.waf.encode.ephemeral"], time.Millisecond)
require.GreaterOrEqual(t, context.Stats().Timers["_dd.appsec.waf.run"], time.Millisecond)
require.Equal(t, context.Stats().Timers["_dd.appsec.waf.encode.persistent"], time.Duration(0))
require.GreaterOrEqual(t, context.Stats().Timers["_dd.appsec.waf.encode.ephemeral"], time.Millisecond)
})

t.Run("many-runs", func(t *testing.T) {
Expand Down Expand Up @@ -1199,7 +1199,7 @@ func TestTruncationInformation(t *testing.T) {

extra := rand.Intn(10) + 1 // Random int between 1 and 10

res, err := ctx.Run(RunAddressData{
_, err = ctx.Run(RunAddressData{
Ephemeral: map[string]any{
"my.input": map[string]any{
"string_too_long": strings.Repeat("Z", bindings.WafMaxStringLength+extra),
Expand All @@ -1217,7 +1217,7 @@ func TestTruncationInformation(t *testing.T) {
require.Equal(t, map[TruncationReason][]int{
StringTooLong: {bindings.WafMaxStringLength + extra + 2, bindings.WafMaxStringLength + extra},
ContainerTooLarge: {bindings.WafMaxContainerSize + extra + 2, bindings.WafMaxContainerSize + extra},
}, res.Truncations)
}, ctx.truncations)
}

func BenchmarkEncoder(b *testing.B) {
Expand Down

0 comments on commit 405eb7a

Please sign in to comment.