Skip to content

Commit

Permalink
Add Reset Transformation (#2794)
Browse files Browse the repository at this point in the history
* Add Reset Transformation to reset prometheus counters

This transformation resets an aggregated prometheus counter by emitting
a zero. This transformation fixes the display issue when an M3Aggregator
failover occurs. When the Add transformation is used to generate a cumulative
counter for prometheus, the values in the leader and follower are different
since they started counting at different times. When the failover happens, the
value might decrease. When a cumulative counter decreases, PromQL assumes the
counter reset. This leads to strange display issues, since the counter did not
actually reset. By explicitly reseting the counter each resolution period and
not accumulating a counter, PromQL can correclty graph the rate across failovers.

This does have the downside of an extra 0 datapoint per resolution period. The storage
cost is more than just the extra 0 since the extra 0 is stored 1 second after the actual
datapoint. This degrades the timestamp encoding since the timestamps are no longer at
a fixed interval. In practice we see a 3x increase in storage for these aggregated counters.
ryanhall07 authored Oct 28, 2020
1 parent 7ed094e commit 89215d5
Showing 14 changed files with 382 additions and 39 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -2,6 +2,9 @@
mock_*.go linguist-generated
*_mock.go linguist-generated

# genny files
*_gen.go linguist-generated

# generated manifests
kube/bundle.yaml linguist-generated=true

9 changes: 8 additions & 1 deletion scripts/auto-gen-helpers.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#!/bin/bash
source "${GOPATH}/src/github.com/m3db/m3/.ci/variables.sh"

if [ -z "$GOPATH" ]; then
# assume a developer is running locally without a GOPATH
source .ci/variables.sh
else
source "${GOPATH}/src/github.com/m3db/m3/.ci/variables.sh"
fi


set -e

31 changes: 26 additions & 5 deletions src/aggregator/aggregator/counter_elem_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions src/aggregator/aggregator/elem_test.go
Original file line number Diff line number Diff line change
@@ -1613,6 +1613,96 @@ func TestGaugeElemConsumeCustomAggregationCustomPipeline(t *testing.T) {
require.Equal(t, 0, len(e.values))
}

func TestGaugeElemReset(t *testing.T) {
alignedstartAtNanos := []int64{
time.Unix(210, 0).UnixNano(),
time.Unix(220, 0).UnixNano(),
time.Unix(230, 0).UnixNano(),
time.Unix(240, 0).UnixNano(),
}
gaugeVals := []float64{123.0, 456.0, 589.0}
aggregationTypes := maggregation.Types{maggregation.Sum}
isEarlierThanFn := isStandardMetricEarlierThan
timestampNanosFn := standardMetricTimestampNanos
opts := NewOptions().SetDiscardNaNAggregatedValues(false)

testPipeline := applied.NewPipeline([]applied.OpUnion{
{
Type: pipeline.TransformationOpType,
Transformation: pipeline.TransformationOp{Type: transformation.Reset},
},
})

e := testGaugeElem(alignedstartAtNanos[:3], gaugeVals, aggregationTypes, testPipeline, opts)

// Consume values before an early-enough time.
localFn, localRes := testFlushLocalMetricFn()
forwardFn, forwardRes := testFlushForwardedMetricFn()
onForwardedFlushedFn, onForwardedFlushedRes := testOnForwardedFlushedFn()
require.False(t, e.Consume(0, isEarlierThanFn, timestampNanosFn, localFn, forwardFn, onForwardedFlushedFn))
require.Equal(t, 0, len(*onForwardedFlushedRes))
require.Equal(t, 0, len(*localRes))
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 3, len(e.values))

// Consume one value.
localFn, localRes = testFlushLocalMetricFn()
forwardFn, forwardRes = testFlushForwardedMetricFn()
onForwardedFlushedFn, onForwardedFlushedRes = testOnForwardedFlushedFn()
require.False(t, e.Consume(alignedstartAtNanos[1], isEarlierThanFn, timestampNanosFn, localFn, forwardFn, onForwardedFlushedFn))
require.Equal(t, (*localRes)[0].timeNanos, alignedstartAtNanos[1])
require.Equal(t, (*localRes)[0].value, 123.0)
require.Equal(t, (*localRes)[1].timeNanos, alignedstartAtNanos[1]+int64(time.Second))
require.Equal(t, (*localRes)[1].value, 0.0)
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 0, len(*onForwardedFlushedRes))
require.Equal(t, 2, len(e.values))
require.Equal(t, time.Unix(220, 0).UnixNano(), e.lastConsumedAtNanos)
require.Equal(t, 0, len(e.lastConsumedValues))

// Consume all values.
localFn, localRes = testFlushLocalMetricFn()
forwardFn, forwardRes = testFlushForwardedMetricFn()
onForwardedFlushedFn, onForwardedFlushedRes = testOnForwardedFlushedFn()
require.False(t, e.Consume(alignedstartAtNanos[3], isEarlierThanFn, timestampNanosFn, localFn, forwardFn, onForwardedFlushedFn))
require.Equal(t, (*localRes)[0].timeNanos, alignedstartAtNanos[2])
require.Equal(t, (*localRes)[0].value, 456.0)
require.Equal(t, (*localRes)[1].timeNanos, alignedstartAtNanos[2]+int64(time.Second))
require.Equal(t, (*localRes)[1].value, 0.0)
require.Equal(t, (*localRes)[2].timeNanos, alignedstartAtNanos[3])
require.Equal(t, (*localRes)[2].value, 589.0)
require.Equal(t, (*localRes)[3].timeNanos, alignedstartAtNanos[3]+int64(time.Second))
require.Equal(t, (*localRes)[3].value, 0.0)
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 0, len(*onForwardedFlushedRes))
require.Equal(t, 0, len(e.values))
require.Equal(t, time.Unix(240, 0).UnixNano(), e.lastConsumedAtNanos)
require.Equal(t, 0, len(e.lastConsumedValues))

// Tombstone the element and discard all values.
e.tombstoned = true
localFn, localRes = testFlushLocalMetricFn()
forwardFn, forwardRes = testFlushForwardedMetricFn()
onForwardedFlushedFn, onForwardedFlushedRes = testOnForwardedFlushedFn()
require.True(t, e.Consume(alignedstartAtNanos[3], isEarlierThanFn, timestampNanosFn, localFn, forwardFn, onForwardedFlushedFn))
//verifyOnForwardedFlushResult(t, expectedOnFlushedRes, *onForwardedFlushedRes)
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 0, len(*localRes))
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 0, len(e.values))

// Reading and discarding values from a closed element is no op.
e.closed = true
localFn, localRes = testFlushLocalMetricFn()
forwardFn, forwardRes = testFlushForwardedMetricFn()
onForwardedFlushedFn, onForwardedFlushedRes = testOnForwardedFlushedFn()
require.False(t, e.Consume(alignedstartAtNanos[3], isEarlierThanFn, timestampNanosFn, localFn, forwardFn, onForwardedFlushedFn))
require.Equal(t, 0, len(*localRes))
require.Equal(t, 0, len(*forwardRes))
require.Equal(t, 0, len(*onForwardedFlushedRes))
require.Equal(t, 0, len(e.values))
}

func TestGaugeElemClose(t *testing.T) {
e := testGaugeElem(testAlignedStarts[:len(testAlignedStarts)-1], testGaugeVals, maggregation.DefaultTypes, applied.DefaultPipeline, NewOptions())
require.False(t, e.closed)
31 changes: 26 additions & 5 deletions src/aggregator/aggregator/gauge_elem_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 26 additions & 5 deletions src/aggregator/aggregator/generic_elem.go
Original file line number Diff line number Diff line change
@@ -468,10 +468,12 @@ func (e *GenericElem) processValueWithAggregationLock(
discardNaNValues = e.opts.DiscardNaNAggregatedValues()
)
for aggTypeIdx, aggType := range e.aggTypes {
var extraDp transformation.Datapoint
value := lockedAgg.aggregation.ValueOf(aggType)
for _, transformOp := range transformations {
unaryOp, isUnaryOp := transformOp.UnaryTransform()
binaryOp, isBinaryOp := transformOp.BinaryTransform()
unaryMultiOp, isUnaryMultiOp := transformOp.UnaryMultiOutputTransform()
switch {
case isUnaryOp:
curr := transformation.Datapoint{
@@ -507,7 +509,15 @@ func (e *GenericElem) processValueWithAggregationLock(
}

value = res.Value
case isUnaryMultiOp:
curr := transformation.Datapoint{
TimeNanos: timeNanos,
Value: value,
}

var res transformation.Datapoint
res, extraDp = unaryMultiOp.Evaluate(curr)
value = res.Value
}
}

@@ -516,11 +526,22 @@ func (e *GenericElem) processValueWithAggregationLock(
}

if !e.parsedPipeline.HasRollup {
switch e.idPrefixSuffixType {
case NoPrefixNoSuffix:
flushLocalFn(nil, e.id, nil, timeNanos, value, e.sp)
case WithPrefixWithSuffix:
flushLocalFn(e.FullPrefix(e.opts), e.id, e.TypeStringFor(e.aggTypesOpts, aggType), timeNanos, value, e.sp)
toFlush := make([]transformation.Datapoint, 0, 2)
toFlush = append(toFlush, transformation.Datapoint{
TimeNanos: timeNanos,
Value: value,
})
if extraDp.TimeNanos != 0 {
toFlush = append(toFlush, extraDp)
}
for _, point := range toFlush {
switch e.idPrefixSuffixType {
case NoPrefixNoSuffix:
flushLocalFn(nil, e.id, nil, point.TimeNanos, point.Value, e.sp)
case WithPrefixWithSuffix:
flushLocalFn(e.FullPrefix(e.opts), e.id, e.TypeStringFor(e.aggTypesOpts, aggType),
point.TimeNanos, point.Value, e.sp)
}
}
} else {
forwardedAggregationKey, _ := e.ForwardedAggregationKey()
31 changes: 26 additions & 5 deletions src/aggregator/aggregator/timer_elem_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 10 additions & 6 deletions src/metrics/generated/proto/transformationpb/transformation.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -30,4 +30,5 @@ enum TransformationType {
PERSECOND = 2;
INCREASE = 3;
ADD = 4;
RESET = 5;
}
13 changes: 13 additions & 0 deletions src/metrics/transformation/func.go
Original file line number Diff line number Diff line change
@@ -66,3 +66,16 @@ type BinaryTransformFn func(prev, curr Datapoint) Datapoint
func (fn BinaryTransformFn) Evaluate(prev, curr Datapoint) Datapoint {
return fn(prev, curr)
}

// UnaryMultiOutputTransform is like UnaryTransform, but can output an additional datapoint.
// The additional datapoint is not passed to subsequent transforms.
type UnaryMultiOutputTransform interface {
Evaluate(dp Datapoint) (Datapoint, Datapoint)
}

// UnaryMultiOutputTransformFn implements UnaryMultiOutputTransform as a function.
type UnaryMultiOutputTransformFn func(dp Datapoint) (Datapoint, Datapoint)

func (fn UnaryMultiOutputTransformFn) Evaluate(dp Datapoint) (Datapoint, Datapoint) {
return fn(dp)
}
69 changes: 59 additions & 10 deletions src/metrics/transformation/type.go
Original file line number Diff line number Diff line change
@@ -37,11 +37,12 @@ const (
PerSecond
Increase
Add
Reset
)

// IsValid checks if the transformation type is valid.
func (t Type) IsValid() bool {
return t.IsUnaryTransform() || t.IsBinaryTransform()
return t.IsUnaryTransform() || t.IsBinaryTransform() || t.IsUnaryMultiOutputTransform()
}

// IsUnaryTransform returns whether this is a unary transformation.
@@ -56,29 +57,38 @@ func (t Type) IsBinaryTransform() bool {
return exists
}

func (t Type) IsUnaryMultiOutputTransform() bool {
_, exists := unaryMultiOutputTransforms[t]
return exists
}

// NewOp returns a constructed operation that is allocated once and can be
// reused.
func (t Type) NewOp() (Op, error) {
var (
err error
unary UnaryTransform
binary BinaryTransform
err error
unary UnaryTransform
binary BinaryTransform
unaryMulti UnaryMultiOutputTransform
)
switch {
case t.IsUnaryTransform():
unary, err = t.UnaryTransform()
case t.IsBinaryTransform():
binary, err = t.BinaryTransform()
case t.IsUnaryMultiOutputTransform():
unaryMulti, err = t.UnaryMultiOutputTransform()
default:
err = fmt.Errorf("unknown transformation type: %v", t)
}
if err != nil {
return Op{}, err
}
return Op{
opType: t,
unary: unary,
binary: binary,
opType: t,
unary: unary,
binary: binary,
unaryMulti: unaryMulti,
}, nil
}

@@ -122,6 +132,26 @@ func (t Type) MustBinaryTransform() BinaryTransform {
return tf
}

// UnaryMultiOutputTransform returns the unary transformation function associated with
// the transformation type if applicable, or an error otherwise.
func (t Type) UnaryMultiOutputTransform() (UnaryMultiOutputTransform, error) {
tf, exists := unaryMultiOutputTransforms[t]
if !exists {
return nil, fmt.Errorf("%v is not a unary transfomration", t)
}
return tf(), nil
}

// MustUnaryMultiOutputTransform returns the unary transformation function associated with
// the transformation type if applicable, or panics otherwise.
func (t Type) MustUnaryMultiOutputTransform() UnaryMultiOutputTransform {
tf, err := t.UnaryMultiOutputTransform()
if err != nil {
panic(err)
}
return tf
}

// ToProto converts the transformation type to a protobuf message in place.
func (t Type) ToProto(pb *transformationpb.TransformationType) error {
switch t {
@@ -133,6 +163,8 @@ func (t Type) ToProto(pb *transformationpb.TransformationType) error {
*pb = transformationpb.TransformationType_INCREASE
case Add:
*pb = transformationpb.TransformationType_ADD
case Reset:
*pb = transformationpb.TransformationType_RESET
default:
return fmt.Errorf("unknown transformation type: %v", t)
}
@@ -150,6 +182,8 @@ func (t *Type) FromProto(pb transformationpb.TransformationType) error {
*t = Increase
case transformationpb.TransformationType_ADD:
*t = Add
case transformationpb.TransformationType_RESET:
*t = Reset
default:
return fmt.Errorf("unknown transformation type in proto: %v", pb)
}
@@ -201,9 +235,10 @@ func ParseType(str string) (Type, error) {
type Op struct {
opType Type

// might have either unary or binary
unary UnaryTransform
binary BinaryTransform
// has one of the following
unary UnaryTransform
binary BinaryTransform
unaryMulti UnaryMultiOutputTransform
}

// Type returns the op type.
@@ -227,6 +262,14 @@ func (o Op) BinaryTransform() (BinaryTransform, bool) {
return o.binary, true
}

// UnaryMultiOutputTransform returns the active unary multi transform if op is unary multi transform.
func (o Op) UnaryMultiOutputTransform() (UnaryMultiOutputTransform, bool) {
if !o.Type().IsUnaryMultiOutputTransform() {
return nil, false
}
return o.unaryMulti, true
}

var (
unaryTransforms = map[Type]func() UnaryTransform{
Absolute: transformAbsolute,
@@ -236,6 +279,9 @@ var (
PerSecond: transformPerSecond,
Increase: transformIncrease,
}
unaryMultiOutputTransforms = map[Type]func() UnaryMultiOutputTransform{
Reset: transformReset,
}
typeStringMap map[string]Type
)

@@ -247,4 +293,7 @@ func init() {
for t := range binaryTransforms {
typeStringMap[t.String()] = t
}
for t := range unaryMultiOutputTransforms {
typeStringMap[t.String()] = t
}
}
5 changes: 3 additions & 2 deletions src/metrics/transformation/type_string.go
47 changes: 47 additions & 0 deletions src/metrics/transformation/unary_multi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package transformation

import "time"

// transformReset returns the provided datapoint and a zero datapoint one second later.
//
// This transform is useful for force resetting a counter value in Prometheus. When running the M3Aggregator in HA, both
// the follower and leader are computing aggregate counters, but they started counting at different times. If these
// counters are emitted as monotonic cumulative counters, during failover the counter decreases if the new leader
// started counting later. Prometheus assumes any decrease in a counter is due a counter reset, which leads to strange
// display results since the counter did not actually reset.
//
// This transform gets around this issue by explicitly not accumulating results, like Add, and force resets the counter
// with a zero value so PromQL properly graphs the delta as the rate value.
//
// This does have the downside of an extra 0 datapoint per resolution period. The storage cost is more than just the
// extra 0 value since the value is stored 1 second after the actual datapoint. This degrades the timestamp encoding
// since the timestamps are no longer at a fixed interval. In practice we see a 3x increase in storage for these
// aggregated counters.
//
// Currently only a single extra datapoint per aggregation is supported. If multiple transforms in an aggregation emit
// an additional datapoint, only the last one is used.
func transformReset() UnaryMultiOutputTransform {
return UnaryMultiOutputTransformFn(func(dp Datapoint) (Datapoint, Datapoint) {
return dp, Datapoint{Value: 0, TimeNanos: dp.TimeNanos + int64(time.Second)}
})
}
44 changes: 44 additions & 0 deletions src/metrics/transformation/unary_multi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package transformation

import (
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestReset(t *testing.T) {
reset, err := Reset.UnaryMultiOutputTransform()
require.NoError(t, err)
now := time.Now()
dp := Datapoint{Value: 1000, TimeNanos: now.UnixNano()}
this, other := reset.Evaluate(dp)
require.Equal(t, dp, this)
require.Equal(t, Datapoint{Value: 0, TimeNanos: now.Add(time.Second).UnixNano()}, other)
}

func TestUnaryMultiParse(t *testing.T) {
parsed, err := ParseType("Reset")
require.NoError(t, err)
require.Equal(t, Reset, parsed)
}

0 comments on commit 89215d5

Please sign in to comment.