Add support for Distribution bucket for a double valued metric (#47)
* Add support for Distribution bucket for a double valued metric

* doc updated

* fixed the java doc

* A common interface for the distribution bucket

* fixed review comments

* distribution buckets in nano seconds

* fixed the typo

* documentated the constant fields

* review comment fixed

Co-authored-by: Smruti Ranjan Sahoo <[email protected]>
smrutilal2 and Smruti Ranjan Sahoo authored Jun 24, 2021
1 parent 57c83b4 commit 42d8ad8
Showing 11 changed files with 800 additions and 33 deletions.
70 changes: 68 additions & 2 deletions README.md
@@ -116,7 +116,7 @@ repositories {
}
dependencies {
-    compile group: 'io.ultrabrew.metrics', name: 'metrics-{your reporter}', version: '0.8.0'
+    compile group: 'io.ultrabrew.metrics', name: 'metrics-{your reporter}', version: '0.9.0'
}
```

@@ -127,7 +127,7 @@ dependencies {
<dependency>
<groupId>io.ultrabrew.metrics</groupId>
<artifactId>metrics-{your reporter}</artifactId>
-        <version>0.8.0</version>
+        <version>0.9.0</version>
</dependency>
</dependencies>
```
@@ -280,6 +280,72 @@ SLF4J Logger.
metricRegistry.addReporter(reporter);
```

#### Histograms

In the current implementation, clients must define the distribution buckets and, in the reporter, associate them with the name of the metric to be histogrammed.

There are two types of distribution buckets:
- `DistributionBucket`, backed by a primitive `long` array.
- `DoubleValuedDistributionBucket`, backed by a primitive `double` array.

##### DistributionBucket
Used to represent the distribution of an integer value, for example time spent in nanoseconds or the size of a messaging queue.

For a given latency distribution array in nanoseconds, [0, 10_000_000, 100_000_000, 500_000_000, 1_000_000_000], the buckets are:
* [0, 10_000_000) for 0 <= value < 10_000_000
* [10_000_000, 100_000_000) for 10_000_000 <= value < 100_000_000
* [100_000_000, 500_000_000) for 100_000_000 <= value < 500_000_000
* [500_000_000, 1_000_000_000) for 500_000_000 <= value < 1_000_000_000
* overflow for values >= 1_000_000_000
* underflow for values < 0

```Java
String metricId = "latency";
DistributionBucket distributionBucket = new DistributionBucket(new long[]{0, 10_000_000, 100_000_000, 500_000_000, 1_000_000_000});

SLF4JReporter reporter =
SLF4JReporter.builder().withName("metrics")
.addHistogram(metricId, distributionBucket) // add histogram for metric with id "latency"
.build();

String[] tagset = new String[] {"method", "GET", "resource", "metrics", "status", "200"};

Timer timer = metricRegistry.timer(metricId); // creates a timer metric with id "latency"

long start = Timer.start();
// doSomething();
timer.stop(start, tagset); // records the latency and the distribution in nanoseconds.
```
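The half-open bucket mapping described above can be sketched as follows. This is an illustrative, standalone helper built on `java.util.Arrays.binarySearch`, not the library's actual implementation; the index convention (finite buckets first, then overflow, then underflow) is an assumption for the sketch:

```java
import java.util.Arrays;

public class BucketSketch {

  // Bucket boundaries from the example above, in nanoseconds.
  static final long[] BUCKETS = {0, 10_000_000, 100_000_000, 500_000_000, 1_000_000_000};

  // Maps a value to the index of its half-open interval [BUCKETS[i], BUCKETS[i+1]).
  // Index BUCKETS.length - 1 marks overflow, BUCKETS.length marks underflow.
  static int bucketIndexOf(long value) {
    if (value < BUCKETS[0]) {
      return BUCKETS.length;          // underflow bucket
    }
    if (value >= BUCKETS[BUCKETS.length - 1]) {
      return BUCKETS.length - 1;      // overflow bucket
    }
    int i = Arrays.binarySearch(BUCKETS, value);
    // For a miss, binarySearch returns -(insertionPoint) - 1;
    // the containing bucket starts one slot before the insertion point.
    return i >= 0 ? i : -i - 2;
  }

  public static void main(String[] args) {
    System.out.println(bucketIndexOf(9_999_999));      // 0: [0, 10_000_000)
    System.out.println(bucketIndexOf(10_000_000));     // 1: [10_000_000, 100_000_000)
    System.out.println(bucketIndexOf(2_000_000_000L)); // 4: overflow
    System.out.println(bucketIndexOf(-1));             // 5: underflow
  }
}
```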

##### DoubleValuedDistributionBucket
Used to represent the distribution of a double-precision floating-point value, for example an ad auction price.

For a given distribution array [0.0, 0.25, 0.5, 1.0, 5.0, 10.0], the buckets are:
* [0.0, 0.25) for 0.0 <= value < 0.25
* [0.25, 0.5) for 0.25 <= value < 0.5
* [0.5, 1.0) for 0.5 <= value < 1.0
* [1.0, 5.0) for 1.0 <= value < 5.0
* [5.0, 10.0) for 5.0 <= value < 10.0
* overflow for values >= 10.0
* underflow for values < 0.0

```Java
String metricId = "auction_price";
DoubleValuedDistributionBucket distributionBucket = new DoubleValuedDistributionBucket(new double[]{0.0, 0.25, 0.5, 1.0, 5.0, 10.0});

SLF4JReporter reporter =
SLF4JReporter.builder().withName("metrics")
.addHistogram(metricId, distributionBucket) // add histogram for metric with id "auction_price"
.build();

String[] tagset = new String[] {"experiment", "exp1"};

GaugeDouble auctionPrice = metricRegistry.gaugeDouble(metricId); // creates a gauge double metric with id "auction_price"

auctionPrice.set(getAuctionPrice(), tagset); // records the auction_price and the distribution.
```


## Contribute

Please refer to [the Contributing.md file](Contributing.md) for information about how to get
@@ -0,0 +1,155 @@
// Copyright 2021, Oath Inc.
// Licensed under the terms of the Apache License 2.0 license. See LICENSE file in Ultrabrew Metrics
// for terms.

package io.ultrabrew.metrics.data;

import io.ultrabrew.metrics.GaugeDouble;

import java.util.Arrays;

import static io.ultrabrew.metrics.Metric.DEFAULT_CARDINALITY;
import static io.ultrabrew.metrics.Metric.DEFAULT_MAX_CARDINALITY;

/**
* A monoid to generate histogram buckets along with the common aggregation functions for a given
 * metric of type {@link GaugeDouble}.
*
* <p>Performs the following aggregation functions on the measurements:
*
* <ul>
* <li>count of measurements
* <li>sum of the measurement values
* <li>minimum measured value
* <li>maximum measured value
* <li>last measured value
* </ul>
*
* The histogram buckets are derived from {@link DoubleValuedDistributionBucket}.
*
* <p>A sample histogram would look like:
*
* <ul>
* <li>[0.0_0.1)
* <li>[0.1_0.5)
* <li>[0.5_50.0)
* <li>overflow
* <li>underflow
* </ul>
*
* @see DoubleValuedDistributionBucket
*/
public class BasicDoubleValuedHistogramAggregator extends BasicGaugeDoubleAggregator {

private final DoubleValuedDistributionBucket buckets;

/**
* Creates a monoid for the histogram buckets for a {@link GaugeDouble}
*
* @param metricId identifier of the metric associated with this aggregator
* @param bucket distribution bucket spec
*/
public BasicDoubleValuedHistogramAggregator(
final String metricId, final DoubleValuedDistributionBucket bucket) {
this(metricId, bucket, DEFAULT_MAX_CARDINALITY);
}

/**
* Creates a monoid for the histogram buckets for a {@link GaugeDouble}
*
* @param metricId identifier of the metric associated with this aggregator
* @param bucket distribution bucket spec
 * @param maxCardinality requested max capacity of table in records. Table doesn't grow beyond this limit.
*/
public BasicDoubleValuedHistogramAggregator(
final String metricId,
final DoubleValuedDistributionBucket bucket,
final int maxCardinality) {
this(metricId, bucket, maxCardinality, DEFAULT_CARDINALITY);
}

/**
* Creates a monoid for the histogram buckets for a {@link GaugeDouble}
*
* @param metricId identifier of the metric associated with this aggregator
* @param bucket distribution bucket spec
 * @param maxCardinality requested max capacity of table in records. Table doesn't grow beyond this limit.
* @param cardinality requested capacity of table in records, actual capacity may be higher
*/
public BasicDoubleValuedHistogramAggregator(
final String metricId,
final DoubleValuedDistributionBucket bucket,
final int maxCardinality,
final int cardinality) {
super(
metricId,
maxCardinality,
cardinality,
buildFields(bucket),
buildTypes(bucket),
buildIdentity(bucket));
this.buckets = bucket;
}

/**
* Creates an array containing the identity values of the aggregation field of {@link
* BasicGaugeDoubleAggregator} followed by zeros for the histogram buckets. These values are used
* to initialize and reset the field after reading.
*
* @param buckets distribution bucket spec
* @return array of identity values
 * @see BasicGaugeDoubleAggregator#FIELDS
 * @see BasicGaugeDoubleAggregator#IDENTITY
*/
private static long[] buildIdentity(DoubleValuedDistributionBucket buckets) {
long[] identity = new long[IDENTITY.length + buckets.getCount()];
System.arraycopy(IDENTITY, 0, identity, 0, IDENTITY.length);
Arrays.fill(identity, IDENTITY.length, identity.length, 0L);
return identity;
}

/**
* Creates an array of {@link Type} of the aggregation field and histogram buckets.
*
* @param buckets distribution bucket spec
* @return array of {@link Type}s of the fields
 * @see BasicGaugeDoubleAggregator#FIELDS
 * @see BasicGaugeDoubleAggregator#IDENTITY
 * @see BasicGaugeDoubleAggregator#TYPES
*/
private static Type[] buildTypes(DoubleValuedDistributionBucket buckets) {
Type[] types = new Type[TYPES.length + buckets.getCount()];
System.arraycopy(TYPES, 0, types, 0, TYPES.length);
Arrays.fill(types, TYPES.length, types.length, Type.LONG);
return types;
}

/**
* Creates an array of names of the aggregation field and histogram buckets.
*
* @param buckets distribution bucket spec
* @return array of field names
 * @see BasicGaugeDoubleAggregator#FIELDS
*/
private static String[] buildFields(DoubleValuedDistributionBucket buckets) {
String[] fields = new String[FIELDS.length + buckets.getCount()];
String[] bucketNames = buckets.getBucketNames();
System.arraycopy(FIELDS, 0, fields, 0, FIELDS.length);
System.arraycopy(bucketNames, 0, fields, FIELDS.length, bucketNames.length);
return fields;
}

@Override
public void combine(long[] table, long baseOffset, long value) {
final double d = Double.longBitsToDouble(value);
add(table, baseOffset, 0, 1L);
add(table, baseOffset, 1, d);
min(table, baseOffset, 2, d);
max(table, baseOffset, 3, d);
set(table, baseOffset, 4, d);

// Increment the bucket counter corresponding to the given value
int bucketIndex = buckets.getBucketIndex(d);
add(table, baseOffset, FIELDS.length + bucketIndex, 1);
}
}
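In `combine` above, each measurement arrives as a raw `long` and is reinterpreted as a `double` via `Double.longBitsToDouble`. A minimal sketch of the bit-level round-trip this relies on, using the standard `java.lang.Double` API (that the metric side encodes with `doubleToLongBits` is an assumption here, since that code is not part of this diff):

```java
public class DoubleBitsRoundTrip {

  public static void main(String[] args) {
    double price = 3.75;

    // A double-valued metric can store the IEEE 754 bit pattern in a long slot...
    long bits = Double.doubleToLongBits(price);

    // ...and the aggregator recovers the original double from those bits.
    double recovered = Double.longBitsToDouble(bits);

    System.out.println(recovered == price); // true: the round-trip is lossless
  }
}
```

This lets the aggregation tables hold only `long` cells while still supporting double-valued fields.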
@@ -25,10 +25,9 @@
*/
public class BasicGaugeDoubleAggregator extends ConcurrentMonoidLongTable {

-  private static final String[] FIELDS = {"count", "sum", "min", "max", "lastValue"};
-  private static final Type[] TYPES = {Type.LONG, Type.DOUBLE, Type.DOUBLE, Type.DOUBLE,
-      Type.DOUBLE};
-  private static final long[] IDENTITY = {0L, 0L, Long.MAX_VALUE, Long.MIN_VALUE, 0L};
+  static final String[] FIELDS = {"count", "sum", "min", "max", "lastValue"};
+  static final Type[] TYPES = {Type.LONG, Type.DOUBLE, Type.DOUBLE, Type.DOUBLE, Type.DOUBLE};
+  static final long[] IDENTITY = {0L, 0L, Long.MAX_VALUE, Long.MIN_VALUE, 0L};


/**
@@ -71,7 +70,12 @@ public BasicGaugeDoubleAggregator(final String metricId, final int maxCardinalit
*/
public BasicGaugeDoubleAggregator(final String metricId, final int maxCardinality,
final int cardinality) {
-    super(metricId, maxCardinality, cardinality, FIELDS, TYPES, IDENTITY);
+    this(metricId, maxCardinality, cardinality, FIELDS, TYPES, IDENTITY);
}

protected BasicGaugeDoubleAggregator(final String metricId, final int maxCardinality,
final int cardinality, final String[] fields, final Type[] types, final long[] identity) {
super(metricId, maxCardinality, cardinality, fields, types, identity);
}

@Override
@@ -83,4 +87,5 @@ public void combine(final long[] table, final long baseOffset, final long value)
max(table, baseOffset, 3, d);
set(table, baseOffset, 4, d);
}

}
@@ -13,21 +13,18 @@
*
* <P>For a given distribution array: [0, 10, 100, 500, 1000], the buckets would be like:</P>
* <ul>
- * <li>[0,10) for values 0-9</li>
- * <li>[10,100) for values 10-99</li>
- * <li>[100,500) for values 100-499</li>
- * <li>[500,1000) for values 500-999</li>
- * <li>overflow for values {@literal >}= 1000</li>
- * <li>underflow for values {@literal <} 0</li>
+ * <li>[0, 10) for 0 {@literal <=} value {@literal <} 10
+ * <li>[10, 100) for 10 {@literal <=} value {@literal <} 100
+ * <li>[100, 500) for 100 {@literal <=} value {@literal <} 500
+ * <li>[500, 1000) for 500 {@literal <=} value {@literal <} 1000
+ * <li>overflow for values {@literal >=} 1000
+ * <li>underflow for values {@literal <} 0
* </ul>
*
* @see BasicHistogramAggregator
* @see NameSpec
*/
-public class DistributionBucket {
-
-  private static final String UNDERFLOW = "underflow";
-  private static final String OVERFLOW = "overflow";
+public class DistributionBucket implements DistributionBucketIF<DistributionBucket> {

private final long[] buckets;
private final NameSpec nameSpec;
@@ -56,8 +53,12 @@ public DistributionBucket(final long[] buckets, final NameSpec nameSpec) {
this.nameSpec = nameSpec;
}

+  /**
+   * @return count of buckets including {@link #OVERFLOW} and {@link #UNDERFLOW} buckets.
+   */
+  @Override
   public int getCount() {
-    return buckets.length + 1; // includes the underflow and overflow buckets
+    return buckets.length + 1;
}

/**
@@ -117,6 +118,7 @@ public int getBucketIndex(long value) {
*
* @return array of bucket names
*/
@Override
public String[] getBucketNames() {
int bucketCount = buckets.length;
String[] names = new String[bucketCount + 1];
@@ -129,6 +131,11 @@ public String[] getBucketNames() {
return names;
}

@Override
public Aggregator buildAggregator(String metricId, DistributionBucket bucket, int maxCardinality) {
return new BasicHistogramAggregator(metricId, bucket, maxCardinality);
}

private static boolean isSorted(final long[] buckets) {
return !matchAny(buckets, i -> buckets[i] > buckets[i + 1]);
}
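The `matchAny` helper used by `isSorted` is not part of this diff. For illustration only, an equivalent plain-loop version of the sortedness check (class and method names here are hypothetical, not the library's):

```java
public class SortedCheck {

  // True when the bucket boundaries are in non-decreasing order,
  // mirroring isSorted's predicate buckets[i] > buckets[i + 1].
  static boolean isSorted(long[] buckets) {
    for (int i = 0; i + 1 < buckets.length; i++) {
      if (buckets[i] > buckets[i + 1]) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(isSorted(new long[] {0, 10, 100, 500}));  // true
    System.out.println(isSorted(new long[] {10, 0, 100, 500}));  // false
  }
}
```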