diff --git a/ArgusCore/pom.xml b/ArgusCore/pom.xml index 012395b8d..f023262a7 100644 --- a/ArgusCore/pom.xml +++ b/ArgusCore/pom.xml @@ -372,6 +372,11 @@ asynchbase 1.7.0 + + nz.ac.waikato.cms.weka + weka-stable + 3.6.14 + diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransform.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransform.java new file mode 100644 index 000000000..19663c9f5 --- /dev/null +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransform.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.dva.argus.service.metric.transform; + +/** + * Gaussian-based anomaly detection using probability density estimation. + * Source: http://www.holehouse.org/mlclass/15_Anomaly_Detection.html (Andrew Ng) + * + * @author Shouvik Mani (shouvik.mani@salesforce.com) + */ +public class AnomalyDetectionGaussianDensityTransform extends AnomalyDetectionGaussianTransform { + + private static final String RESULT_METRIC_NAME = "probability density (neg. log)"; + + @Override + public String getResultScopeName() { + return TransformFactory.Function.ANOMALY_DENSITY.name(); + } + + @Override + public String getResultMetricName() { + return RESULT_METRIC_NAME; + } + + /** + * Calculates the probability density (PDF) of the data point, which + * describes the relative likelihood of the point occurring in the + * Gaussian distribution. + * + * Large variances in data causes floating point underflow during the + * probability density calculation. Since we cannot take the negative + * log of 0.0, data points that cause underflow are omitted from the + * anomaly score results. 
+ * + * @param value the value of the data point + * @return the negative log of the probability density of the data point + */ + @Override + public double calculateAnomalyScore(double value) { + double probabilityDensity = (1.0/Math.sqrt(2.0 * Math.PI * variance)) * + Math.exp((-1.0 * Math.pow((value - mean), 2.0)) / (2.0 * variance)); + + if (probabilityDensity == 0.0) { + throw new ArithmeticException("Cannot take the log of 0."); + } + + /** + * Taking negative log transforms the probability density + * into a human-readable anomaly score + */ + return -1.0 * Math.log(probabilityDensity); + } + +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianTransform.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianTransform.java new file mode 100644 index 000000000..d9f6561ee --- /dev/null +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianTransform.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.dva.argus.service.metric.transform; + +import com.salesforce.dva.argus.entity.Metric; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Abstract class for Gaussian distribution based anomaly detection transforms. + * + * Estimates mean and variance parameters to build a model from the data. Then + * calculates an anomaly score for each data point, indicating how likely it is + * to be an anomaly relative to other points. 
+ * + * @author Shouvik Mani (shouvik.mani@salesforce.com) + */ +public abstract class AnomalyDetectionGaussianTransform extends AnomalyDetectionTransform { + + //Parameters for Gaussian distribution + protected double mean; + protected double variance; + + @Override + public List transform(List metrics) { + if (metrics == null) { + throw new MissingDataException("The metrics list cannot be null or empty while performing transforms."); + } + if (metrics.size() != 1) { + throw new UnsupportedOperationException("Anomaly Detection Transform can only be used with one metric."); + } + + Metric metric = metrics.get(0); + Map metricData = metric.getDatapoints(); + if (metricData.size() == 0) { + throw new MissingDataException("Metric must contain data points to perform transforms."); + } + + fitParameters(metricData); + Metric predictions = predictAnomalies(metricData); + Metric predictionsNormalized = normalizePredictions(predictions); + + List resultMetrics = new ArrayList<>(); + resultMetrics.add(predictionsNormalized); + return resultMetrics; + } + + //Fits the mean and variance parameters to the data + private void fitParameters(Map metricData) { + mean = getMetricMean(metricData); + variance = getMetricVariance(metricData); + } + + /** + * Assigns an anomaly score to each data point, indicating how likely it is + * to be an anomaly relative to other points. + */ + private Metric predictAnomalies(Map metricData) { + Metric predictions = new Metric(getResultScopeName(), getResultMetricName()); + Map predictionDatapoints = new HashMap<>(); + + if (variance == 0.0) { + /** + * If variance is 0, there are no anomalies. + * Also, using 0 for variance would cause divide by zero operations + * in Gaussian anomaly formulas. This condition avoids such operations. 
+ */ + for (Entry entry : metricData.entrySet()) { + Long timestamp = entry.getKey(); + predictionDatapoints.put(timestamp, "0.0"); + } + } else { + for (Entry entry : metricData.entrySet()) { + Long timestamp = entry.getKey(); + String valueString = entry.getValue(); + double valueDouble = Double.parseDouble(valueString); + try { + double anomalyScore = calculateAnomalyScore(valueDouble); + predictionDatapoints.put(timestamp, String.valueOf(anomalyScore)); + } catch (ArithmeticException e) { + continue; + } + } + } + + predictions.setDatapoints(predictionDatapoints); + return predictions; + } + + private double getMetricMean(Map metricData) { + double sum = 0; + for (String valueString : metricData.values()) { + double valueDouble = Double.parseDouble(valueString); + sum += valueDouble; + } + return sum/metricData.size(); + } + + private double getMetricVariance(Map metricData) { + double sumSquareDiff = 0; + for (String valueString : metricData.values()) { + double valueDouble = Double.parseDouble(valueString); + sumSquareDiff += Math.pow((valueDouble - mean), 2); + } + return sumSquareDiff/metricData.size(); + } + +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransform.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransform.java new file mode 100644 index 000000000..a5b8fbac4 --- /dev/null +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransform.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.dva.argus.service.metric.transform; + +/** + * Gaussian-based anomaly detection using z-score calculation. 
+ * Source: http://trevorwhitney.com/data_mining/anomaly_detection + * + * @author Shouvik Mani (shouvik.mani@salesforce.com) + */ +public class AnomalyDetectionGaussianZScoreTransform extends AnomalyDetectionGaussianTransform { + + private static final String RESULT_METRIC_NAME = "z-score (abs value)"; + + @Override + public String getResultScopeName() { + return TransformFactory.Function.ANOMALY_ZSCORE.name(); + } + + @Override + public String getResultMetricName() { + return RESULT_METRIC_NAME; + } + + /** + * Calculates the z-score of the data point, which measures how many + * standard deviations the data point is away from the mean. + * + * @param value the value of the data point + * @return the absolute value of the z-score of the data point + */ + @Override + public double calculateAnomalyScore(double value) { + double zScore = (value - mean) / Math.sqrt(variance); + //Taking absolute value for a more human-readable anomaly score + return Math.abs(zScore); + } + +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionKMeansTransform.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionKMeansTransform.java new file mode 100644 index 000000000..382d4a2b4 --- /dev/null +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionKMeansTransform.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.dva.argus.service.metric.transform; + +import com.salesforce.dva.argus.entity.Metric; + +import java.util.*; +import java.util.stream.Collectors; + +import com.salesforce.dva.argus.system.SystemAssert; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import weka.clusterers.SimpleKMeans; +import weka.core.*; + +/** + * Anomaly detection based on the K-means clustering algorithm. Does not make + * a Gaussian assumption and can be applied to data from any distribution. 
+ * + * Sources: + * (i) http://www.holehouse.org/mlclass/13_Clustering.html (Andrew Ng) + * (ii) http://trevorwhitney.com/data_mining/anomaly_detection (Clustering section) + * + * @author Shouvik Mani (shouvik.mani@salesforce.com) + */ +public class AnomalyDetectionKMeansTransform extends AnomalyDetectionTransform { + + private int k; + private List metricDataValues; + private Instances trainingData; + private SimpleKMeans model; + private Instances clusterCentroids; + private int[] centroidAssignments; + private Map meanDistancesToCentroids; + private static final String RESULT_METRIC_NAME = "K-means anomaly score"; + + @Override + public List transform(List metrics) { + SystemAssert.requireArgument(k > 0, "K-means anomaly detection transform requires a positive integer " + + "k constant."); + + Map metricData = metrics.get(0).getDatapoints(); + metricDataValues = metricData.values().stream().map(Double::parseDouble).collect(Collectors.toList()); + if (metricData.size() == 0) throw new MissingDataException("Metric must contain data points to perform transforms."); + + try { + trainModel(metricData); + } catch (Exception e) { + throw new UnsupportedOperationException("Cluster creation unsuccessful"); + } + + Metric predictions = predictAnomalies(metricData); + Metric predictionsNormalized = normalizePredictions(predictions); + + List resultMetrics = new ArrayList<>(); + resultMetrics.add(predictionsNormalized); + return resultMetrics; + } + + @Override + public List transform(List metrics, List constants) { + SystemAssert.requireArgument(metrics != null, "Cannot transform null or empty metrics"); + SystemAssert.requireArgument(metrics.size() == 1, "Anomaly Detection Transform can only be used with one metric."); + SystemAssert.requireArgument(constants.size() > 0, "K-means anomaly detection transform requires a k constant."); + SystemAssert.requireArgument(constants.size() < 2, "K-means anomaly detection transform does not support " + + "contextual anomaly 
detection."); + + try { + k = Integer.valueOf(constants.get(0)); + } catch (NumberFormatException e) { + throw new UnsupportedOperationException("K-means anomaly detection transform requires a positive integer " + + "k constant."); + } + + return transform(metrics); + } + + private void trainModel(Map metricData) throws Exception { + //Model has a single metric_value attribute + Attribute value = new Attribute("metric_value"); + FastVector attributes = new FastVector(); + attributes.addElement(value); + + trainingData = new Instances("metric_value_data", attributes, 0); + for (String val : metricData.values()) { + double[] valArray = new double[] { Double.parseDouble(val) }; + Instance instance = new Instance(1.0, valArray); + trainingData.add(instance); + } + + //Create and train the model + model = new SimpleKMeans(); + model.setNumClusters(k); + model.setMaxIterations(20); + model.setPreserveInstancesOrder(true); + model.buildClusterer(trainingData); + + clusterCentroids = model.getClusterCentroids(); + centroidAssignments = model.getAssignments(); + setMeanDistancesToCentroids(); + } + + /** + * For each cluster, caches the mean distance from data points in the + * cluster to the cluster centroid. Mean distances are used later in + * anomaly score calculations. 
+ */ + private void setMeanDistancesToCentroids() { + meanDistancesToCentroids = new HashMap<>(); + for (int i = 0; i < clusterCentroids.numInstances(); i++) { //For each centroid + int countAssignedInstances = 0; + double sumDistancesToCentroid = 0.0; + Instance centroidInstance = clusterCentroids.instance(i); + for (int j = 0; j < trainingData.numInstances(); j++) { //For each data point + if (i == centroidAssignments[j]) { + Instance valueInstance = trainingData.instance(j); + double distanceToCentroid = Math.abs(valueInstance.value(0) - + centroidInstance.value(0)); + sumDistancesToCentroid += distanceToCentroid; + countAssignedInstances++; + } + } + double meanDistanceToCentroid = sumDistancesToCentroid / countAssignedInstances; + meanDistancesToCentroids.put(centroidInstance, meanDistanceToCentroid); + } + } + + /** + * Assigns an anomaly score to each data point, indicating how likely it is + * to be an anomaly relative to other points. + */ + private Metric predictAnomalies(Map metricData) { + Metric predictions = new Metric(getResultScopeName(), getResultMetricName()); + Map predictionDatapoints = new HashMap<>(); + + for (Map.Entry entry : metricData.entrySet()) { + Long timestamp = entry.getKey(); + double value = Double.parseDouble(entry.getValue()); + try { + double anomalyScore = calculateAnomalyScore(value); + predictionDatapoints.put(timestamp, String.valueOf(anomalyScore)); + } catch (ArithmeticException e) { + continue; + } + } + + predictions.setDatapoints(predictionDatapoints); + return predictions; + } + + @Override + public String getResultScopeName() { + return TransformFactory.Function.ANOMALY_KMEANS.name(); + } + + @Override + public String getResultMetricName() { + return RESULT_METRIC_NAME; + } + + /** + * Calculates the relative distance of the data point to the centroid, + * which is the ratio of the distance of the point to the centroid to + * the mean distance of all points in that cluster to the centroid. 
+ * + * Anomaly score here is defined as how "far" a data point is from its + * assigned centroid. Relative distance is used to ensure normalization + * of distances since clusters can have different densities. + * + * @param value the value of the data point + * @return the relative distance of the data point from the centroid + */ + @Override + public double calculateAnomalyScore(double value) { + int instanceIndex = metricDataValues.indexOf(value); + Instance valueInstance = trainingData.instance(instanceIndex); + //Centroid that is assigned to valueInstance + Instance centroidInstance = clusterCentroids.instance(centroidAssignments[instanceIndex]); + + if (meanDistancesToCentroids.get(centroidInstance) == 0.0) { + throw new ArithmeticException("Cannot divide by 0"); + } + + double distanceToCentroid = Math.abs(valueInstance.value(0) - centroidInstance.value(0)); + double relativeDistanceToCentroid = distanceToCentroid / meanDistancesToCentroids.get(centroidInstance); + return relativeDistanceToCentroid; + } +} diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionTransform.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionTransform.java new file mode 100644 index 000000000..d2403cf12 --- /dev/null +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionTransform.java @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package com.salesforce.dva.argus.service.metric.transform; + + +import com.salesforce.dva.argus.entity.Metric; +import com.salesforce.dva.argus.service.metric.MetricReader; +import com.salesforce.dva.argus.system.SystemAssert; + +import java.util.*; +import java.util.Map.Entry; + +/** + * Abstract class for anomaly detection transforms + * + * @author Shouvik Mani (shouvik.mani@salesforce.com) + */ +public abstract class AnomalyDetectionTransform implements Transform { + + @Override + /** + * This implementation of transform() handles contextual anomaly detection, which + * identifies anomalies within pre-defined intervals of the metric + * + * Ex: ANOMALY_DENSITY(-100d:-0d:foo:bar:sum, $7d), where the interval is 7 days + */ + public List transform(List metrics, List constants) { + SystemAssert.requireArgument(metrics != null, "Cannot transform null or empty metrics"); + SystemAssert.requireArgument(metrics.size() == 1, "Anomaly Detection Transform can only be used with one metric."); + + Metric predictions = new Metric(getResultScopeName(), getResultMetricName()); + Map predictionDatapoints = new HashMap<>(); + + long detectionIntervalInSeconds = getDetectionIntervalInSeconds(constants.get(0)); + + //Create a sorted array of the metric's timestamps + Map completeDatapoints = metrics.get(0).getDatapoints(); + Long[] timestamps = completeDatapoints.keySet().toArray(new Long[completeDatapoints.size()]); + Arrays.sort(timestamps); + + int currentIndex = 0; + currentIndex = advanceCurrentIndexByInterval(currentIndex, predictionDatapoints, + timestamps, detectionIntervalInSeconds); + calculateContextualAnomalyScores(predictionDatapoints, completeDatapoints, timestamps, + currentIndex, detectionIntervalInSeconds); + + predictions.setDatapoints(predictionDatapoints); + List resultMetrics = new ArrayList<>(); + resultMetrics.add(predictions); + return resultMetrics; + } + + @Override + public List transform(List... 
metrics) { + throw new UnsupportedOperationException("This transform only supports anomaly detection on a single list of metrics"); + } + + /** + * Normalize predictions into a range of [0, 100]. Lowest value will map + * to 0, highest value will map to 100, rest will map to values in between. + * Normalization formula: normalizedValue = (rawValue - min) * (100 / (max - min)) + * + * @param predictions Metric to normalize + * @return Normalized metric + */ + public Metric normalizePredictions(Metric predictions) { + Map metricData = predictions.getDatapoints(); + Map minMax = getMinMax(metricData); + double min = minMax.get("min"); + double max = minMax.get("max"); + + Metric predictionsNormalized = new Metric(getResultScopeName(), getResultMetricName()); + Map metricDataNormalized = new HashMap<>(); + + if (max - min == 0.0) { + /** + * If (max - min) == 0.0, all data points in the predictions metric + * have the same value. So, all data points in the normalized metric + * will have value 0. This avoids divide by zero operations later on. 
+ */ + for (Long timestamp : metricData.keySet()) { + metricDataNormalized.put(timestamp, "0.0"); + } + } else { + double normalizationConstant = 100.0 / (max - min); + + for (Entry entry : metricData.entrySet()) { + Long timestamp = entry.getKey(); + Double value = Double.parseDouble(entry.getValue()); + + // Formula: normalizedValue = (rawValue - min) * (100 / (max - min)) + Double valueNormalized = (value - min) * normalizationConstant; + metricDataNormalized.put(timestamp, String.valueOf(valueNormalized)); + } + } + + predictionsNormalized.setDatapoints(metricDataNormalized); + return predictionsNormalized; + } + + /** + * Identifies the min and max values of a metric + * + * @param metricData Metric to find the min and max values of + * @return Map containing the min and max values of the metric + */ + private Map getMinMax(Map metricData) { + double min = 0.0; + double max = 0.0; + boolean isMinMaxSet = false; + for (String valueString : metricData.values()) { + double valueDouble = Double.parseDouble(valueString); + if (!isMinMaxSet) { + min = valueDouble; + max = valueDouble; + isMinMaxSet = true; + } else { + if (valueDouble < min) { + min = valueDouble; + } else if (valueDouble > max) { + max = valueDouble; + } + } + } + + Map minMax = new HashMap<>(); + minMax.put("min", min); + minMax.put("max", max); + return minMax; + } + + private long getDetectionIntervalInSeconds(String detectionInterval) { + try { + //Parse constant for anomaly detection interval + String detectionIntervalValue = detectionInterval.substring(0, detectionInterval.length() - 1); + String detectionIntervalUnit = detectionInterval.substring(detectionInterval.length() - 1); + Long timeValue = Long.parseLong(detectionIntervalValue); + MetricReader.TimeUnit timeUnit = MetricReader.TimeUnit.fromString(detectionIntervalUnit); + //Convert interval to seconds + long detectionIntervalInSeconds = timeValue * timeUnit.getValue() / 1000; + return detectionIntervalInSeconds; + } catch (Exception 
e) { + throw new IllegalArgumentException("Invalid expression for anomaly detection interval constant."); + } + } + + /** + * Advances currentIndex to a point where it is one anomaly detection + * interval beyond the first timestamp. Sets the anomaly scores for + * these intermediate points to 0 in the predictions metric (since + * there is not enough data in its past for a complete interval). + * + * @param currentIndex index that gets advanced to one anomaly detection interval + * beyond the first timestamp + * @param predictionDatapoints datapoints that get filled with anomaly scores of 0 + * @param timestamps sorted timestamps of the original metric + * @param detectionIntervalInSeconds anomaly detection interval + * @return new advanced value of currentIndex + */ + private int advanceCurrentIndexByInterval(int currentIndex, Map predictionDatapoints, + Long[] timestamps, long detectionIntervalInSeconds) { + //Projected end of interval + long firstIntervalEndTime = timestamps[0] + detectionIntervalInSeconds; + while (true) { + if (currentIndex >= timestamps.length || timestamps[currentIndex] > firstIntervalEndTime) { + //Stop once the interval ends (or the entire metric is exhausted) + break; + } else { + predictionDatapoints.put(timestamps[currentIndex], "0.0"); + currentIndex += 1; + } + } + return currentIndex; + } + + /** + * Creates an interval for each data point (after currentIndex) in the metric + * and calculates the anomaly score for that data point using only other data + * points in that same interval, i.e. 
"a moving contextual anomaly score" + * + * @param predictionDatapoints data points to fill with contextual anomaly scores + * @param completeDatapoints original metric data points + * @param timestamps sorted timestamps of the original metric + * @param currentIndex index at which to start contextual anomaly detection + * @param detectionIntervalInSeconds anomaly detection interval + */ + private void calculateContextualAnomalyScores(Map predictionDatapoints, + Map completeDatapoints, + Long[] timestamps, + int currentIndex, long detectionIntervalInSeconds) { + for (int i = currentIndex; i < timestamps.length; i++) { + long timestampAtCurrentIndex = timestamps[i]; + long projectedIntervalStartTime = timestampAtCurrentIndex - detectionIntervalInSeconds; + + Metric intervalMetric = createIntervalMetric(i, completeDatapoints, timestamps, + projectedIntervalStartTime); + List intervalRawDataMetrics = new ArrayList<>(); + intervalRawDataMetrics.add(intervalMetric); + + //Apply the anomaly detection transform to each interval separately + Metric intervalAnomaliesMetric = transform(intervalRawDataMetrics).get(0); + Map intervalAnomaliesMetricData = intervalAnomaliesMetric.getDatapoints(); + predictionDatapoints.put(timestamps[i], + intervalAnomaliesMetricData.get(timestamps[i])); + } + } + + /** + * Creates an interval metric containing data points from a starting point + * (defined by the detection interval) to an ending point (currentDatapointIndex) + * + * @param currentDatapointIndex index of the current data point to create an interval for, + * will serve as the ending point of the interval + * @param completeDatapoints original metric data points + * @param timestamps sorted timestamps of the original metric + * @param projectedIntervalStartTime starting point of the interval + * @return Metric containing data points for the interval + */ + private Metric createIntervalMetric(int currentDatapointIndex, Map completeDatapoints, + Long[] timestamps, long 
projectedIntervalStartTime) { + Metric intervalMetric = new Metric(getResultScopeName(), getResultMetricName()); + Map intervalMetricData = new HashMap<>(); + + //Decrease intervalStartIndex until it's at the start of the interval + int intervalStartIndex = currentDatapointIndex; + while (intervalStartIndex >= 0 && timestamps[intervalStartIndex] >= projectedIntervalStartTime) { + long tempTimestamp = timestamps[intervalStartIndex]; + //Fill in the intervalMetricData as we traverse backwards through the interval + intervalMetricData.put(tempTimestamp, completeDatapoints.get(tempTimestamp)); + intervalStartIndex--; + } + + intervalMetric.setDatapoints(intervalMetricData); + return intervalMetric; + } + + @Override + abstract public List transform(List metrics); + + @Override + abstract public String getResultScopeName(); + + abstract public String getResultMetricName(); + + abstract public double calculateAnomalyScore(double value); +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. 
*/ diff --git a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/TransformFactory.java b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/TransformFactory.java index 30c2ebf84..0b3f3c418 100644 --- a/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/TransformFactory.java +++ b/ArgusCore/src/main/java/com/salesforce/dva/argus/service/metric/transform/TransformFactory.java @@ -174,6 +174,12 @@ public Transform getTransform(String functionName) { return new MetricZipperTransform(new DivideValueZipper()); case GROUPBY: throw new UnsupportedOperationException(functionName); + case ANOMALY_DENSITY: + return new AnomalyDetectionGaussianDensityTransform(); + case ANOMALY_ZSCORE: + return new AnomalyDetectionGaussianZScoreTransform(); + case ANOMALY_KMEANS: + return new AnomalyDetectionKMeansTransform(); default: throw new UnsupportedOperationException(functionName); } // end switch @@ -240,7 +246,10 @@ public enum Function { CONSECUTIVE("CONSECUTIVE","Filter out all values that are non-consecutive"), HW_FORECAST("HW_FORECAST", "Performns HoltWinters Forecast."), HW_DEVIATION("HW_DEVIATION", "Performns HoltWinters Deviation."), - GROUPBY("GROUPBY", "Creates groups of metrics based on some matching criteria and then performs the given aggregation."); + GROUPBY("GROUPBY", "Creates groups of metrics based on some matching criteria and then performs the given aggregation."), + ANOMALY_DENSITY("ANOMALY_DENSITY", "Calculates an anomaly score (0-100) for each value of the metric based on the probability density of each value with a Gaussian distribution."), + ANOMALY_ZSCORE("ANOMALY_ZSCORE", "Calculates an anomaly score (0-100) for each value of the metric based on the z-score of each value with a Gaussian distribution."), + ANOMALY_KMEANS("ANOMALY_KMEANS", "Calculates an anomaly score (0-100) for each value of the metric based on a K-means clustering of the metric data."); private final String _name; 
private final String _description; diff --git a/ArgusCore/src/main/javacc/MetricReader.jj b/ArgusCore/src/main/javacc/MetricReader.jj index 099e68440..c2dd833ff 100644 --- a/ArgusCore/src/main/javacc/MetricReader.jj +++ b/ArgusCore/src/main/javacc/MetricReader.jj @@ -187,6 +187,9 @@ TOKEN : { < JOIN : "JOIN" > } TOKEN : { < CONSECUTIVE : "CONSECUTIVE" > } TOKEN : { < HW_FORECAST : "HW_FORECAST" > } TOKEN : { < HW_DEVIATION : "HW_DEVIATION" > } +TOKEN : { < ANOMALY_DENSITY : "ANOMALY_DENSITY" > } +TOKEN : { < ANOMALY_ZSCORE : "ANOMALY_ZSCORE" > } +TOKEN : { < ANOMALY_KMEANS : "ANOMALY_KMEANS" > } TOKEN : { < COLON : ":" > } @@ -438,6 +441,15 @@ private String functionName() : | t = { return t.image; } + | + t = + { return t.image; } + | + t = + { return t.image; } + | + t = + { return t.image; } } private List evaluateFunction(String functionName, List result, List constants, boolean syntaxOnly, Class clazz) : @@ -445,7 +457,8 @@ private List evaluateFunction(String functionName, List result, List) result; } else if(Metric.class.equals(clazz)) { + return (List) result; + } else if(Metric.class.equals(clazz)) { if(syntaxOnly) { return (List) Arrays.asList( new Metric[] { new Metric("test","metric") }); } else { @@ -454,7 +467,8 @@ private List evaluateFunction(String functionName, List result, List) ((constants == null || constants.isEmpty()) ? 
transform.transform((List) result) : transform.transform((List) result, constants)); } } else { - throw new IllegalArgumentException("Invalid class type: " + clazz); } + throw new IllegalArgumentException("Invalid class type: " + clazz); + } } } @@ -491,7 +505,9 @@ private List expression(long offsetInMillis, boolean syntaxOnly, Class cla query.setDownsampler(downsampler); query.setDownsamplingPeriod(downsamplingPeriod); List queries = discoveryService.getMatchingQueries(query); - return (List) queries; } else if(Metric.class.equals(clazz)) { if(syntaxOnly) { + return (List) queries; + } else if(Metric.class.equals(clazz)) { + if(syntaxOnly) { return (List) Arrays.asList( new Metric[] { new Metric("test","metric") }); } else { downsampler = downsampleTokenStr != null ? getDownsampler(downsampleTokenStr) : null; @@ -511,7 +527,8 @@ private List expression(long offsetInMillis, boolean syntaxOnly, Class cla } return (List) metrics; } - } else { throw new IllegalArgumentException("Invalid class type: " + clazz); + } else { + throw new IllegalArgumentException("Invalid class type: " + clazz); } } } diff --git a/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransformTest.java b/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransformTest.java new file mode 100644 index 000000000..7da21c287 --- /dev/null +++ b/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransformTest.java @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package com.salesforce.dva.argus.service.metric.transform; + +import com.salesforce.dva.argus.entity.Metric; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; + +import static org.junit.Assert.assertEquals; + +public class AnomalyDetectionGaussianDensityTransformTest { + + private static final String TEST_SCOPE = "test-scope"; + private static final String TEST_METRIC = "test-metric"; + private Transform gaussianDensityTransform; + private List metrics; + private Metric metric; + private Map metricData; + private Map expected; + + @Before + public void setup() { + gaussianDensityTransform = new AnomalyDetectionGaussianDensityTransform(); + metrics = new ArrayList<>(); + metric = new Metric(TEST_SCOPE, TEST_METRIC); + metricData = new HashMap<>(); + expected = new HashMap<>(); + } + + @Test + public void gaussianDensityTransformSimpleTest1() { + metricData.put(1000L, "5"); + metricData.put(2000L, "10"); + metricData.put(3000L, "15"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 99.99); + expected.put(2000L, 0.0); + expected.put(3000L, 99.99); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianDensityTransformSimpleTest2() { + metricData.put(1000L, "84"); + metricData.put(2000L, "21"); + metricData.put(3000L, "904"); + metricData.put(4000L, "485"); + metricData.put(5000L, "38"); + metricData.put(6000L, "85408"); + metricData.put(7000L, "283497"); + metricData.put(8000L, "43"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 1.11); + expected.put(2000L, 1.12); + expected.put(3000L, 0.97); + 
expected.put(4000L, 1.04); + expected.put(5000L, 1.11); + expected.put(6000L, 0.0); + expected.put(7000L, 100.0); + expected.put(8000L, 1.12); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianDensityTransformSimpleTest3() { + metricData.put(1000L, "0"); + metricData.put(2000L, "8"); + metricData.put(3000L, "-98"); + metricData.put(4000L, "400"); + metricData.put(5000L, "-268"); + metricData.put(6000L, "-900"); + metricData.put(7000L, "68"); + metricData.put(8000L, "300"); + metricData.put(9000L, "-12"); + metricData.put(10000L, "314"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 0.03); + expected.put(2000L, 0.08); + expected.put(3000L, 0.80); + expected.put(4000L, 22.58); + expected.put(5000L, 7.99); + expected.put(6000L, 100.0); + expected.put(7000L, 0.96); + expected.put(8000L, 13.08); + expected.put(9000L, 0.0); + expected.put(10000L, 14.25); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianDensityTransformWithDetectionIntervalTest1() { + metricData.put(2L, "-1.20"); + metricData.put(4L, "-1.64"); + metricData.put(6L, "-1.68"); + metricData.put(8L, "-0.46"); + metricData.put(10L, "-1.21"); + metricData.put(12L, "-0.29"); + metricData.put(14L, "0.32"); + metricData.put(16L, "0.35"); + metricData.put(18L, "-2.26"); + metricData.put(20L, "-1.41"); + metricData.put(22L, "0.47"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "10s"; + constants.add(detectionInterval); + + List results = gaussianDensityTransform.transform(metrics, constants); + Map 
resultDatapoints = results.get(0).getDatapoints(); + + expected.put(2L, 0.0); + expected.put(4L, 0.0); + expected.put(6L, 0.0); + expected.put(8L, 0.0); + expected.put(10L, 0.0); + expected.put(12L, 0.0); + expected.put(14L, 100.0); + expected.put(16L, 50.80); + expected.put(18L, 100.0); + expected.put(20L, 10.82); + expected.put(22L, 26.83); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianDensityTransformWithDetectionIntervalTest2() { + metricData.put(0L, "0.35"); + metricData.put(10800L, "-0.16"); + metricData.put(21600L, "1.82"); + metricData.put(32400L, "-0.37"); + metricData.put(43200L, "-2.16"); + metricData.put(54000L, "-0.05"); + metricData.put(64800L, "-1.76"); + metricData.put(75600L, "2.13"); + metricData.put(86400L, "0.18"); + metricData.put(97200L, "-0.07"); + metricData.put(108000L, "0.81"); + metricData.put(118800L, "0.47"); + metricData.put(129600L, "0.60"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "12h"; + constants.add(detectionInterval); + + List results = gaussianDensityTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 0.0); + expected.put(10800L, 0.0); + expected.put(21600L, 0.0); + expected.put(32400L, 0.0); + expected.put(43200L, 0.0); + expected.put(54000L, 0.43); + expected.put(64800L, 28.97); + expected.put(75600L, 99.99); + expected.put(86400L, 3.05); + expected.put(97200L, 0.37); + expected.put(108000L, 7.34); + expected.put(118800L, 2.15); + expected.put(129600L, 16.65); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianDensityTransformWithDetectionIntervalTest3() { + metricData.put(0L, "0.64"); + 
metricData.put(151200L, "-1.13"); + metricData.put(302400L, "0.00"); + metricData.put(453600L, "0.90"); + metricData.put(604800L, "-0.96"); + metricData.put(756000L, "-0.52"); + metricData.put(907200L, "0.24"); + metricData.put(1058400L, "-0.01"); + metricData.put(1209600L, "0.53"); + metricData.put(1360800L, "-0.34"); + metricData.put(1512000L, "1.11"); + metricData.put(1663200L, "-0.21"); + metricData.put(1814400L, "0.54"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "7d"; + constants.add(detectionInterval); + + List results = gaussianDensityTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 0.0); + expected.put(151200L, 0.0); + expected.put(302400L, 0.0); + expected.put(453600L, 0.0); + expected.put(604800L, 0.0); + expected.put(756000L, 0.0); + expected.put(907200L, 9.67); + expected.put(1058400L, 0.0); + expected.put(1209600L, 67.34); + expected.put(1360800L, 33.82); + expected.put(1512000L, 100.0); + expected.put(1663200L, 17.42); + expected.put(1814400L, 0.72); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + /** + * Edge Case: When the detection interval is greater than the time range + * of the metric, the transform should return 0 for the anomaly score of + * every point (since there is not enough data to learn from) + */ + public void gaussianDensityTransformWithDetectionIntervalTest4() { + metricData.put(0L, "10"); + metricData.put(1000L, "-1.13"); + metricData.put(2000L, "0.00"); + metricData.put(3000L, "0.90"); + metricData.put(4000L, "-0.96"); + metricData.put(5000L, "-0.52"); + metricData.put(6000L, "0.24"); + metricData.put(7000L, "-0.01"); + metricData.put(8000L, "0.53"); + metricData.put(9000L, "-0.34"); + metricData.put(10000L, "1.11"); + metricData.put(11000L, "-0.21"); + 
metricData.put(12000L, "0.54"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "100d"; //Detection interval > time range of metricData + constants.add(detectionInterval); + + List results = gaussianDensityTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 0.0); + expected.put(1000L, 0.0); + expected.put(2000L, 0.0); + expected.put(3000L, 0.0); + expected.put(4000L, 0.0); + expected.put(5000L, 0.0); + expected.put(6000L, 0.0); + expected.put(7000L, 0.0); + expected.put(8000L, 0.0); + expected.put(9000L, 0.0); + expected.put(10000L, 0.0); + expected.put(11000L, 0.0); + expected.put(12000L, 0.0); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + //If variance is 0, none of the points should be anomalies + public void gaussianDensityTransformWithZeroVarianceTest() { + //These points have 0 variance + metricData.put(1000L, "100"); + metricData.put(2000L, "100"); + metricData.put(3000L, "100"); + metricData.put(4000L, "100"); + metricData.put(5000L, "100"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 0.0); + expected.put(2000L, 0.0); + expected.put(3000L, 0.0); + expected.put(4000L, 0.0); + expected.put(5000L, 0.0); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + /** + * Large variances in data cause floating point underflow during + * the probability density calculation. Since underflow results in 0.0 + * and we cannot take the negative log of 0.0, data points that cause + * underflow are omitted from the anomaly score results.
+ * + */ + public void gaussianDensityTransformWithLogOfZeroTest() { + for (long i = 1; i < 10001; i++) { + metricData.put(i, "0.0"); + } + //This point will get omitted in the result because + //it will cause underflow in the calculation + metricData.put(10001L, "9e150"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + for (long i = 1; i < 10001; i++) { + expected.put(i, 0.0); + assertEquals(expected.get(i), Double.parseDouble(resultDatapoints.get(i)), 0.01); + } + //Omitted point + expected.put(10001L, null); + + assertEquals(expected.get(10001L), resultDatapoints.get(10001L)); + } + + @Test(expected = MissingDataException.class) + public void gaussianDensityTransformWithNoDataTest() { + //metricData map is empty + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianDensityTransform.transform(metrics); + } + + @Test(expected = UnsupportedOperationException.class) + public void gaussianDensityTransformWithTwoMetricsTest() { + metricData.put(1000L, "1"); + metricData.put(2000L, "2"); + metricData.put(3000L, "3"); + metric.setDatapoints(metricData); + + Metric metric_2 = new Metric(TEST_SCOPE, TEST_METRIC); + Map metricData_2 = new HashMap<>(); + metricData_2.put(1000L, "4"); + metricData_2.put(2000L, "5"); + metricData_2.put(3000L, "6"); + metric_2.setDatapoints(metricData_2); + + metrics.add(metric); + metrics.add(metric_2); + + List results = gaussianDensityTransform.transform(metrics); + } +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. 
*/ diff --git a/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransformTest.java b/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransformTest.java new file mode 100644 index 000000000..66ede304a --- /dev/null +++ b/ArgusCore/src/test/java/com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransformTest.java @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2016, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Salesforce.com nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.dva.argus.service.metric.transform; + +import com.salesforce.dva.argus.entity.Metric; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class AnomalyDetectionGaussianZScoreTransformTest { + + private static final String TEST_SCOPE = "test-scope"; + private static final String TEST_METRIC = "test-metric"; + private Transform gaussianZScoreTransform; + private List metrics; + private Metric metric; + private Map metricData; + private Map expected; + + @Before + public void setup() { + gaussianZScoreTransform = new AnomalyDetectionGaussianZScoreTransform(); + metrics = new ArrayList<>(); + metric = new Metric(TEST_SCOPE, TEST_METRIC); + metricData = new HashMap<>(); + expected = new HashMap<>(); + } + + @Test + public void gaussianZScoreTransformSimpleTest1() { + metricData.put(1000L, "5"); + metricData.put(2000L, "10"); + metricData.put(3000L, "15"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianZScoreTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 100.0); + expected.put(2000L, 0.0); + expected.put(3000L, 100.0); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), 
Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianZScoreTransformSimpleTest2() { + metricData.put(1000L, "84"); + metricData.put(2000L, "21"); + metricData.put(3000L, "904"); + metricData.put(4000L, "485"); + metricData.put(5000L, "38"); + metricData.put(6000L, "85408"); + metricData.put(7000L, "283497"); + metricData.put(8000L, "43"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianZScoreTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 3.59); + expected.put(2000L, 3.63); + expected.put(3000L, 3.18); + expected.put(4000L, 3.39); + expected.put(5000L, 3.62); + expected.put(6000L, 0.0); + expected.put(7000L, 100.0); + expected.put(8000L, 3.61); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianZScoreTransformSimpleTest3() { + metricData.put(1000L, "0"); + metricData.put(2000L, "8"); + metricData.put(3000L, "-98"); + metricData.put(4000L, "400"); + metricData.put(5000L, "-268"); + metricData.put(6000L, "-900"); + metricData.put(7000L, "68"); + metricData.put(8000L, "300"); + metricData.put(9000L, "-12"); + metricData.put(10000L, "314"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianZScoreTransform.transform(metrics); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 1.37); + expected.put(2000L, 2.28); + expected.put(3000L, 8.27); + expected.put(4000L, 47.11); + expected.put(5000L, 27.72); + expected.put(6000L, 100.0); + expected.put(7000L, 9.14); + expected.put(8000L, 35.68); + expected.put(9000L, 0.0); + expected.put(10000L, 37.28); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void 
gaussianZScoreTransformWithDetectionIntervalTest1() { + metricData.put(2L, "-1.20"); + metricData.put(4L, "-1.64"); + metricData.put(6L, "-1.68"); + metricData.put(8L, "-0.46"); + metricData.put(10L, "-1.21"); + metricData.put(12L, "-0.29"); + metricData.put(14L, "0.32"); + metricData.put(16L, "0.35"); + metricData.put(18L, "-2.26"); + metricData.put(20L, "-1.41"); + metricData.put(22L, "0.47"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "10s"; + constants.add(detectionInterval); + + List results = gaussianZScoreTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(2L, 0.0); + expected.put(4L, 0.0); + expected.put(6L, 0.0); + expected.put(8L, 0.0); + expected.put(10L, 0.0); + expected.put(12L, 0.0); + expected.put(14L, 100.0); + expected.put(16L, 70.43); + expected.put(18L, 100.0); + expected.put(20L, 19.04); + expected.put(22L, 47.20); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianZScoreTransformWithDetectionIntervalTest2() { + metricData.put(0L, "0.35"); + metricData.put(10800L, "-0.16"); + metricData.put(21600L, "1.82"); + metricData.put(32400L, "-0.37"); + metricData.put(43200L, "-2.16"); + metricData.put(54000L, "-0.05"); + metricData.put(64800L, "-1.76"); + metricData.put(75600L, "2.13"); + metricData.put(86400L, "0.18"); + metricData.put(97200L, "-0.07"); + metricData.put(108000L, "0.81"); + metricData.put(118800L, "0.47"); + metricData.put(129600L, "0.60"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "12h"; + constants.add(detectionInterval); + + List results = gaussianZScoreTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 
0.0); + expected.put(10800L, 0.0); + expected.put(21600L, 0.0); + expected.put(32400L, 0.0); + expected.put(43200L, 0.0); + expected.put(54000L, 5.55); + expected.put(64800L, 51.23); + expected.put(75600L, 100.0); + expected.put(86400L, 10.55); + expected.put(97200L, 3.17); + expected.put(108000L, 24.43); + expected.put(118800L, 9.69); + expected.put(129600L, 32.82); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + public void gaussianZScoreTransformWithDetectionIntervalTest3() { + metricData.put(0L, "0.64"); + metricData.put(151200L, "-1.13"); + metricData.put(302400L, "0.00"); + metricData.put(453600L, "0.90"); + metricData.put(604800L, "-0.96"); + metricData.put(756000L, "-0.52"); + metricData.put(907200L, "0.24"); + metricData.put(1058400L, "-0.01"); + metricData.put(1209600L, "0.53"); + metricData.put(1360800L, "-0.34"); + metricData.put(1512000L, "1.11"); + metricData.put(1663200L, "-0.21"); + metricData.put(1814400L, "0.54"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "7d"; + constants.add(detectionInterval); + + List results = gaussianZScoreTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 0.0); + expected.put(151200L, 0.0); + expected.put(302400L, 0.0); + expected.put(453600L, 0.0); + expected.put(604800L, 0.0); + expected.put(756000L, 0.0); + expected.put(907200L, 26.66); + expected.put(1058400L, 0.0); + expected.put(1209600L, 79.17); + expected.put(1360800L, 57.40); + expected.put(1512000L, 100.0); + expected.put(1663200L, 29.94); + expected.put(1814400L, 1.72); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + /** + * Edge Case: When the detection interval is greater than 
the time range + * of the metric, the transform should return 0 for the anomaly score of + * every point (since there is not enough data to learn from) + */ + public void gaussianZScoreTransformWithDetectionIntervalTest4() { + metricData.put(0L, "10"); + metricData.put(1000L, "-1.13"); + metricData.put(2000L, "0.00"); + metricData.put(3000L, "0.90"); + metricData.put(4000L, "-0.96"); + metricData.put(5000L, "-0.52"); + metricData.put(6000L, "0.24"); + metricData.put(7000L, "-0.01"); + metricData.put(8000L, "0.53"); + metricData.put(9000L, "-0.34"); + metricData.put(10000L, "1.11"); + metricData.put(11000L, "-0.21"); + metricData.put(12000L, "0.54"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List constants = new ArrayList<>(); + String detectionInterval = "100d"; //Detection interval > time range of metricData + constants.add(detectionInterval); + + List results = gaussianZScoreTransform.transform(metrics, constants); + Map resultDatapoints = results.get(0).getDatapoints(); + + expected.put(0L, 0.0); + expected.put(1000L, 0.0); + expected.put(2000L, 0.0); + expected.put(3000L, 0.0); + expected.put(4000L, 0.0); + expected.put(5000L, 0.0); + expected.put(6000L, 0.0); + expected.put(7000L, 0.0); + expected.put(8000L, 0.0); + expected.put(9000L, 0.0); + expected.put(10000L, 0.0); + expected.put(11000L, 0.0); + expected.put(12000L, 0.0); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test + //If variance is 0, none of the points should be anomalies + public void gaussianZScoreTransformWithZeroVarianceTest() { + //These points have 0 variance + metricData.put(1000L, "100"); + metricData.put(2000L, "100"); + metricData.put(3000L, "100"); + metricData.put(4000L, "100"); + metricData.put(5000L, "100"); + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianZScoreTransform.transform(metrics); + Map 
resultDatapoints = results.get(0).getDatapoints(); + + expected.put(1000L, 0.0); + expected.put(2000L, 0.0); + expected.put(3000L, 0.0); + expected.put(4000L, 0.0); + expected.put(5000L, 0.0); + + for (Long timestamp : expected.keySet()) { + assertEquals(expected.get(timestamp), Double.parseDouble(resultDatapoints.get(timestamp)), 0.01); + } + } + + @Test(expected = MissingDataException.class) + public void gaussianZScoreTransformWithNoDataTest() { + //metricData map is empty + metric.setDatapoints(metricData); + metrics.add(metric); + + List results = gaussianZScoreTransform.transform(metrics); + } + + @Test(expected = UnsupportedOperationException.class) + public void gaussianZScoreTransformWithTwoMetricsTest() { + metricData.put(1000L, "1"); + metricData.put(2000L, "2"); + metricData.put(3000L, "3"); + metric.setDatapoints(metricData); + + Metric metric_2 = new Metric(TEST_SCOPE, TEST_METRIC); + Map metricData_2 = new HashMap<>(); + metricData_2.put(1000L, "4"); + metricData_2.put(2000L, "5"); + metricData_2.put(3000L, "6"); + metric_2.setDatapoints(metricData_2); + + metrics.add(metric); + metrics.add(metric_2); + + List results = gaussianZScoreTransform.transform(metrics); + } + +} +/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ diff --git a/ArgusWeb/app/index.html b/ArgusWeb/app/index.html index ed08b18b1..5653ccee2 100644 --- a/ArgusWeb/app/index.html +++ b/ArgusWeb/app/index.html @@ -4,14 +4,14 @@ - + - + @@ -67,7 +67,7 @@