This repository has been archived by the owner on Feb 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 145
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Update Angular Selectize dependency * Implement anomaly detection using Gaussian distribution * Convert to abstract class for Gaussian-based anomaly detection transforms * Implementation and unit tests for anomaly detection (Gaussian) using probability density * Implementation and unit tests for anomaly detection (Gaussian) using z-score * Add Gaussian anomaly detection transforms to TransformFactory and MetricReader * Add back original argus-build.properties file * Add back argus-build.properties to gitignore * Update anomaly detection transform grammar with shorter names * Correct message for UnsupportedOperationException * Add Override annotation to all implementations of abstract methods * Move important comment into method Javadoc * Remove index.html from repo * Normalize anomaly detection results. New abstract class for anomaly detection transforms. * Update Gaussian anomaly detection tests with normalized results * Contextual anomaly detection with unit tests * K-means anomaly detection transform * Update K-means transform to use Weka 3.6.14 for license compliance
- Loading branch information
Shouvik Mani
authored and
Tom Valine
committed
Jul 28, 2016
1 parent
22561bc
commit a4b93b1
Showing
12 changed files
with
1,646 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
...lesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianDensityTransform.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright (c) 2016, Salesforce.com, Inc. | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of Salesforce.com nor the names of its contributors may | ||
* be used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
package com.salesforce.dva.argus.service.metric.transform; | ||
|
||
/** | ||
* Gaussian-based anomaly detection using probability density estimation. | ||
* Source: http://www.holehouse.org/mlclass/15_Anomaly_Detection.html (Andrew Ng) | ||
* | ||
* @author Shouvik Mani ([email protected]) | ||
*/ | ||
public class AnomalyDetectionGaussianDensityTransform extends AnomalyDetectionGaussianTransform { | ||
|
||
private static final String RESULT_METRIC_NAME = "probability density (neg. log)"; | ||
|
||
@Override | ||
public String getResultScopeName() { | ||
return TransformFactory.Function.ANOMALY_DENSITY.name(); | ||
} | ||
|
||
@Override | ||
public String getResultMetricName() { | ||
return RESULT_METRIC_NAME; | ||
} | ||
|
||
/** | ||
* Calculates the probability density (PDF) of the data point, which | ||
* describes the relative likelihood of the point occurring in the | ||
* Gaussian distribution. | ||
* | ||
* Large variances in data causes floating point underflow during the | ||
* probability density calculation. Since we cannot take the negative | ||
* log of 0.0, data points that cause underflow are omitted from the | ||
* anomaly score results. | ||
* | ||
* @param value the value of the data point | ||
* @return the negative log of the probability density of the data point | ||
*/ | ||
@Override | ||
public double calculateAnomalyScore(double value) { | ||
double probabilityDensity = (1.0/Math.sqrt(2.0 * Math.PI * variance)) * | ||
Math.exp((-1.0 * Math.pow((value - mean), 2.0)) / (2.0 * variance)); | ||
|
||
if (probabilityDensity == 0.0) { | ||
throw new ArithmeticException("Cannot take the log of 0."); | ||
} | ||
|
||
/** | ||
* Taking negative log transforms the probability density | ||
* into a human-readable anomaly score | ||
*/ | ||
return -1.0 * Math.log(probabilityDensity); | ||
} | ||
|
||
} | ||
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ |
142 changes: 142 additions & 0 deletions
142
.../com/salesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianTransform.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
/* | ||
* Copyright (c) 2016, Salesforce.com, Inc. | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of Salesforce.com nor the names of its contributors may | ||
* be used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
package com.salesforce.dva.argus.service.metric.transform; | ||
|
||
import com.salesforce.dva.argus.entity.Metric; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
|
||
/** | ||
* Abstract class for Gaussian distribution based anomaly detection transforms. | ||
* | ||
* Estimates mean and variance parameters to build a model from the data. Then | ||
* calculates an anomaly score for each data point, indicating how likely it is | ||
* to be an anomaly relative to other points. | ||
* | ||
* @author Shouvik Mani ([email protected]) | ||
*/ | ||
public abstract class AnomalyDetectionGaussianTransform extends AnomalyDetectionTransform { | ||
|
||
//Parameters for Gaussian distribution | ||
protected double mean; | ||
protected double variance; | ||
|
||
@Override | ||
public List<Metric> transform(List<Metric> metrics) { | ||
if (metrics == null) { | ||
throw new MissingDataException("The metrics list cannot be null or empty while performing transforms."); | ||
} | ||
if (metrics.size() != 1) { | ||
throw new UnsupportedOperationException("Anomaly Detection Transform can only be used with one metric."); | ||
} | ||
|
||
Metric metric = metrics.get(0); | ||
Map<Long, String> metricData = metric.getDatapoints(); | ||
if (metricData.size() == 0) { | ||
throw new MissingDataException("Metric must contain data points to perform transforms."); | ||
} | ||
|
||
fitParameters(metricData); | ||
Metric predictions = predictAnomalies(metricData); | ||
Metric predictionsNormalized = normalizePredictions(predictions); | ||
|
||
List<Metric> resultMetrics = new ArrayList<>(); | ||
resultMetrics.add(predictionsNormalized); | ||
return resultMetrics; | ||
} | ||
|
||
//Fits the mean and variance parameters to the data | ||
private void fitParameters(Map<Long, String> metricData) { | ||
mean = getMetricMean(metricData); | ||
variance = getMetricVariance(metricData); | ||
} | ||
|
||
/** | ||
* Assigns an anomaly score to each data point, indicating how likely it is | ||
* to be an anomaly relative to other points. | ||
*/ | ||
private Metric predictAnomalies(Map<Long, String> metricData) { | ||
Metric predictions = new Metric(getResultScopeName(), getResultMetricName()); | ||
Map<Long, String> predictionDatapoints = new HashMap<>(); | ||
|
||
if (variance == 0.0) { | ||
/** | ||
* If variance is 0, there are no anomalies. | ||
* Also, using 0 for variance would cause divide by zero operations | ||
* in Gaussian anomaly formulas. This condition avoids such operations. | ||
*/ | ||
for (Entry<Long, String> entry : metricData.entrySet()) { | ||
Long timestamp = entry.getKey(); | ||
predictionDatapoints.put(timestamp, "0.0"); | ||
} | ||
} else { | ||
for (Entry<Long, String> entry : metricData.entrySet()) { | ||
Long timestamp = entry.getKey(); | ||
String valueString = entry.getValue(); | ||
double valueDouble = Double.parseDouble(valueString); | ||
try { | ||
double anomalyScore = calculateAnomalyScore(valueDouble); | ||
predictionDatapoints.put(timestamp, String.valueOf(anomalyScore)); | ||
} catch (ArithmeticException e) { | ||
continue; | ||
} | ||
} | ||
} | ||
|
||
predictions.setDatapoints(predictionDatapoints); | ||
return predictions; | ||
} | ||
|
||
private double getMetricMean(Map<Long, String> metricData) { | ||
double sum = 0; | ||
for (String valueString : metricData.values()) { | ||
double valueDouble = Double.parseDouble(valueString); | ||
sum += valueDouble; | ||
} | ||
return sum/metricData.size(); | ||
} | ||
|
||
private double getMetricVariance(Map<Long, String> metricData) { | ||
double sumSquareDiff = 0; | ||
for (String valueString : metricData.values()) { | ||
double valueDouble = Double.parseDouble(valueString); | ||
sumSquareDiff += Math.pow((valueDouble - mean), 2); | ||
} | ||
return sumSquareDiff/metricData.size(); | ||
} | ||
|
||
} | ||
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ |
69 changes: 69 additions & 0 deletions
69
...alesforce/dva/argus/service/metric/transform/AnomalyDetectionGaussianZScoreTransform.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Copyright (c) 2016, Salesforce.com, Inc. | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of Salesforce.com nor the names of its contributors may | ||
* be used to endorse or promote products derived from this software without | ||
* specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
package com.salesforce.dva.argus.service.metric.transform; | ||
|
||
/** | ||
* Gaussian-based anomaly detection using z-score calculation. | ||
* Source: http://trevorwhitney.com/data_mining/anomaly_detection | ||
* | ||
* @author Shouvik Mani ([email protected]) | ||
*/ | ||
public class AnomalyDetectionGaussianZScoreTransform extends AnomalyDetectionGaussianTransform { | ||
|
||
private static final String RESULT_METRIC_NAME = "z-score (abs value)"; | ||
|
||
@Override | ||
public String getResultScopeName() { | ||
return TransformFactory.Function.ANOMALY_ZSCORE.name(); | ||
} | ||
|
||
@Override | ||
public String getResultMetricName() { | ||
return RESULT_METRIC_NAME; | ||
} | ||
|
||
/** | ||
* Calculates the z-score of the data point, which measures how many | ||
* standard deviations the data point is away from the mean. | ||
* | ||
* @param value the value of the data point | ||
* @return the absolute value of the z-score of the data point | ||
*/ | ||
@Override | ||
public double calculateAnomalyScore(double value) { | ||
double zScore = (value - mean) / Math.sqrt(variance); | ||
//Taking absolute value for a more human-readable anomaly score | ||
return Math.abs(zScore); | ||
} | ||
|
||
} | ||
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */ |
Oops, something went wrong.