Skip to content
This repository has been archived by the owner on Feb 12, 2022. It is now read-only.

Commit

Permalink
Anomaly Detection Transforms (#69)
Browse files Browse the repository at this point in the history
* Update Angular Selectize dependency

* Implement anomaly detection using Gaussian distribution

* Convert to abstract class for Gaussian-based anomaly detection transforms

* Implementation and unit tests for anomaly detection (Gaussian) using probability density

* Implementation and unit tests for anomaly detection (Gaussian) using z-score

* Add Gaussian anomaly detection transforms to TransformFactory and MetricReader

* Add back original argus-build.properties file

* Add back argus-build.properties to gitignore

* Update anomaly detection transform grammar with shorter names

* Correct message for UnsupportedOperationException

* Add Override annotation to all implementations of abstract methods

* Move important comment into method Javadoc

* Remove index.html from repo

* Normalize anomaly detection results. New abstract class for anomaly detection transforms.

* Update Gaussian anomaly detection tests with normalized results

* Contextual anomaly detection with unit tests

* K-means anomaly detection transform

* Update K-means transform to use Weka 3.6.14 for license compliance
  • Loading branch information
Shouvik Mani authored and Tom Valine committed Jul 28, 2016
1 parent 22561bc commit a4b93b1
Show file tree
Hide file tree
Showing 12 changed files with 1,646 additions and 38 deletions.
5 changes: 5 additions & 0 deletions ArgusCore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,11 @@
<artifactId>asynchbase</artifactId>
<version>1.7.0</version>
</dependency>
<!-- Weka backs the K-means anomaly detection transform; the version is pinned to 3.6.14 for license compliance. -->
<dependency>
<groupId>nz.ac.waikato.cms.weka</groupId>
<artifactId>weka-stable</artifactId>
<version>3.6.14</version>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright (c) 2016, Salesforce.com, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of Salesforce.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

package com.salesforce.dva.argus.service.metric.transform;

/**
* Gaussian-based anomaly detection using probability density estimation.
* Source: http://www.holehouse.org/mlclass/15_Anomaly_Detection.html (Andrew Ng)
*
* @author Shouvik Mani ([email protected])
*/
public class AnomalyDetectionGaussianDensityTransform extends AnomalyDetectionGaussianTransform {

    private static final String RESULT_METRIC_NAME = "probability density (neg. log)";

    /**
     * Returns the scope name used for the result metric.
     *
     * @return the name of the {@code ANOMALY_DENSITY} transform function
     */
    @Override
    public String getResultScopeName() {
        return TransformFactory.Function.ANOMALY_DENSITY.name();
    }

    /**
     * Returns the metric name used for the result metric.
     *
     * @return the fixed result metric name
     */
    @Override
    public String getResultMetricName() {
        return RESULT_METRIC_NAME;
    }

    /**
     * Calculates the negative natural log of the Gaussian probability
     * density (PDF) of the data point, using the fitted {@code mean} and
     * {@code variance}. A larger score means the point is less likely
     * under the fitted distribution.
     *
     * The score is evaluated analytically as
     * {@code 0.5 * ln(2 * PI * variance) + (value - mean)^2 / (2 * variance)}
     * rather than by computing the density and then taking its log.
     * Computing the density first underflows to 0.0 for points that lie
     * far from the mean, which previously forced exactly the most
     * anomalous points to be dropped from the results.
     *
     * @param value the value of the data point
     * @return the negative log of the probability density of the data point
     * @throws ArithmeticException if the score is infinite (for example
     *         when the value or the variance is infinite)
     */
    @Override
    public double calculateAnomalyScore(double value) {
        double deviation = value - mean;

        // -ln(pdf) = ln(sqrt(2*pi*variance)) + deviation^2 / (2*variance)
        double logNormalizer = 0.5 * Math.log(2.0 * Math.PI * variance);
        double scaledSquaredDeviation = (deviation * deviation) / (2.0 * variance);
        double anomalyScore = logNormalizer + scaledSquaredDeviation;

        // An infinite score cannot be represented meaningfully in the results,
        // so signal it with the same exception type the original used for log(0).
        if (Double.isInfinite(anomalyScore)) {
            throw new ArithmeticException("Anomaly score is not finite.");
        }

        return anomalyScore;
    }

}
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright (c) 2016, Salesforce.com, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of Salesforce.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

package com.salesforce.dva.argus.service.metric.transform;

import com.salesforce.dva.argus.entity.Metric;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
* Abstract class for Gaussian distribution based anomaly detection transforms.
*
* Estimates mean and variance parameters to build a model from the data. Then
* calculates an anomaly score for each data point, indicating how likely it is
* to be an anomaly relative to other points.
*
* @author Shouvik Mani ([email protected])
*/
public abstract class AnomalyDetectionGaussianTransform extends AnomalyDetectionTransform {

    //Parameters for Gaussian distribution

    /**
     * Arithmetic mean of the metric's values. Written by every call to
     * {@link #transform(List)} and read by subclasses while scoring.
     */
    protected double mean;

    /**
     * Population variance (sum of squared deviations divided by the number
     * of data points) of the metric's values. Written by every call to
     * {@link #transform(List)} and read by subclasses while scoring.
     */
    protected double variance;

    /**
     * Fits a Gaussian model (mean and variance) to the single input metric,
     * scores every data point against it and returns the normalized scores
     * as a one-element list.
     *
     * The fitted parameters are stored in instance fields, so one instance
     * must not run overlapping {@code transform} calls.
     *
     * @param metrics a list containing exactly one metric
     * @return a list holding the single metric of normalized anomaly scores
     * @throws MissingDataException if the list is null, or the metric or its
     *         data points are null or empty
     * @throws UnsupportedOperationException if the list does not contain
     *         exactly one metric
     * @throws NumberFormatException if a data point value is not a parsable
     *         double
     */
    @Override
    public List<Metric> transform(List<Metric> metrics) {
        if (metrics == null) {
            throw new MissingDataException("The metrics list cannot be null or empty while performing transforms.");
        }
        if (metrics.size() != 1) {
            throw new UnsupportedOperationException("Anomaly Detection Transform can only be used with one metric.");
        }

        Metric metric = metrics.get(0);
        // Treat a null metric or null datapoint map like an empty one instead
        // of failing with a bare NullPointerException.
        Map<Long, String> metricData = (metric == null) ? null : metric.getDatapoints();
        if (metricData == null || metricData.isEmpty()) {
            throw new MissingDataException("Metric must contain data points to perform transforms.");
        }

        // Parse every value exactly once; mean, variance and scoring all
        // reuse the parsed numbers in the map's iteration order.
        List<Long> timestamps = new ArrayList<>(metricData.size());
        double[] values = new double[metricData.size()];
        int count = 0;
        for (Entry<Long, String> entry : metricData.entrySet()) {
            timestamps.add(entry.getKey());
            values[count++] = Double.parseDouble(entry.getValue());
        }

        fitParameters(values);
        Metric predictions = predictAnomalies(timestamps, values);
        Metric predictionsNormalized = normalizePredictions(predictions);

        List<Metric> resultMetrics = new ArrayList<>();
        resultMetrics.add(predictionsNormalized);
        return resultMetrics;
    }

    /**
     * Fits the mean and variance parameters to the data. The mean must be
     * fitted first because the variance is computed around it.
     */
    private void fitParameters(double[] values) {
        mean = getMetricMean(values);
        variance = getMetricVariance(values);
    }

    /**
     * Assigns an anomaly score to each data point, indicating how likely it is
     * to be an anomaly relative to other points. Data points whose score
     * calculation throws an ArithmeticException are left out of the result.
     */
    private Metric predictAnomalies(List<Long> timestamps, double[] values) {
        Metric predictions = new Metric(getResultScopeName(), getResultMetricName());
        Map<Long, String> predictionDatapoints = new HashMap<>();

        if (variance == 0.0) {
            /*
             * If variance is 0, there are no anomalies.
             * Also, using 0 for variance would cause divide by zero operations
             * in Gaussian anomaly formulas. This condition avoids such operations.
             *
             * NOTE(review): this is an exact floating point comparison; a
             * constant series whose mean is not exactly representable may
             * yield a tiny non-zero variance and take the other branch.
             */
            for (Long timestamp : timestamps) {
                predictionDatapoints.put(timestamp, "0.0");
            }
        } else {
            for (int i = 0; i < values.length; i++) {
                try {
                    double anomalyScore = calculateAnomalyScore(values[i]);
                    predictionDatapoints.put(timestamps.get(i), String.valueOf(anomalyScore));
                } catch (ArithmeticException ignored) {
                    // The subclass could not produce a usable score for this
                    // point; omit it from the results.
                }
            }
        }

        predictions.setDatapoints(predictionDatapoints);
        return predictions;
    }

    /** Returns the arithmetic mean of the parsed values. */
    private double getMetricMean(double[] values) {
        double sum = 0;
        for (double value : values) {
            sum += value;
        }
        return sum / values.length;
    }

    /** Returns the population variance of the parsed values around the fitted mean. */
    private double getMetricVariance(double[] values) {
        double sumSquareDiff = 0;
        for (double value : values) {
            sumSquareDiff += Math.pow((value - mean), 2);
        }
        return sumSquareDiff / values.length;
    }

}
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (c) 2016, Salesforce.com, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of Salesforce.com nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

package com.salesforce.dva.argus.service.metric.transform;

/**
* Gaussian-based anomaly detection using z-score calculation.
* Source: http://trevorwhitney.com/data_mining/anomaly_detection
*
* @author Shouvik Mani ([email protected])
*/
public class AnomalyDetectionGaussianZScoreTransform extends AnomalyDetectionGaussianTransform {

    private static final String METRIC_NAME = "z-score (abs value)";

    /**
     * Returns the scope name used for the result metric.
     *
     * @return the name of the {@code ANOMALY_ZSCORE} transform function
     */
    @Override
    public String getResultScopeName() {
        return TransformFactory.Function.ANOMALY_ZSCORE.name();
    }

    /**
     * Returns the metric name used for the result metric.
     *
     * @return the fixed result metric name
     */
    @Override
    public String getResultMetricName() {
        return METRIC_NAME;
    }

    /**
     * Computes the magnitude of the z-score of a data point: its distance
     * from the fitted mean, expressed in standard deviations.
     *
     * @param value the value of the data point
     * @return the absolute value of the z-score of the data point
     */
    @Override
    public double calculateAnomalyScore(double value) {
        final double standardDeviation = Math.sqrt(variance);
        final double distanceFromMean = value - mean;
        // The sign is dropped so the score only reflects how far the point is from the mean.
        return Math.abs(distanceFromMean / standardDeviation);
    }

}
/* Copyright (c) 2016, Salesforce.com, Inc. All rights reserved. */
Loading

0 comments on commit a4b93b1

Please sign in to comment.