-
-
Notifications
You must be signed in to change notification settings - Fork 301
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
78a3c75
commit 32aeecd
Showing
11 changed files
with
561 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
package iforest | ||
|
||
import ( | ||
"math" | ||
"math/rand" | ||
"sync" | ||
) | ||
|
||
const ( | ||
defaultNumTrees = 100 | ||
defaultSampleSize = 256 | ||
defaultScoreThreshold = 0.6 | ||
defaultDetectionType = DetectionTypeThreshold | ||
offset = 0.5 | ||
) | ||
|
||
// DetectionType names the strategy Predict uses to turn anomaly scores into
// outlier labels.
type DetectionType string

// DetectionTypeThreshold compares every score against a fixed threshold.
const DetectionTypeThreshold DetectionType = "threshold"

// DetectionTypeProportion derives the cutoff from a quantile of the scores.
const DetectionTypeProportion DetectionType = "proportion"
|
||
type Options struct { | ||
// The method used for anomaly detection | ||
DetectionType DetectionType `json:"detectionType"` | ||
|
||
// The anomaly score threshold | ||
Threshold float64 `json:"threshold"` | ||
|
||
// The proportion of outliers in the dataset | ||
Proportion float64 `json:"proportion"` | ||
|
||
// The number of trees to build in the forest | ||
NumTrees int `json:"numTrees"` | ||
|
||
// The sample size for each isolation tree | ||
SampleSize int `json:"sampleSize"` | ||
|
||
// The maximum depth of each isolation tree | ||
MaxDepth int `json:"maxDepth"` | ||
} | ||
|
||
// SetDefaultValues applies default settings to unspecified fields | ||
func (o *Options) SetDefaultValues() { | ||
if o.DetectionType == "" { | ||
o.DetectionType = defaultDetectionType | ||
} | ||
|
||
if o.Threshold == 0 { | ||
o.Threshold = defaultScoreThreshold | ||
} | ||
|
||
if o.NumTrees == 0 { | ||
o.NumTrees = defaultNumTrees | ||
} | ||
|
||
if o.SampleSize == 0 { | ||
o.SampleSize = defaultSampleSize | ||
} | ||
|
||
if o.MaxDepth == 0 { | ||
o.MaxDepth = int(math.Ceil(math.Log2(float64(o.SampleSize)))) | ||
} | ||
} | ||
|
||
// IsolationForest orchestrates anomaly detection using isolation trees | ||
type IsolationForest struct { | ||
*Options | ||
|
||
Trees []*TreeNode | ||
} | ||
|
||
// New creates an IsolationForest with default options. | ||
func New() *IsolationForest { | ||
options := &Options{} | ||
options.SetDefaultValues() | ||
return &IsolationForest{Options: options} | ||
} | ||
|
||
// NewWithOptions creates an IsolationForest with the specified options. | ||
func NewWithOptions(options Options) *IsolationForest { | ||
options.SetDefaultValues() | ||
return &IsolationForest{Options: &options} | ||
} | ||
|
||
// Fit constructs isolation trees from a given dataset | ||
func (f *IsolationForest) Fit(samples [][]float64) { | ||
wg := sync.WaitGroup{} | ||
wg.Add(f.NumTrees) | ||
|
||
f.Trees = make([]*TreeNode, f.NumTrees) | ||
for i := 0; i < f.NumTrees; i++ { | ||
sampled := SampleRows(samples, f.SampleSize) | ||
go func(index int) { | ||
defer wg.Done() | ||
tree := f.BuildTree(sampled, 0) | ||
f.Trees[index] = tree | ||
}(i) | ||
} | ||
wg.Wait() | ||
} | ||
|
||
// BuildTree recursively partitions samples to isolate outliers | ||
func (f *IsolationForest) BuildTree(samples [][]float64, depth int) *TreeNode { | ||
numSamples := len(samples) | ||
if numSamples == 0 { | ||
return &TreeNode{} | ||
} | ||
numFeatures := len(samples[0]) | ||
if depth >= f.MaxDepth || numSamples <= 1 { | ||
return &TreeNode{Size: numSamples} | ||
} | ||
|
||
splitIndex := rand.Intn(numFeatures) | ||
column := Column(samples, splitIndex) | ||
minValue, maxValue := MinMax(column) | ||
splitValue := rand.Float64()*(maxValue-minValue) + minValue | ||
|
||
leftSamples := make([][]float64, 0) | ||
rightSamples := make([][]float64, 0) | ||
for _, sample := range samples { | ||
if sample[splitIndex] < splitValue { | ||
leftSamples = append(leftSamples, sample) | ||
} else { | ||
rightSamples = append(rightSamples, sample) | ||
} | ||
} | ||
|
||
return &TreeNode{ | ||
Left: f.BuildTree(leftSamples, depth+1), | ||
Right: f.BuildTree(rightSamples, depth+1), | ||
SplitIndex: splitIndex, | ||
SplitValue: splitValue, | ||
} | ||
} | ||
|
||
// Score computes anomaly scores for each sample | ||
func (f *IsolationForest) Score(samples [][]float64) []float64 { | ||
scores := make([]float64, len(samples)) | ||
for i, sample := range samples { | ||
score := 0.0 | ||
for _, tree := range f.Trees { | ||
score += pathLength(sample, tree, 0) | ||
} | ||
scores[i] = math.Pow(2.0, -score/float64(len(f.Trees))/averagePathLength(float64(f.SampleSize))) | ||
} | ||
return scores | ||
} | ||
|
||
// Predict labels samples as outliers (1) or normal (0) based on the detection type | ||
func (f *IsolationForest) Predict(samples [][]float64) []int { | ||
predictions := make([]int, len(samples)) | ||
scores := f.Score(samples) | ||
|
||
var threshold float64 | ||
switch f.DetectionType { | ||
case DetectionTypeThreshold: | ||
threshold = f.Threshold | ||
case DetectionTypeProportion: | ||
threshold = Quantile(f.Score(samples), 1-f.Proportion) | ||
default: | ||
panic("Invalid detection type") | ||
} | ||
|
||
for i, score := range scores { | ||
if score >= threshold { | ||
predictions[i] = 1 | ||
} else { | ||
predictions[i] = 0 | ||
} | ||
} | ||
|
||
return predictions | ||
} | ||
|
||
// FeatureImportance computes an importance score for each feature | ||
func (f *IsolationForest) FeatureImportance(sample []float64) []int { | ||
importance := make([]int, len(sample)) | ||
for _, tree := range f.Trees { | ||
for i, value := range tree.FeatureImportance(sample) { | ||
importance[i] += value | ||
} | ||
} | ||
return importance | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package iforest | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestIsolationForest(t *testing.T) { | ||
tests := []struct { | ||
features [][]float64 | ||
predictions []int | ||
}{ | ||
{ | ||
[][]float64{ | ||
{0, 0, 0}, | ||
{0, 0, 0}, | ||
{0, 0, 0}, | ||
{1, 1, 1}, | ||
}, | ||
[]int{0, 0, 0, 1}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
forest := New() | ||
forest.Fit(tt.features) | ||
|
||
preds := forest.Predict(tt.features) | ||
for i, pred := range preds { | ||
if pred != tt.predictions[i] { | ||
t.Errorf("expected %v, got %v", tt.predictions[i], pred) | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package iforest | ||
|
||
import ( | ||
"math" | ||
"math/rand" | ||
) | ||
|
||
// SampleRows draws size distinct rows from matrix in random order.
//
// When matrix has size rows or fewer it is returned as is (same slice, no
// copy, original order). The returned rows always alias the rows of matrix.
// SampleRows panics if size is not positive.
func SampleRows(matrix [][]float64, size int) [][]float64 {
	switch {
	case size <= 0:
		panic("size must be greater than 0")
	case len(matrix) <= size:
		return matrix
	}

	// The first size entries of a random permutation are size distinct
	// row indices.
	picked := make([][]float64, 0, size)
	for _, rowIdx := range rand.Perm(len(matrix))[:size] {
		picked = append(picked, matrix[rowIdx])
	}
	return picked
}
|
||
// Column extracts the values at columnIndex from every row of matrix and
// returns them as a new slice, one value per row in row order.
func Column(matrix [][]float64, columnIndex int) []float64 {
	values := make([]float64, 0, len(matrix))
	for r := range matrix {
		values = append(values, matrix[r][columnIndex])
	}
	return values
}
|
||
// MinMax scans floats once and returns its smallest and largest value, in
// that order. For an empty slice it returns (+Inf, -Inf).
func MinMax(floats []float64) (float64, float64) {
	lowest := math.Inf(1)
	highest := math.Inf(-1)
	for i := 0; i < len(floats); i++ {
		current := floats[i]
		// The two checks are independent: the first element updates both.
		if current > highest {
			highest = current
		}
		if current < lowest {
			lowest = current
		}
	}
	return lowest, highest
}
Oops, something went wrong.