Skip to content

Commit

Permalink
updated readme
Browse files — browse the repository at this point in the history
Andrey Zavgorodniy committed on Aug 19, 2015
1 parent 9896bbc commit 558859a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 56 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ Get a trained `LOF` type value:

``` go

lofGetter := lof.NewLOF(5, samples)
lofGetter := lof.NewLOF(5)
lofGetter.Train(samples)

```

Expand Down
3 changes: 2 additions & 1 deletion example.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ func main() {
}

samples := lof.GetSamplesFromFloat64s(points)
lofGetter := lof.NewLOF(5, samples)
lofGetter := lof.NewLOF(5)
lofGetter.Train(samples)
mapping := lofGetter.GetLOFs(samples, "fast")
for sample, factor := range mapping {
fmt.Printf("Sample: %v, \tLOF: %f\n", sample.GetPoint(), factor)
Expand Down
110 changes: 56 additions & 54 deletions lof/factor.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,31 @@ type LOF struct {

// Constructor for LOF type. Check out ./samples.GetSamplesFromFloat64s()
// for fast [][]float64 -> []ISample conversion.
func NewLOF(minPts int, trnSamples []ISample) *LOF {
func NewLOF(minPts int) *LOF {

numSamples := len(trnSamples)
// After training we want to compute LOF values for
// new samples, and we need some space for their
// distances; if we find LOF for one new sample at a
// time, a single additional slot will be enough.
addedIndex := len(trnSamples) + 1
// Create the LOF object
lof := &LOF{
TrainingSet: trnSamples,
MinPts: minPts,
NumSamples: numSamples,
AddedIndex: addedIndex,
}

return lof
}

// Pre-compute distances between training samples and store their
// nearest neighbors in LOF.KNNs.
func (lof *LOF) Train(samples []ISample) {

numSamples := len(samples)
// After training we want to compute LOF values for
// new samples, and we need some space for their
// distances; if we find LOF for one new sample at a
// time, a single additional slot will be enough.
addedIndex := len(samples) + 1

lof.TrainingSet = samples
lof.NumSamples = numSamples
lof.AddedIndex = addedIndex

// Prepare storage between training samples
lof.Distances = make([][]DistItem, addedIndex)
for idx := 0; idx < addedIndex; idx++ {
Expand All @@ -84,12 +94,43 @@ func NewLOF(minPts int, trnSamples []ISample) *LOF {
lof.KNNs = make([][]int, addedIndex)
lof.KNNsBackup = make([][]int, addedIndex)
for idx := 0; idx < addedIndex; idx++ {
lof.KNNs[idx] = make([]int, minPts)
lof.KNNsBackup[idx] = make([]int, minPts)
lof.KNNs[idx] = make([]int, lof.MinPts)
lof.KNNsBackup[idx] = make([]int, lof.MinPts)
}

// Throughout the train() method this value is used for direct indexing
// (i.e., not inside a for ...;...;... statement), so we need
// to subtract 1 in order not to get out of range
addedIndex = lof.AddedIndex - 1
numSamples = lof.NumSamples
for idx, sample := range samples {
sample.SetId(idx) // Just additional info
}

// Compute distances between training samples
for i := 0; i < numSamples; i++ {
for j := 0; j < numSamples; j++ {
if i == j {
lof.Distances[i][j].Value = 0 // This is distinctive
lof.Distances[i][j].Index = j
} else {
lof.Distances[i][j].Value = SampleDist(samples[i], samples[j])
lof.Distances[j][i].Value = lof.Distances[i][j].Value
lof.Distances[i][j].Index = j
lof.Distances[j][i].Index = i
}
}
// Set the additional slot's last value
lof.Distances[addedIndex][addedIndex].Value = 0
lof.Distances[addedIndex][addedIndex].Index = addedIndex
lof.updateNNTable(i, "train")
}
// Save the nearest neighbors table state in the backup storage
for i := 0; i < numSamples; i++ {
for k := 1; k < lof.MinPts; k++ {
lof.KNNsBackup[i][k - 1] = lof.KNNs[i][k - 1]
}
}
lof.train(trnSamples)

return lof
}

// Shortcut for getting LOF for many samples. See GetLOF() method.
Expand Down Expand Up @@ -171,45 +212,6 @@ func (lof *LOF) Reset() {
}
}

// Pre-compute distances between training samples and store their
// nearest neighbors in LOF.KNNs.
func (lof *LOF) train(samples []ISample) {

// Throughout the train() method this value is used for direct indexing
// (i.e., not inside a for ...;...;... statement), so we need
// to subtract 1 in order not to get out of range
addedIndex := lof.AddedIndex - 1
numSamples := lof.NumSamples
for idx, sample := range samples {
sample.SetId(idx) // Just additional info
}

// Compute distances between training samples
for i := 0; i < numSamples; i++ {
for j := 0; j < numSamples; j++ {
if i == j {
lof.Distances[i][j].Value = 0 // This is distinctive
lof.Distances[i][j].Index = j
} else {
lof.Distances[i][j].Value = SampleDist(samples[i], samples[j])
lof.Distances[j][i].Value = lof.Distances[i][j].Value
lof.Distances[i][j].Index = j
lof.Distances[j][i].Index = i
}
}
// Set the additional slot's last value
lof.Distances[addedIndex][addedIndex].Value = 0
lof.Distances[addedIndex][addedIndex].Index = addedIndex
lof.updateNNTable(i, "train")
}
// Save the nearest neighbors table state in the backup storage
for i := 0; i < numSamples; i++ {
for k := 1; k < lof.MinPts; k++ {
lof.KNNsBackup[i][k - 1] = lof.KNNs[i][k - 1]
}
}
}

// Given a sample's index in Distance table, update this sample's
// row in the nearest neighbors table. The @mode parameter
// controls whether we use the whole table row length (with added
Expand Down

0 comments on commit 558859a

Please sign in to comment.