-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a general-purpose autoencoder class AE
to serve as a building block for other autoencoder models
#932
Add a general-purpose autoencoder class AE
to serve as a building block for other autoencoder models
#932
Changes from all commits
3e83771
d2153e6
d1bab11
3eccd36
98b47ce
6719978
b18bdf0
2408360
82483e9
6dd6dce
8572cc8
03def58
e3c4c6d
a4257fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import AE from "./autoencoder"; | ||
|
||
const trainingData = [ | ||
[0, 0, 0], | ||
[0, 1, 1], | ||
[1, 0, 1], | ||
[1, 1, 0] | ||
]; | ||
|
||
const xornet = new AE<number[], number[]>( | ||
{ | ||
decodedSize: 3, | ||
hiddenLayers: [ 5, 2, 5 ] | ||
} | ||
); | ||
|
||
const errorThresh = 0.011; | ||
|
||
const result = xornet.train( | ||
trainingData, { | ||
iterations: 100000, | ||
errorThresh | ||
} | ||
); | ||
|
||
test( | ||
"denoise a data sample", | ||
async () => { | ||
expect(result.error).toBeLessThanOrEqual(errorThresh); | ||
|
||
function xor(...args: number[]) { | ||
return Math.round(xornet.denoise(args)[2]); | ||
} | ||
|
||
const run1 = xor(0, 0, 0); | ||
const run2 = xor(0, 1, 1); | ||
const run3 = xor(1, 0, 1); | ||
const run4 = xor(1, 1, 0); | ||
|
||
expect(run1).toBe(0); | ||
expect(run2).toBe(1); | ||
expect(run3).toBe(1); | ||
expect(run4).toBe(0); | ||
} | ||
); | ||
|
||
test( | ||
"encode and decode a data sample", | ||
async () => { | ||
expect(result.error).toBeLessThanOrEqual(errorThresh); | ||
|
||
const run1$input = [0, 0, 0]; | ||
const run1$encoded = xornet.encode(run1$input); | ||
const run1$decoded = xornet.decode(run1$encoded); | ||
|
||
const run2$input = [0, 1, 1]; | ||
const run2$encoded = xornet.encode(run2$input); | ||
const run2$decoded = xornet.decode(run2$encoded); | ||
|
||
for (let i = 0; i < 3; i++) expect(Math.round(run1$decoded[i])).toBe(run1$input[i]); | ||
for (let i = 0; i < 3; i++) expect(Math.round(run2$decoded[i])).toBe(run2$input[i]); | ||
} | ||
); | ||
|
||
test( | ||
"test a data sample for anomalies", | ||
async () => { | ||
expect(result.error).toBeLessThanOrEqual(errorThresh); | ||
|
||
function includesAnomalies(...args: number[]) { | ||
expect(xornet.likelyIncludesAnomalies(args)).toBe(false); | ||
} | ||
|
||
includesAnomalies(0, 0, 0); | ||
includesAnomalies(0, 1, 1); | ||
includesAnomalies(1, 0, 1); | ||
includesAnomalies(1, 1, 0); | ||
} | ||
); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
import { KernelOutput, Texture, TextureArrayOutput } from "gpu.js"; | ||
import { IJSONLayer, INeuralNetworkData, INeuralNetworkDatum, INeuralNetworkTrainOptions } from "./neural-network"; | ||
import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; | ||
import { INeuralNetworkState } from "./neural-network-types"; | ||
import { UntrainedNeuralNetworkError } from "./errors/untrained-neural-network-error"; | ||
|
||
/** Configuration options for the {@link AE} autoencoder. */
export interface IAEOptions {
  /** Binary threshold forwarded to the underlying denoiser network. */
  binaryThresh: number;
  /** Size of the decoded (input/output) data vector. */
  decodedSize: number;
  /** Hidden layer sizes; the middle layer holds the encoded representation. */
  hiddenLayers: number[];
}
|
||
/** | ||
* An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation. | ||
*/ | ||
export class AE<DecodedData extends INeuralNetworkData, EncodedData extends INeuralNetworkData> { | ||
private decoder?: NeuralNetworkGPU<EncodedData, DecodedData>; | ||
private denoiser: NeuralNetworkGPU<DecodedData, DecodedData>; | ||
|
||
constructor ( | ||
options?: Partial<IAEOptions> | ||
) { | ||
// Create default options for the autoencoder. | ||
options ??= {}; | ||
|
||
// Create default options for the autoencoder's denoiser subnet. | ||
const denoiserOptions: Partial<INeuralNetworkGPUOptions> = {}; | ||
|
||
// Inherit the binary threshold of the parent autoencoder. | ||
denoiserOptions.binaryThresh = options.binaryThresh; | ||
// Inherit the hidden layers of the parent autoencoder. | ||
denoiserOptions.hiddenLayers = options.hiddenLayers; | ||
|
||
// Define the denoiser subnet's input and output sizes. | ||
if (options.decodedSize) denoiserOptions.inputSize = denoiserOptions.outputSize = options.decodedSize; | ||
|
||
// Create the denoiser subnet of the autoencoder. | ||
this.denoiser = new NeuralNetworkGPU<DecodedData, DecodedData>(options); | ||
} | ||
|
||
/** | ||
* Denoise input data, removing any anomalies from the data. | ||
* @param {DecodedData} input | ||
* @returns {DecodedData} | ||
*/ | ||
denoise(input: DecodedData): DecodedData { | ||
// Run the input through the generic denoiser. | ||
// This isn't the best denoiser implementation, but it's efficient. | ||
// Efficiency is important here because training should focus on | ||
// optimizing for feature extraction as quickly as possible rather than | ||
// denoising and anomaly detection; there are other specialized topologies | ||
// better suited for these tasks anyways, many of which can be implemented | ||
// by using an autoencoder. | ||
return this.denoiser.run(input); | ||
} | ||
|
||
/** | ||
* Decode `EncodedData` into an approximation of its original form. | ||
* | ||
* @param {EncodedData} input | ||
* @returns {DecodedData} | ||
*/ | ||
decode(input: EncodedData): DecodedData { | ||
// If the decoder has not been trained yet, throw an error. | ||
if (!this.decoder) throw new UntrainedNeuralNetworkError(this); | ||
|
||
// Decode the encoded input. | ||
return this.decoder.run(input); | ||
} | ||
|
||
/** | ||
* Encode data to extract features, reduce dimensionality, etc. | ||
* | ||
* @param {DecodedData} input | ||
* @returns {EncodedData} | ||
*/ | ||
encode(input: DecodedData): EncodedData { | ||
// If the decoder has not been trained yet, throw an error. | ||
if (!this.denoiser) throw new UntrainedNeuralNetworkError(this); | ||
|
||
// Process the input. | ||
this.denoiser.run(input); | ||
|
||
// Get the auto-encoded input. | ||
let encodedInput: TextureArrayOutput = this.encodedLayer as TextureArrayOutput; | ||
|
||
// If the encoded input is a `Texture`, convert it into an `Array`. | ||
if (encodedInput instanceof Texture) encodedInput = encodedInput.toArray(); | ||
else encodedInput = encodedInput.slice(0); | ||
|
||
// Return the encoded input. | ||
return encodedInput as EncodedData; | ||
} | ||
|
||
/** | ||
* Test whether or not a data sample likely contains anomalies. | ||
* If anomalies are likely present in the sample, returns `true`. | ||
* Otherwise, returns `false`. | ||
* | ||
* @param {DecodedData} input | ||
* @returns {boolean} | ||
*/ | ||
likelyIncludesAnomalies(input: DecodedData, anomalyThreshold: number = 0.2): boolean { | ||
// Create the anomaly vector. | ||
const anomalies: number[] = []; | ||
|
||
// Attempt to denoise the input. | ||
const denoised = this.denoise(input); | ||
|
||
// Calculate the anomaly vector. | ||
for (let i = 0; i < (input.length ?? 0); i++) { | ||
anomalies[i] = Math.abs((input as number[])[i] - (denoised as number[])[i]); | ||
} | ||
|
||
// Calculate the sum of all anomalies within the vector. | ||
const sum = anomalies.reduce( | ||
(previousValue, value) => previousValue + value | ||
); | ||
|
||
// Calculate the mean anomaly. | ||
const mean = sum / (input as number[]).length; | ||
|
||
// Return whether or not the mean anomaly rate is greater than the anomaly threshold. | ||
return mean > anomalyThreshold; | ||
} | ||
|
||
/** | ||
* Train the auto encoder. | ||
* | ||
* @param {DecodedData[]} data | ||
* @param {Partial<INeuralNetworkTrainOptions>} options | ||
* @returns {INeuralNetworkState} | ||
*/ | ||
train(data: DecodedData[], options?: Partial<INeuralNetworkTrainOptions>): INeuralNetworkState { | ||
const preprocessedData: INeuralNetworkDatum<Partial<DecodedData>, Partial<DecodedData>>[] = []; | ||
|
||
for (let datum of data) { | ||
preprocessedData.push( { input: datum, output: datum } ); | ||
} | ||
|
||
const results = this.denoiser.train(preprocessedData, options); | ||
|
||
this.decoder = this.createDecoder(); | ||
|
||
return results; | ||
} | ||
|
||
/** | ||
* Create a new decoder from the trained denoiser. | ||
* | ||
* @returns {NeuralNetworkGPU<EncodedData, DecodedData>} | ||
*/ | ||
private createDecoder() { | ||
const json = this.denoiser.toJSON(); | ||
|
||
const layers: IJSONLayer[] = []; | ||
const sizes: number[] = []; | ||
|
||
for (let i = this.encodedLayerIndex; i < this.denoiser.sizes.length; i++) { | ||
layers.push(json.layers[i]); | ||
sizes.push(json.sizes[i]); | ||
} | ||
|
||
json.layers = layers; | ||
json.sizes = sizes; | ||
|
||
json.options.inputSize = json.sizes[0]; | ||
|
||
const decoder = new NeuralNetworkGPU().fromJSON(json); | ||
|
||
return decoder as unknown as NeuralNetworkGPU<EncodedData, DecodedData>; | ||
} | ||
|
||
/** | ||
* Get the layer containing the encoded representation. | ||
*/ | ||
private get encodedLayer(): KernelOutput { | ||
return this.denoiser.outputs[this.encodedLayerIndex]; | ||
} | ||
|
||
/** | ||
* Get the offset of the encoded layer. | ||
*/ | ||
private get encodedLayerIndex(): number { | ||
return Math.round(this.denoiser.outputs.length * 0.5) - 1; | ||
} | ||
} | ||
|
||
export default AE; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
export class UntrainedNeuralNetworkError extends Error { | ||
constructor ( | ||
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and windows-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and windows-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and windows-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and windows-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 20.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 20.x and ubuntu-latest
Check warning on line 2 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 20.x and windows-latest
|
||
neuralNetwork: any | ||
) { | ||
Check failure on line 4 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and ubuntu-latest
Check failure on line 4 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 16.x and windows-latest
Check failure on line 4 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and ubuntu-latest
Check failure on line 4 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 18.x and windows-latest
Check failure on line 4 in src/errors/untrained-neural-network-error.ts GitHub Actions / Build, lint, and test on Node 20.x and ubuntu-latest
|
||
super(`Cannot run a ${neuralNetwork.constructor.name} before it is trained.`); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we make AE support non-GPU neural networks? Or is there a specific reason why we are tying this to GPU NNs only?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if it ever made it into mainstream or not, but I already wrote both CPU and GPU implementations of the autoencoder class (`Autoencoder` and `AutoencoderGPU`, IIRC). If I forgot to create a PR, I'll make one soon 😊

Sorry for the lack of updates for a while. Health got in the way of work, but I've mostly recovered and am back in good health, so I've recently started working full time again. I'll try to make more progress with the autoencoder and loss-function features 💖
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made a PR that adds serialization (toJSON & fromJSON) to AE - #950