diff --git a/src/autograd/function.ts b/src/autograd/function.ts
index b460d0c..5771c33 100644
--- a/src/autograd/function.ts
+++ b/src/autograd/function.ts
@@ -155,21 +155,22 @@ export abstract class BinaryOp extends AutogradFunction {
     pass.setPipeline(this.pipeline);
     pass.setBindGroup(0, bindGroup);

+    // TODO: set these as overrides in the layers/ops level since the kernels are different
     const WORKGROUP_SIZE = 16;
     const TILE_SIZE = 8;
-    const workgropuA = Math.ceil(a.shape[0] / (TILE_SIZE * WORKGROUP_SIZE));
-    const workgropuB = Math.ceil(b.shape[1] / (TILE_SIZE * WORKGROUP_SIZE));
+    const workgroupA = Math.ceil(a.shape[0] / (TILE_SIZE * WORKGROUP_SIZE));
+    const workgroupB = Math.ceil(b.shape[1] / (TILE_SIZE * WORKGROUP_SIZE));
     console.log(
       "a.shape[0]:",
       a.shape[0],
       "b.shape[1]:",
       b.shape[1],
       "launching workgroups",
-      workgropuA,
+      workgroupA,
       ",",
-      workgropuB,
+      workgroupB,
     );
-    pass.dispatchWorkgroups(workgropuA, workgropuB);
+    pass.dispatchWorkgroups(workgroupA, workgroupB);
     pass.end();

     const stagingBuffer = this.device.createBuffer({
diff --git a/src/index.ts b/src/index.ts
index db66762..4eb468d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -15,3 +15,4 @@ export * from "./layers/linear.js";
 export * from "./layers/norm.js";
 export * from "./layers/mlp.js";
 export * from "./layers/attention.js";
+export * from "./model/nomic_embed.js"
\ No newline at end of file
diff --git a/src/layers/attention.ts b/src/layers/attention.ts
index 488e9e2..236f727 100644
--- a/src/layers/attention.ts
+++ b/src/layers/attention.ts
@@ -51,8 +51,11 @@ export class MultiHeadAttention extends Module {
   ): Promise<[Tensor, number]> {
     // Scale factor is 1/sqrt(head_dim)
     const scale = 1 / Math.sqrt(this.head_dim);
-    const scaleTensor = Tensor.full(query.shape, scale, false);
-
+    const scaleTensor = Tensor.full(
+      [query.shape[0], key.shape[0]],
+      scale,
+      false,
+    );
     // Compute attention scores
     const [scores] = await query.matmul(key.transpose());
     const [scaledScores] = await scores.mul(scaleTensor);
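The `scaleTensor` change above sizes the scale to the attention score matrix rather than to the query. A minimal shape sketch of the reasoning, on plain arrays; the `seqLen` and `headDim` values are illustrative assumptions, not values taken from this repo:

```ts
// scores = query.matmul(key.transpose()) has shape [queryRows, keyRows], and
// mul() is element-wise, so the scale tensor must share that shape;
// Tensor.full(query.shape, ...) produced [queryRows, headDim] instead.
const seqLen = 4; // assumed example value
const headDim = 16; // assumed example value

const queryShape = [seqLen, headDim]; // [4, 16]
const keyShape = [seqLen, headDim]; // [4, 16]

// query x key^T -> [queryShape[0], keyShape[0]]
const scoresShape = [queryShape[0], keyShape[0]]; // [4, 4]
const scale = 1 / Math.sqrt(headDim); // 0.25, broadcast into every element

console.log(scoresShape, scale);
```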
diff --git a/src/layers/embedding.ts b/src/layers/embedding.ts
index 54ae9ce..7608a5b 100644
--- a/src/layers/embedding.ts
+++ b/src/layers/embedding.ts
@@ -10,7 +10,7 @@ export class Embedding extends Module {
     this.vocab_size = vocab_size;
     this.emb_dim = emb_dim;

-    this.embedding = Tensor.randn([vocab_size, emb_dim], true);
+    this.embedding = Tensor.normal([vocab_size, emb_dim], true, 0.02);
   }

   async forward(...inputs: [Tensor]): Promise<[Tensor]> {
diff --git a/src/layers/linear.ts b/src/layers/linear.ts
index dcd0157..5aea6af 100644
--- a/src/layers/linear.ts
+++ b/src/layers/linear.ts
@@ -7,8 +7,8 @@ export class Linear extends Module {
   constructor(inputSize: number, outputSize: number) {
     super("linear");

-    this.weight = Tensor.randn([inputSize, outputSize], true);
-    this.bias = Tensor.randn([outputSize], true);
+    this.weight = Tensor.normal([inputSize, outputSize], true, 0.02);
+    this.bias = Tensor.full([outputSize], 0, true);
   }

   async forward(...inputs: [Tensor]): Promise<[Tensor]> {
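`Embedding` and `Linear` above switch from `Tensor.randn` to the new `Tensor.normal(..., 0.02)` initializer and zero the bias. Note that, as defined later in this diff, `Tensor.normal` draws uniform values in `[-initializer_range, initializer_range]` rather than a Gaussian. A standalone sketch of the same scheme on raw arrays:

```ts
// Sketch of the initialization used above: small symmetric random weights,
// zero bias. Mirrors the Tensor.normal / Tensor.full calls in this diff.
function initWeights(rows: number, cols: number, range = 0.02): Float32Array {
  const data = new Float32Array(rows * cols);
  for (let i = 0; i < data.length; i++) {
    // Uniform in [-range, range]; despite its name, Tensor.normal is not Gaussian.
    data[i] = Math.random() * 2 * range - range;
  }
  return data;
}

const weight = initWeights(768, 3072); // e.g. a hidden-to-intermediate projection
const bias = new Float32Array(3072).fill(0); // zero-initialized bias

console.log(weight.length, bias[0]); // 2359296 0
```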
diff --git a/src/layers/norm.ts b/src/layers/norm.ts
index ec1be60..8adda7d 100644
--- a/src/layers/norm.ts
+++ b/src/layers/norm.ts
@@ -21,18 +21,14 @@ export class LayerNorm extends Module {
     // Calculate mean and reshape for broadcasting
     const mean = await x.mean(reduction_dims);
+    console.log("mean.data", mean.data.toString());
     mean.shape = [mean.shape[0], 1]; // [2, 1]

     const variance = await x.variance(reduction_dims);
     variance.shape = [variance.shape[0], 1]; // [2, 1]

-    console.log("x shape:", x.shape); // [2, 3]
-    console.log("mean shape:", mean.shape); // [2, 1]
-    console.log("variance shape:", variance.shape); // [2, 1]
-    console.log("gamma shape:", this.gamma.shape); // [1, 3]
-    console.log("beta shape:", this.beta.shape); // [1, 3]
-
     const [numerator] = await x.sub(mean); // [2, 3]
+    console.log("numerator.data", numerator.data.toString());
     const [denominator] = await variance.add(this.eps);
     const sqrtDenom = await denominator.sqrt();
     const [normalized] = await numerator.div(sqrtDenom);
diff --git a/src/model/nomic_embed.ts b/src/model/nomic_embed.ts
new file mode 100644
index 0000000..7a64acc
--- /dev/null
+++ b/src/model/nomic_embed.ts
@@ -0,0 +1,195 @@
+import { Tensor } from "../tensor/tensor.js";
+import { Module } from "../layers/module.js";
+import { LayerNorm } from "../layers/norm.js";
+import { MultiHeadAttention } from "../layers/attention.js";
+import { MLP } from "../layers/mlp.js";
+import { Embedding } from "../layers/embedding.js";
+
+export interface NomicEmbedConfig {
+  vocab_size: number;
+  hidden_size: number;
+  num_hidden_layers: number;
+  num_attention_heads: number;
+  intermediate_size: number;
+  hidden_act: string;
+  hidden_dropout_prob: number;
+  attention_probs_dropout_prob: number;
+  max_position_embeddings: number;
+  type_vocab_size: number;
+  initializer_range: number;
+  layer_norm_eps: number;
+  pad_token_id: number;
+  position_embedding_type: string;
+  use_cache: boolean;
+  classifier_dropout: number | null;
+  rotary_emb_fraction: number;
+  use_flash_attn: boolean;
+  qkv_proj_bias: boolean;
+  mlp_fc1_bias: boolean;
+  mlp_fc2_bias: boolean;
+  causal: boolean;
+}
+
+class NomicBertEmbeddings extends Module {
+  private wordEmbeddings: Embedding;
+  private positionEmbeddings: Embedding | null;
+  private typeEmbeddings: Embedding | null;
+  private maxPositionEmbeddings: number;
+  private typeVocabSize: number;
+
+  constructor(config: NomicEmbedConfig) {
+    super("bert_embeddings");
+
+    // Word embeddings
+    this.wordEmbeddings = new Embedding(config.vocab_size, config.hidden_size);
+
+    // Position embeddings if using absolute positions
+    this.maxPositionEmbeddings = config.max_position_embeddings;
+    this.positionEmbeddings =
+      this.maxPositionEmbeddings > 0 && config.rotary_emb_fraction <= 0
+        ? new Embedding(config.max_position_embeddings, config.hidden_size)
+        : null;
+
+    // Token type embeddings if used
+    this.typeVocabSize = config.type_vocab_size;
+    this.typeEmbeddings =
+      this.typeVocabSize > 0
+        ? new Embedding(config.type_vocab_size, config.hidden_size)
+        : null;
+  }
+
+  async forward(
+    inputIds: Tensor,
+    positionIds?: Tensor,
+    tokenTypeIds?: Tensor,
+    inputsEmbeds?: Tensor,
+  ): Promise<[Tensor]> {
+    // Get word embeddings
+    let [embeddings] = inputsEmbeds
+      ? [inputsEmbeds]
+      : await this.wordEmbeddings.forward(inputIds);
+
+    // Add token type embeddings if used
+    // if (this.typeEmbeddings && this.typeVocabSize > 0 && tokenTypeIds) {
+    //   const [typeEmbeddings] = await this.typeEmbeddings.forward(tokenTypeIds);
+    //   console.log("typeEmbeddings.data", typeEmbeddings.data.toString());
+    //   console.log("typeEmbeddings.shape", typeEmbeddings.shape);
+    //   [embeddings] = await embeddings.add(typeEmbeddings);
+    // }
+
+    return [embeddings];
+  }
+}
+
+class NomicBertLayer extends Module {
+  private attention: MultiHeadAttention;
+  private mlp: MLP;
+  private layerNorm1: LayerNorm;
+  private layerNorm2: LayerNorm;
+
+  constructor(config: NomicEmbedConfig) {
+    super("bert_layer");
+    this.attention = new MultiHeadAttention(
+      config.hidden_size,
+      config.num_attention_heads,
+    );
+    this.mlp = new MLP(config.hidden_size, config.intermediate_size);
+    this.layerNorm1 = new LayerNorm(
+      [config.hidden_size],
+      config.layer_norm_eps,
+    );
+    this.layerNorm2 = new LayerNorm(
+      [config.hidden_size],
+      config.layer_norm_eps,
+    );
+  }
+
+  async forward(...inputs: [Tensor]): Promise<[Tensor]> {
+    // Self-attention
+    const [hiddenStates] = inputs;
+    const [normed1] = await this.layerNorm1.forward(hiddenStates);
+    const [attnOutput] = await this.attention.forward(normed1);
+    const [residual1] = await hiddenStates.add(attnOutput);
+
+    // MLP
+    const [normed2] = await this.layerNorm2.forward(residual1);
+    const [mlpOutput] = await this.mlp.forward(normed2);
+    const [residual2] = await residual1.add(mlpOutput);
+    return [residual2];
+  }
+}
+
+class NomicBertEncoder extends Module {
+  private layers: NomicBertLayer[];
+
+  constructor(config: NomicEmbedConfig) {
+    super("bert_encoder");
+    this.layers = Array(config.num_hidden_layers)
+      .fill(null)
+      .map(() => new NomicBertLayer(config));
+  }
+
+  async forward(...args: Tensor[]): Promise<[Tensor]> {
+    let [hiddenStates, attentionMask] = args;
+    let currentOutput = hiddenStates;
+
+    // Pass through each layer
+    for (const layer of this.layers) {
+      [currentOutput] = await layer.forward(currentOutput);
+    }
+
+    return [currentOutput];
+  }
+}
+
+export class NomicEmbed extends Module {
+  private embeddings: NomicBertEmbeddings;
+  private encoder: NomicBertEncoder;
+  private emb_ln: LayerNorm;
+
+  constructor(config: NomicEmbedConfig) {
+    super("nomic_embed");
+
+    // Initialize components
+    this.embeddings = new NomicBertEmbeddings(config);
+    this.encoder = new NomicBertEncoder(config);
+    this.emb_ln = new LayerNorm([config.hidden_size], config.layer_norm_eps);
+  }
+
+  private async meanPooling(
+    modelOutput: Tensor,
+    attentionMask: Tensor,
+  ): Promise<[Tensor]> {
+    return [await modelOutput.mean([0])];
+  }
+
+  async forward(...args: Tensor[]): Promise<[Tensor]> {
+    // Get embeddings
+    const [inputIds, attentionMask, positionIds, tokenTypeIds] = args;
+    const [hidden] = await this.embeddings.forward(
+      inputIds,
+      positionIds,
+      tokenTypeIds,
+    );
+    console.log("hidden.data", hidden.data.toString());
+
+    // Apply layer norm
+    const [normed] = await this.emb_ln.forward(hidden);
+    console.log("normed.data", normed.data.toString());
+
+    // Pass through encoder
+    const [encoded] = await this.encoder.forward(normed, attentionMask);
+    // Mean pooling
+    console.log("encoded.data", encoded.data.toString());
+    const [pooled] = await this.meanPooling(encoded, attentionMask);
+    console.log("pooled.shape", pooled.shape);
+
+    const [norm] = await pooled.norm(2, 0);
+    console.log("norm.shape", norm.shape);
+    console.log("norm", norm.data.toString());
+
+    const [pooledNormed] = await pooled.div(norm);
+    // Normalize embeddings
+    return [pooledNormed];
+  }
+}
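A rough usage sketch for the new `NomicEmbed` module. The config mirrors the integration test at the end of this diff, plus `use_flash_attn`, which the interface declares but the test omits; the import path is an assumption (the test loads the same symbols from `/dist/bundle.js` instead):

```ts
import { Tensor, NomicEmbed, NomicEmbedConfig } from "../src/index.js";

const config: NomicEmbedConfig = {
  vocab_size: 30528,
  hidden_size: 768,
  num_hidden_layers: 2,
  num_attention_heads: 2,
  intermediate_size: 3072,
  hidden_act: "swiglu",
  hidden_dropout_prob: 0.0,
  attention_probs_dropout_prob: 0.0,
  max_position_embeddings: 8192,
  type_vocab_size: 2,
  initializer_range: 0.02,
  layer_norm_eps: 1e-12,
  pad_token_id: 0,
  position_embedding_type: "rotary",
  use_cache: true,
  classifier_dropout: null,
  rotary_emb_fraction: 1.0,
  use_flash_attn: false,
  qkv_proj_bias: false,
  mlp_fc1_bias: false,
  mlp_fc2_bias: false,
  causal: false,
};

const model = new NomicEmbed(config);

// A single token id; positionIds / tokenTypeIds are optional in forward().
const inputIds = new Tensor(new Float32Array([1]), [1], false);
const attentionMask = new Tensor(new Float32Array([1]), [1], false);

// forward() returns one L2-normalized embedding of shape [hidden_size].
const [embedding] = await model.forward(inputIds, attentionMask);
console.log(embedding.shape); // [768]
```

The forward pass embeds the token ids, applies `emb_ln`, runs the encoder stack, mean-pools over the sequence, and divides by the L2 norm, so the returned vector should have unit length.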
diff --git a/src/ops/add.ts b/src/ops/add.ts
index 382601e..a0476fa 100644
--- a/src/ops/add.ts
+++ b/src/ops/add.ts
@@ -45,6 +45,11 @@ export class Add extends BinaryOp {
         );
       }
     }
+    console.log("add a.shape:", a.shape);
+    console.log("a.data:", a.data.toString());
+    console.log("add broadcasted b.shape:", b.shape);
+    console.log("b.data:", b.data.toString());
+
     return b;
   }
diff --git a/src/tensor/tensor.ts b/src/tensor/tensor.ts
index bf4c68b..ac90168 100644
--- a/src/tensor/tensor.ts
+++ b/src/tensor/tensor.ts
@@ -70,6 +70,20 @@ export class Tensor {
     return new Tensor(data, shape, requires_grad);
   }

+  static normal(
+    shape: number[],
+    requires_grad = false,
+    initializer_range = 0.01,
+  ) {
+    const data = new Float32Array(shape.reduce((a, b) => a * b));
+
+    for (let i = 0; i < data.length; i++) {
+      data[i] = Math.random() * 2 * initializer_range - initializer_range;
+    }
+
+    return new Tensor(data, shape, requires_grad);
+  }
+
   static broadcast(tensor: Tensor, size: number, requires_grad = false) {
     const shape = [size, ...tensor.shape];
     const data = new Float32Array(shape.reduce((a, b) => a * b));
@@ -102,6 +116,7 @@
     const negOne = Tensor.full(tensor.shape, -1, false);
     const [negTensor] = await tensor.mul(negOne);
+    console.log("this.shape", this.shape);
     return this.add(negTensor);
   }
@@ -305,21 +320,27 @@
   }

   async gather(indices: Tensor): Promise<[Tensor, number]> {
-    // Convert indices to one-hot
-    const oneHot = new Float32Array(indices.shape[0] * this.shape[0]).fill(0);
-    for (let i = 0; i < indices.shape[0]; i++) {
-      const index = indices.data[i] + i * this.shape[0];
-      // set one hot value for the whole vector
-      oneHot.fill(1, index, index + 1);
-    }
-
-    const oneHotTensor = new Tensor(
-      oneHot,
-      [indices.shape[0], this.shape[0]],
-      indices.requires_grad,
-    );
+    // For input shape [batch_size] and embedding matrix [vocab_size, embedding_dim]
+    // We want output shape [batch_size, embedding_dim]
+    const batchSize = indices.shape[0];
+    const embeddingDim = this.shape[1];
+    const result = new Float32Array(batchSize * embeddingDim);
+
+    // For each item in the batch
+    for (let i = 0; i < batchSize; i++) {
+      const tokenId = indices.data[i];
+      // Copy the entire embedding vector for this token
+      const sourceOffset = tokenId * embeddingDim;
+      const targetOffset = i * embeddingDim;
+      for (let j = 0; j < embeddingDim; j++) {
+        result[targetOffset + j] = this.data[sourceOffset + j];
+      }
+    }

-    return oneHotTensor.matmul(this);
+    return [
+      new Tensor(result, [batchSize, embeddingDim], indices.requires_grad),
+      -1,
+    ];
   }

   transpose() {
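The rewritten `gather` above is a plain row lookup: each index copies one full row of the embedding matrix into the output instead of building a one-hot matrix and multiplying. The same logic on raw arrays, with small assumed dimensions:

```ts
// Indices of shape [batchSize] into a [vocabSize, embeddingDim] table
// produce [batchSize, embeddingDim] by copying whole rows.
const embeddingDim = 3;
const table = new Float32Array([
  0, 0, 0, // row for token 0
  1, 1, 1, // row for token 1
  2, 2, 2, // row for token 2
  3, 3, 3, // row for token 3
]);

const indices = [2, 0, 1];
const out = new Float32Array(indices.length * embeddingDim);

for (let i = 0; i < indices.length; i++) {
  const sourceOffset = indices[i] * embeddingDim;
  out.set(table.subarray(sourceOffset, sourceOffset + embeddingDim), i * embeddingDim);
}

console.log(Array.from(out)); // [2, 2, 2, 0, 0, 0, 1, 1, 1]
```

Compared with the old one-hot matmul this avoids materializing a `[batchSize, vocabSize]` intermediate, at the cost of bypassing the matmul autograd path (the op now returns -1 as its second element).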
diff --git a/tests/integration/nomic_embed.test.ts b/tests/integration/nomic_embed.test.ts
new file mode 100644
index 0000000..432ea9c
--- /dev/null
+++ b/tests/integration/nomic_embed.test.ts
@@ -0,0 +1,127 @@
+import { test, expect } from "@playwright/test";
+
+test("NomicEmbed forward pass with known values", async ({ page }) => {
+  await page.goto("http://localhost:8080");
+
+  page.on("console", (msg) => {
+    console.log(msg);
+  });
+
+  // Inject test function
+  await page.evaluate(() => {
+    return new Promise((resolve) => {
+      // @ts-expect-error ignore error for tests
+      import("/dist/bundle.js").then((module) => {
+        const { Tensor, NomicEmbed } = module;
+
+        window.runNomicEmbedTest = async function () {
+          // Create configuration matching the HF config
+          const config = {
+            vocab_size: 30528,
+            hidden_size: 768,
+            num_hidden_layers: 2,
+            num_attention_heads: 2,
+            intermediate_size: 3072,
+            hidden_act: "swiglu",
+            hidden_dropout_prob: 0.0,
+            attention_probs_dropout_prob: 0.0,
+            max_position_embeddings: 8192,
+            type_vocab_size: 2,
+            initializer_range: 0.02,
+            layer_norm_eps: 1e-12,
+            pad_token_id: 0,
+            position_embedding_type: "rotary",
+            use_cache: true,
+            classifier_dropout: null,
+            rotary_emb_fraction: 1.0,
+            qkv_proj_bias: false,
+            mlp_fc1_bias: false,
+            mlp_fc2_bias: false,
+            causal: false,
+          };
+
+          // Create sample input tensors
+          const seqLength = 1; // Small sequence for testing
+
+          // Create input IDs tensor with some token IDs
+          const inputIds = new Tensor(
+            new Float32Array([1]),
+            [seqLength],
+            false,
+          );
+
+          // Create attention mask (all 1s for no masking)
+          const attentionMask = new Tensor(
+            new Float32Array([1]),
+            [seqLength],
+            false,
+          );
+
+          // Create position IDs (optional)
+          const positionIds = new Tensor(
+            new Float32Array([0]),
+            [seqLength],
+            false,
+          );
+
+          // Create token type IDs (optional)
+          const tokenTypeIds = new Tensor(
+            new Float32Array([0]),
+            [seqLength],
+            false,
+          );
+
+          // Initialize model
+          const model = new NomicEmbed(config);
+
+          // Forward pass
+          const [output] = await model.forward(
+            inputIds,
+            attentionMask,
+            positionIds,
+            tokenTypeIds,
+          );
+
+          return {
+            inputShape: inputIds.shape,
+            outputShape: output.shape,
+            outputData: Array.from(output.data),
+          };
+        };
+        resolve();
+      });
+    });
+  });
+
+  // Run the test function in the browser context
+  const result = await page.evaluate(() => window.runNomicEmbedTest());
+
+  // Test input shape
+  expect(result.inputShape).toEqual([1]); // [sequence_length]
+
+  // Test output shape - should be [hidden_size] after pooling and normalization
+  expect(result.outputShape).toEqual([768]); // [hidden_size]
+
+  // Verify output is normalized (L2 norm should be close to 1)
+  const l2Norm = Math.sqrt(
+    result.outputData.reduce((sum, val) => sum + val * val, 0),
+  );
+  expect(l2Norm).toBeCloseTo(1, 6);
+
+  // Verify output values are within reasonable range
+  result.outputData.forEach((value) => {
+    expect(Math.abs(value)).toBeLessThan(1); // Normalized values should be < 1
+  });
+
+  await page.close();
+});
+
+declare global {
+  interface Window {
+    runNomicEmbedTest: () => Promise<{
+      inputShape: number[];
+      outputShape: number[];
+      outputData: number[];
+    }>;
+  }
+}
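The last two assertions in the test rely on a property of L2-normalized vectors: once the norm is close to 1, no component can exceed it in absolute value. A tiny sketch of that relationship:

```ts
const v = [0.6, 0.8];
const l2 = Math.sqrt(v.reduce((sum, x) => sum + x * x, 0)); // 1
const maxAbs = Math.max(...v.map(Math.abs)); // 0.8, bounded by the norm
console.log(l2, maxAbs);
```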