From 7549418303d46b8c208be61e28aefa524f7fdd48 Mon Sep 17 00:00:00 2001
From: mramanathan
Date: Mon, 24 Feb 2025 07:51:33 +0000
Subject: [PATCH] Add test for deepseek_coder

---
 .../deepseek_coder/test_deepseek_coder.py | 57 +++++++++++++++
 .../deepseek_coder/utils/model_utils.py   | 71 +++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
 create mode 100644 forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py

diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py b/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
new file mode 100644
index 000000000..a074ac85a
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import pytest
import torch

import forge
from forge.verify.verify import verify

from test.models.pytorch.multimodal.deepseek_coder.utils.model_utils import (
    DeepSeekWrapper,
    download_model_and_tokenizer,
    generate_no_cache,
)
from test.models.utils import Framework, Source, Task, build_module_name


@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
def test_deepseek_inference_no_cache(record_forge_property, variant):

    # Build Module Name
    module_name = build_module_name(
        framework=Framework.PYTORCH, model="deepseek", variant=variant, task=Task.QA, source=Source.HUGGINGFACE
    )

    # Record Forge Property
    record_forge_property("model_name", module_name)

    # Load Model and Tokenizer
    model_name = f"deepseek-ai/{variant}"
    model, tokenizer, inputs = download_model_and_tokenizer(model_name)
    framework_model = DeepSeekWrapper(model)
    batch_size, seq_len = inputs.shape
    max_new_tokens = 200
    max_seq_len = seq_len + max_new_tokens
    padded_inputs = torch.randint(low=0, high=1024, size=(batch_size, max_seq_len), dtype=torch.int64)

    # Forge compile framework model
    compiled_model = forge.compile(framework_model, sample_inputs=[padded_inputs], module_name=module_name)
    padded_inputs[:, :seq_len] = inputs  # Copy the real prompt tokens into the padded buffer

    # Model Verification
    verify([padded_inputs], framework_model, compiled_model)

    generated_text = generate_no_cache(max_new_tokens=max_new_tokens, model=compiled_model, inputs=inputs, tokenizer=tokenizer)
    print(generated_text)


@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
def test_deepseek_inference_no_cache_cpu(variant):
    model_name = f"deepseek-ai/{variant}"
    model, tokenizer, inputs = download_model_and_tokenizer(model_name)

    framework_model = DeepSeekWrapper(model)

    generated_text = generate_no_cache(max_new_tokens=200, model=framework_model, inputs=inputs, tokenizer=tokenizer)
    print(generated_text)

diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py b/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
new file mode 100644
index 000000000..f858c61cd
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def generate_no_cache(max_new_tokens, model, inputs, tokenizer):
    """
    Generates text tokens autoregressively up to a maximum length by iteratively predicting the next token
    using the model and appending it to the sequence until the limit is reached or an EOS token is encountered.

    Args:
        max_new_tokens (int): Maximum number of tokens to generate.
        model (torch.nn.Module): The language model used for generation.
        inputs (torch.Tensor): Input tensor of shape (batch_size, seq_len).
        tokenizer: Tokenizer for decoding token IDs into text.

    Returns:
        str: The generated text.
    """
    batch_size, seq_len = inputs.shape
    max_seq_len = seq_len + max_new_tokens  # Fixed total sequence length

    padded_inputs = torch.zeros((batch_size, max_seq_len), dtype=inputs.dtype, device=inputs.device)
    padded_inputs[:, :seq_len] = inputs

    current_pos = seq_len

    for _ in range(max_new_tokens):
        logits = model(padded_inputs)

        # Get only the logits corresponding to the last valid token
        if isinstance(logits, list):
            logits = logits[0]
        next_token_logits = logits[:, current_pos - 1, :]
        next_token_id = torch.argmax(next_token_logits, dim=-1)

        # Stop if EOS token is encountered
        if next_token_id.item() == tokenizer.eos_token_id:
            break

        padded_inputs[:, current_pos] = next_token_id
        current_pos += 1  # Move to next position

    # Decode only the newly generated tokens
    valid_tokens = padded_inputs[:, seq_len:current_pos].view(-1).tolist()
    answer = tokenizer.decode(valid_tokens, skip_special_tokens=True)

    return answer


def download_model_and_tokenizer(model_name, **kwargs):
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

    # Prepare input sentence
    messages = [{"role": "user", "content": "write a bubble sort algorithm in python."}]
    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)

    return model, tokenizer, inputs


class DeepSeekWrapper(torch.nn.Module):
    def __init__(self, model, max_new_tokens=200):
        super().__init__()
        self.model = model
        self.max_new_tokens = max_new_tokens

    def forward(self, input_tensor):
        # The HF forward pass does not accept generation kwargs such as max_new_tokens;
        # generation length is controlled by the caller (see generate_no_cache).
        return self.model(input_tensor).logits
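
For reference (not part of the patch), here is a minimal sketch of the equivalent greedy decode through the stock Hugging Face generate() API, which can be used to sanity-check what generate_no_cache produces for the same prompt. It assumes the deepseek-ai/deepseek-coder-1.3b-instruct checkpoint is reachable and that plain transformers on CPU, without Forge compilation, is acceptable for the comparison.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Reference greedy decode via Hugging Face generate(); mirrors the argmax loop
# in generate_no_cache above. Assumes network access to the checkpoint.
model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

messages = [{"role": "user", "content": "write a bubble sort algorithm in python."}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

with torch.no_grad():
    output_ids = model.generate(
        input_ids,
        max_new_tokens=200,
        do_sample=False,  # greedy decoding, matching torch.argmax in generate_no_cache
        eos_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(output_ids[0, input_ids.shape[1]:], skip_special_tokens=True))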