Add test for deepseek_coder

tenstorrent · Feb 27, 2025 · 7549418 · 7549418
1 parent d3e67bd
commit 7549418
Show file tree

Hide file tree

Showing 2 changed files with 128 additions and 0 deletions.
diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py b/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+import pytest
+import torch
+
+import forge
+from forge.verify.verify import verify
+
+from test.models.pytorch.multimodal.deepseek_coder.utils.model_utils import (
+    DeepSeekWrapper,
+    download_model_and_tokenizer,
+    generate_no_cache,
+)
+from test.models.utils import Framework, Source, Task, build_module_name
+
+
+@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
+def test_deepseek_inference_no_cache(record_forge_property, variant):
+
+    # Build Module Name
+    module_name = build_module_name(
+        framework=Framework.PYTORCH, model="deepseek", variant=variant, task=Task.QA, source=Source.HUGGINGFACE
+    )
+
+    # Record Forge Property
+    record_forge_property("model_name", module_name)
+
+    # Load Model and Tokenizer
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, inputs = download_model_and_tokenizer(model_name)
+    framework_model = DeepSeekWrapper(model)
+    batch_size, seq_len = inputs.shape
+    max_new_tokens = 200
+    max_seq_len = seq_len + max_new_tokens
+    padded_inputs = torch.randint(low=0, high=1024, size=(1, max_seq_len), dtype=torch.int64)
+
+    # Forge compile framework model
+    compiled_model = forge.compile(framework_model, sample_inputs=[padded_inputs], module_name=module_name)
+    padded_inputs[:, :seq_len] = inputs
+
+    # Model Verification
+    verify([padded_inputs], framework_model, compiled_model)
+
+    generated_text = generate_no_cache(max_new_tokens=200, model=compiled_model, inputs=inputs, tokenizer=tokenizer)
+    print(generated_text)
+
+
+@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
+def test_deepseek_inference_no_cache_cpu(variant):
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, inputs = download_model_and_tokenizer(model_name)
+
+    framework_model = DeepSeekWrapper(model)
+
+    generated_text = generate(max_new_tokens=200, model=framework_model, inputs=inputs, tokenizer=tokenizer)
+    print(generated_text)
diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py b/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
@@ -0,0 +1,71 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+
+# SPDX-License-Identifier: Apache-2.0
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+def generate_no_cache(max_new_tokens, model, inputs, tokenizer):
+    """
+    Generates text tokens autoregressively up to a maximum length by iteratively predicting the next token
+    using the model and appending it to the sequence until the limit is reached or an EOS token is encountered.
+
+    Args:
+        max_new_tokens (int): Maximum number of tokens to generate.
+        model (torch.nn.Module): The language model used for generation.
+        inputs (torch.Tensor): Input tensor of shape (batch_size, seq_len).
+        tokenizer: Tokenizer for decoding token IDs into text.
+
+    Returns:
+        str: The generated text.
+    """
+    batch_size, seq_len = inputs.shape
+    max_seq_len = seq_len + max_new_tokens  # Fixed total sequence length
+
+    padded_inputs = torch.zeros((batch_size, max_seq_len), dtype=inputs.dtype, device=inputs.device)
+    padded_inputs[:, :seq_len] = inputs
+
+    current_pos = seq_len
+
+    for _ in range(max_new_tokens):
+        logits = model(padded_inputs)
+
+        # Get only the logits corresponding to the last valid token
+        if isinstance(logits, list):
+            logits = logits[0]
+        next_token_logits = logits[:, current_pos - 1, :]
+        next_token_id = torch.argmax(next_token_logits, dim=-1)
+        # Stop if EOS token is encountered
+        if next_token_id.item() == tokenizer.eos_token_id:
+            break
+
+        padded_inputs[:, current_pos] = next_token_id
+
+        current_pos += 1  # Move to next position
+
+    # Decode valid tokens
+    valid_tokens = padded_inputs[:, seq_len:current_pos].view(-1).tolist()
+    answer = tokenizer.decode(valid_tokens, skip_special_tokens=True)
+
+    return answer
+
+
+def download_model_and_tokenizer(model_name, **kwargs):
+    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
+
+    # Prepare input sentence
+    messages = [{"role": "user", "content": "write a bubble sort algorithm in python."}]
+    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+    return model, tokenizer, inputs
+
+
+class DeepSeekWrapper(torch.nn.Module):
+    def __init__(self, model, max_new_tokens=200):
+        super().__init__()
+        self.model = model
+        self.max_new_tokens = max_new_tokens
+
+    def forward(self, input_tensor):
+        return self.model(input_tensor, max_new_tokens=self.max_new_tokens).logits