From ca0f4caf189efbaa41b3a4aa041e1632b8b8dcbd Mon Sep 17 00:00:00 2001
From: mramanathan
Date: Mon, 24 Feb 2025 07:51:33 +0000
Subject: [PATCH] Add test for deepseek_coder

---
 .../deepseek_coder/test_deepseek_coder.py     | 63 +++++++++++++++
 .../deepseek_coder/utils/model_utils.py       | 80 +++++++++++++++++++
 .../test_deepseek_math.py                     |  4 +-
 .../test_deepseek_math_prefill.py             |  3 +-
 .../utils/model_utils.py}                     |  0
 5 files changed, 148 insertions(+), 2 deletions(-)
 create mode 100644 forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
 create mode 100644 forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
 rename forge/test/models/pytorch/multimodal/{deepseek => deepseek_math}/test_deepseek_math.py (92%)
 rename forge/test/models/pytorch/multimodal/{deepseek => deepseek_math}/test_deepseek_math_prefill.py (97%)
 rename forge/test/models/pytorch/multimodal/{deepseek/utils/model.py => deepseek_math/utils/model_utils.py} (100%)

diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py b/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
new file mode 100644
index 000000000..080dc2ba7
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/deepseek_coder/test_deepseek_coder.py
@@ -0,0 +1,63 @@
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+import pytest
+
+import forge
+from forge.verify.verify import verify
+
+from test.models.pytorch.multimodal.deepseek_coder.utils.model_utils import (
+    DeepSeekWrapper,
+    download_model_and_tokenizer,
+    generate_no_cache,
+    pad_inputs,
+)
+from test.models.utils import Framework, Source, Task, build_module_name
+
+
+@pytest.mark.nightly
+@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
+def test_deepseek_inference_no_cache(record_forge_property, variant):
+
+    # Build Module Name
+    module_name = build_module_name(
+        framework=Framework.PYTORCH, model="deepseek", variant=variant, task=Task.QA, source=Source.HUGGINGFACE
+    )
+
+    # Record Forge Property
+    record_forge_property("model_name", module_name)
+
+    # Load Model and Tokenizer
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, inputs = download_model_and_tokenizer(model_name)
+    framework_model = DeepSeekWrapper(model)
+    framework_model.eval()
+
+    padded_inputs, seq_len = pad_inputs(inputs)
+
+    # Forge compile framework model
+    compiled_model = forge.compile(framework_model, sample_inputs=[padded_inputs], module_name=module_name)
+
+    # Model Verification
+    verify([padded_inputs], framework_model, compiled_model)
+
+    generated_text = generate_no_cache(
+        max_new_tokens=512, model=compiled_model, inputs=padded_inputs, seq_len=seq_len, tokenizer=tokenizer
+    )
+    print(generated_text)
+
+
+@pytest.mark.parametrize("variant", ["deepseek-coder-1.3b-instruct"])
+def test_deepseek_inference_no_cache_cpu(variant):
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, inputs = download_model_and_tokenizer(model_name)
+
+    framework_model = DeepSeekWrapper(model)
+    framework_model.eval()
+
+    padded_inputs, seq_len = pad_inputs(inputs)
+
+    generated_text = generate_no_cache(
+        max_new_tokens=512, model=framework_model, inputs=padded_inputs, seq_len=seq_len, tokenizer=tokenizer
+    )
+    print(generated_text)
diff --git a/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py b/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
new file mode 100644
index 000000000..2f605b954
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/deepseek_coder/utils/model_utils.py
@@ -0,0 +1,80 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+
+# SPDX-License-Identifier: Apache-2.0
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
+
+
+def generate_no_cache(max_new_tokens, model, inputs, seq_len, tokenizer):
+    """
+    Generates text autoregressively without using a KV cache, iteratively predicting one token at a time.
+    The function stops generation if the maximum number of new tokens is reached or an end-of-sequence (EOS) token is encountered.
+
+    Args:
+        max_new_tokens (int): The maximum number of new tokens to generate.
+        model (torch.nn.Module): The language model used for token generation.
+        inputs (torch.Tensor): Input tensor of shape (batch_size, seq_len), representing tokenized text.
+        seq_len (int): The current sequence length before generation starts.
+        tokenizer: The tokenizer used to decode token IDs into text.
+
+    Returns:
+        str: The generated text after decoding the new tokens.
+    """
+    current_pos = seq_len
+
+    for _ in range(max_new_tokens):
+        logits = model(inputs)
+
+        # Get only the logits corresponding to the last valid token
+        if isinstance(logits, list):
+            logits = logits[0]
+        next_token_logits = logits[:, current_pos - 1, :]
+        next_token_id = torch.argmax(next_token_logits, dim=-1)
+        # Stop if EOS token is encountered
+        if next_token_id.item() == tokenizer.eos_token_id:
+            break
+
+        inputs[:, current_pos] = next_token_id
+
+        current_pos += 1  # Move to next position
+
+    # Decode valid tokens
+    valid_tokens = inputs[:, seq_len:current_pos].view(-1).tolist()
+    answer = tokenizer.decode(valid_tokens, skip_special_tokens=True)
+
+    return answer
+
+
+def pad_inputs(inputs, max_new_tokens=512):
+    batch_size, seq_len = inputs.shape
+    max_seq_len = seq_len + max_new_tokens
+    padded_inputs = torch.zeros((batch_size, max_seq_len), dtype=inputs.dtype, device=inputs.device)
+    padded_inputs[:, :seq_len] = inputs
+    return padded_inputs, seq_len
+
+
+def download_model_and_tokenizer(model_name, **kwargs):
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+
+    # Prepare input sentence
+    messages = [{"role": "user", "content": "write a bubble sort algorithm in python."}]
+    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+    return model, tokenizer, inputs
+
+
+class DeepSeekWrapper(torch.nn.Module):
+    def __init__(self, model):
+        super().__init__()
+        self.model = model
+        self.embed_tokens = model.model.embed_tokens
+
+    def forward(self, input_tensor, attention_mask=None, past_key_values=None):
+        inputs_embeds = self.embed_tokens(input_tensor)
+        past_key_values_length = past_key_values[0][0].shape[-2] if past_key_values is not None else 0
+        causal_attention_mask = _prepare_4d_causal_attention_mask(
+            attention_mask, input_tensor.shape, inputs_embeds, past_key_values_length
+        )
+        return self.model(input_ids=input_tensor, attention_mask=causal_attention_mask).logits
diff --git a/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py b/forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math.py
similarity index 92%
rename from forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py
rename to forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math.py
index 88140ed39..8b1f60772 100644
--- a/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py
+++ b/forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math.py
@@ -5,7 +5,7 @@
 
 import forge
 
-from test.models.pytorch.multimodal.deepseek.utils.model import (
+from test.models.pytorch.multimodal.deepseek_math.utils.model_utils import (
     DeepSeekWrapper,
     download_model_and_tokenizer,
     generation,
@@ -19,6 +19,7 @@ def test_deepseek_inference_no_cache_cpu(variant):
     model, tokenizer, input_ids = download_model_and_tokenizer(model_name)
 
     framework_model = DeepSeekWrapper(model)
+    framework_model.eval()
 
     generated_text = generation(
         max_new_tokens=200, compiled_model=framework_model, input_ids=input_ids, tokenizer=tokenizer
@@ -39,6 +40,7 @@ def test_deepseek_inference(record_forge_property, variant):
     model_name = f"deepseek-ai/{variant}"
     model, tokenizer, input_ids = download_model_and_tokenizer(model_name)
     framework_model = DeepSeekWrapper(model)
+    framework_model.eval()
 
     compiled_model = forge.compile(framework_model, sample_inputs=[input_ids], module_name=module_name)
     generated_text = generation(
diff --git a/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math_prefill.py b/forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math_prefill.py
similarity index 97%
rename from forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math_prefill.py
rename to forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math_prefill.py
index 690001100..3e47f6b0c 100644
--- a/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math_prefill.py
+++ b/forge/test/models/pytorch/multimodal/deepseek_math/test_deepseek_math_prefill.py
@@ -7,7 +7,7 @@
 import forge
 from forge.verify.compare import compare_with_golden
 
-from test.models.pytorch.multimodal.deepseek.utils.model import (
+from test.models.pytorch.multimodal.deepseek_math.utils.model_utils import (
     DeepSeekWrapper_decoder,
     download_model_and_tokenizer,
 )
@@ -67,6 +67,7 @@ def test_deepseek_prefil_on_device_decode_on_cpu(variant):
     # This is the part of the model needed for prefill; model without the last Linear layer (lm_head)
     model_decoder = model.get_decoder()
    model_decoder = DeepSeekWrapper_decoder(model_decoder)
+    model_decoder.eval()
     compiled_decoder = forge.compile(model_decoder, sample_inputs=input_ids)
 
     # Prefill Phase - Process the initial prompt on device
diff --git a/forge/test/models/pytorch/multimodal/deepseek/utils/model.py b/forge/test/models/pytorch/multimodal/deepseek_math/utils/model_utils.py
similarity index 100%
rename from forge/test/models/pytorch/multimodal/deepseek/utils/model.py
rename to forge/test/models/pytorch/multimodal/deepseek_math/utils/model_utils.py
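
Note for reviewers: the decode scheme in this patch is worth spelling out. pad_inputs allocates the full seq_len + max_new_tokens buffer up front so a shape-specialized compiled model sees one static input shape, and generate_no_cache then reruns the whole forward pass each step, reads the logits at current_pos - 1 (the last real token), and writes the argmax token back into the buffer at current_pos. The standalone sketch below exercises just that loop without downloading the DeepSeek weights; EchoModel, the trimmed helper signatures, and the optional eos_token_id parameter are hypothetical stand-ins for illustration, not part of this patch.

import torch


class EchoModel(torch.nn.Module):
    """Toy LM head standing in for the compiled DeepSeek wrapper: at every
    position it 'predicts' (token_id + 1) mod VOCAB, so decoding counts upward."""

    VOCAB = 32

    def forward(self, input_ids):
        batch, seq_len = input_ids.shape
        logits = torch.zeros(batch, seq_len, self.VOCAB)
        next_ids = (input_ids + 1) % self.VOCAB
        logits.scatter_(-1, next_ids.unsqueeze(-1), 1.0)  # one-hot next-token scores
        return logits


def pad_inputs(inputs, max_new_tokens=8):
    # Fixed-shape buffer: the compiled graph is built once for this shape and
    # every decode step reuses it; only the buffer contents change.
    batch, seq_len = inputs.shape
    padded = torch.zeros((batch, seq_len + max_new_tokens), dtype=inputs.dtype)
    padded[:, :seq_len] = inputs
    return padded, seq_len


def generate_no_cache(max_new_tokens, model, inputs, seq_len, eos_token_id=None):
    current_pos = seq_len
    for _ in range(max_new_tokens):
        logits = model(inputs)  # full forward pass over the whole buffer each step
        next_token_id = logits[:, current_pos - 1, :].argmax(dim=-1)  # last real token
        if eos_token_id is not None and next_token_id.item() == eos_token_id:
            break
        inputs[:, current_pos] = next_token_id  # write in place; shape never changes
        current_pos += 1
    return inputs[:, seq_len:current_pos]


if __name__ == "__main__":
    prompt = torch.tensor([[3, 4, 5]])
    padded, seq_len = pad_inputs(prompt, max_new_tokens=8)
    print(generate_no_cache(8, EchoModel(), padded, seq_len))
    # tensor([[ 6,  7,  8,  9, 10, 11, 12, 13]])

The trade-off is the usual one for cache-free decoding: each step recomputes attention over the full buffer instead of reusing a KV cache, in exchange for a single static input shape that is friendly to ahead-of-time compilation.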