Add timeout in subprocess run method for generate and export unique ops config (#1231)

The [generate model ops test
pipeline](https://github.com/tenstorrent/tt-forge-fe/actions/runs/13328380954/job/37226649520)
is currently freezing during the unique ops configuration extraction phase.

Error:
`Failed on "DecomposeEinsum" TVM callback`

This error is encountered in the following test case:

`forge/test/models/pytorch/vision/detr/test_detr.py::test_detr_segmentation[facebook/detr-resnet-50-panoptic]`

To prevent the extraction process from hanging indefinitely, a 1200-second (20-minute) timeout has been added to the subprocess run call. If the unique ops configuration extraction exceeds this limit, the test is terminated instead of blocking the pipeline.
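
For reference, the fix amounts to passing a `timeout` to `subprocess.run` and surfacing the failure when it expires. A minimal sketch of that pattern (the script path and error handling below are illustrative assumptions, not the actual tt-forge-fe code):

```python
import subprocess

EXTRACTION_TIMEOUT_SECONDS = 1200  # 20 minutes

try:
    # Run the unique ops config extraction in a child process and kill it if it
    # exceeds the timeout, instead of letting the CI job hang indefinitely.
    result = subprocess.run(
        ["python", "extract_unique_ops_config.py"],  # hypothetical script path
        capture_output=True,
        text=True,
        timeout=EXTRACTION_TIMEOUT_SECONDS,
        check=True,
    )
except subprocess.TimeoutExpired as exc:
    # Convert a hung extraction into an explicit failure so the test errors out
    # rather than freezing the pipeline.
    raise RuntimeError(
        f"Unique ops config extraction exceeded {EXTRACTION_TIMEOUT_SECONDS}s and was terminated"
    ) from exc
```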
chandrasekaranpradeep authored and meenakshiramanathan1 committed Feb 17, 2025
1 parent f75187f commit 76b1781
Showing 10 changed files with 2,449 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
forge/test/models/pytorch/multimodal/deepseek_vl/image/training_pipelines.jpg filter=lfs diff=lfs merge=lfs -text
2 changes: 2 additions & 0 deletions env/core_requirements.txt
@@ -51,3 +51,5 @@ pytorch_forecasting==1.0.0
patool
openpyxl==3.1.5
GitPython==3.1.44
+dotmap==1.3.30
+einops==0.8.1
4 changes: 2 additions & 2 deletions env/linux_requirements.txt
@@ -17,11 +17,11 @@ sacrebleu==2.1.0
sacremoses==0.0.53
seaborn
scikit-image==0.20.0 # For DenseNet 121 HF XRay model
-segmentation_models_pytorch==0.3.2
+segmentation_models_pytorch==0.4.0
sentencepiece==0.2.0
subword-nmt==0.3.8
tensorflow-hub==0.12.0
-timm==0.6.12
+timm==0.9.16
yolov5==7.0.9
# The CPU versions of torch and torch visions are used due to their size being
# several GB smaller which made a large impact on the performance of CI
@@ -5,10 +5,10 @@

import forge

-from test.models.pytorch.multimodal.deepseek.utils.model import (
+from test.models.pytorch.multimodal.deepseek_math.utils.load_model import (
DeepSeekWrapper,
download_model_and_tokenizer,
-    generation,
+    generate,
)
from test.models.utils import Framework, Source, Task, build_module_name

@@ -20,7 +20,7 @@ def test_deepseek_inference_no_cache_cpu(variant):

framework_model = DeepSeekWrapper(model)

-    generated_text = generation(
+    generated_text = generate(
max_new_tokens=200, compiled_model=framework_model, input_ids=input_ids, tokenizer=tokenizer
)
print(generated_text)
@@ -41,7 +41,5 @@ def test_deepseek_inference(record_forge_property, variant):
framework_model = DeepSeekWrapper(model)

compiled_model = forge.compile(framework_model, sample_inputs=[input_ids], module_name=module_name)
-    generated_text = generation(
-        max_new_tokens=1, compiled_model=compiled_model, input_ids=input_ids, tokenizer=tokenizer
-    )
+    generated_text = generate(max_new_tokens=1, compiled_model=compiled_model, input_ids=input_ids, tokenizer=tokenizer)
print(generated_text)
@@ -7,7 +7,7 @@
import forge
from forge.verify.compare import compare_with_golden

-from test.models.pytorch.multimodal.deepseek.utils.model import (
+from test.models.pytorch.multimodal.deepseek_math.utils.load_model import (
DeepSeekWrapper_decoder,
download_model_and_tokenizer,
)
@@ -5,7 +5,7 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


-def generation(max_new_tokens, compiled_model, input_ids, tokenizer):
+def generate(max_new_tokens, compiled_model, input_ids, tokenizer):
for i in range(max_new_tokens):
logits = compiled_model(input_ids)
next_token_logits = logits[:, -1, :]
(Binary file; not displayed.)
@@ -0,0 +1,47 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch

import forge

from test.models.pytorch.multimodal.deepseek_vl.utils.load_model import (
generate,
generate_model_deepseek_vl_pytorch,
verify_deepseek_vl,
)
from test.models.utils import Framework, Source, Task, build_module_name


@pytest.mark.parametrize("variant", ["deepseek-ai/deepseek-vl-1.3b-base"])
def test_deepseek_vl_no_cache_cpu_pytorch(record_forge_property, variant):

framework_model, vl_gpt, tokenizer, inputs_embeds = generate_model_deepseek_vl_pytorch(variant)
answer = generate(
max_new_tokens=512, model=framework_model, inputs_embeds=inputs_embeds, tokenizer=tokenizer, vl_gpt=vl_gpt
)
print(answer)


@pytest.mark.parametrize("variant", ["deepseek-ai/deepseek-vl-1.3b-base"])
def test_deepseek_vl_pytorch(record_forge_property, variant):

# Build Module Name
module_name = build_module_name(
framework=Framework.PYTORCH, model="deepseek", variant=variant, task=Task.QA, source=Source.HUGGINGFACE
)

# Record Forge Property
record_forge_property("model_name", module_name)

framework_model, vl_gpt, tokenizer, inputs_embeds = generate_model_deepseek_vl_pytorch(variant)
padded_inputs_embeds = torch.randn(1, 1140, 2048, dtype=torch.float32)
compiled_model = forge.compile(framework_model, sample_inputs=[padded_inputs_embeds], module_name=module_name)
verify_deepseek_vl(inputs_embeds, framework_model, compiled_model)
answer = generate(
max_new_tokens=512, model=compiled_model, inputs_embeds=inputs_embeds, tokenizer=tokenizer, vl_gpt=vl_gpt
)

print(answer)
118 changes: 118 additions & 0 deletions forge/test/models/pytorch/multimodal/deepseek_vl/utils/load_model.py
@@ -0,0 +1,118 @@
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import torch
from transformers import AutoModelForCausalLM

from forge.verify.verify import verify

from test.models.pytorch.multimodal.deepseek_vl.utils.models.modeling_vlm import (
MultiModalityCausalLM,
VLChatProcessor,
load_pil_images,
)


def verify_deepseek_vl(inputs_embeds, framework_model, compiled_model, max_new_tokens=512):
batch_size, seq_len, embed_dim = inputs_embeds.shape
max_seq_len = seq_len + max_new_tokens # Fixed total sequence length

padded_inputs_embeds = torch.zeros(
(batch_size, max_seq_len, embed_dim), dtype=inputs_embeds.dtype, device=inputs_embeds.device
)
padded_inputs_embeds[:, :seq_len, :] = inputs_embeds # Copy initial embeddings
verify([padded_inputs_embeds], framework_model, compiled_model)


def generate_model_deepseek_vl_pytorch(variant):

model_path = variant
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.eval()

class Wrapper(torch.nn.Module):
def __init__(self, model, max_new_tokens=512):
super().__init__()
self.model = model
self.eos_token_id = tokenizer.eos_token_id
self.bos_token_id = tokenizer.bos_token_id
self.pad_token_id = tokenizer.pad_token_id
self.max_new_tokens = max_new_tokens

def forward(self, inputs_embeds):
return self.model.language_model(
inputs_embeds=inputs_embeds,
pad_token_id=self.pad_token_id,
bos_token_id=self.bos_token_id,
eos_token_id=self.eos_token_id,
max_new_tokens=self.max_new_tokens,
do_sample=False,
use_cache=False,
).logits

framework_model = Wrapper(vl_gpt)

# Single image conversation example
conversation = [
{
"role": "User",
"content": "<image_placeholder>Describe each stage of this image.",
"images": ["forge/test/models/pytorch/multimodal/deepseek_vl/image/training_pipelines.jpg"],
},
{"role": "Assistant", "content": ""},
]

# Load images and prepare for inputs
pil_images = load_pil_images(conversation)
prepare_inputs = vl_chat_processor(conversations=conversation, images=pil_images, force_batchify=True)

# Run image encoder to get the image embeddings
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
return framework_model, vl_gpt, tokenizer, inputs_embeds


def generate(max_new_tokens, model, inputs_embeds, tokenizer, vl_gpt):
batch_size, seq_len, embed_dim = inputs_embeds.shape
max_seq_len = seq_len + max_new_tokens # Fixed total sequence length

padded_inputs_embeds = torch.zeros(
(batch_size, max_seq_len, embed_dim), dtype=inputs_embeds.dtype, device=inputs_embeds.device
)
padded_inputs_embeds[:, :seq_len, :] = inputs_embeds # Copy initial embeddings

generated_token_ids = torch.full(
(batch_size, max_seq_len), tokenizer.eos_token_id, dtype=torch.long, device=vl_gpt.device
)
current_pos = seq_len

for _ in range(max_new_tokens):
logits = model(padded_inputs_embeds)

# Get only the logits corresponding to the last valid token
if isinstance(logits, list):
next_token_logits = logits[0][:, current_pos - 1, :]
else:
next_token_logits = logits[:, current_pos - 1, :]
next_token_id = torch.argmax(next_token_logits, dim=-1)

# Stop if EOS token is encountered
if next_token_id.item() == tokenizer.eos_token_id:
break

# Store generated token
generated_token_ids[:, current_pos] = next_token_id

# Update embeddings in fixed position
new_embedding = vl_gpt.language_model.get_input_embeddings()(next_token_id.unsqueeze(0))
padded_inputs_embeds[:, current_pos, :] = new_embedding.squeeze(0)

current_pos += 1 # Move to next position

# Decode valid tokens
valid_tokens = generated_token_ids[:, seq_len:current_pos].view(-1).tolist()
answer = tokenizer.decode(valid_tokens, skip_special_tokens=True)

return answer