Skip to content

Commit

Permalink
Bringup tt-torch models in forge
Browse files Browse the repository at this point in the history
  • Loading branch information
kamalrajkannan78 committed Feb 25, 2025
1 parent 9be605e commit 899912e
Show file tree
Hide file tree
Showing 30 changed files with 628 additions and 1 deletion.
1 change: 1 addition & 0 deletions env/core_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ pytorch_forecasting==1.0.0
patool
openpyxl==3.1.5
GitPython==3.1.44
kornia==0.8.0
72 changes: 72 additions & 0 deletions forge/test/models/pytorch/text/albert/test_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
import torch
from transformers import (
AlbertForMaskedLM,
AlbertForQuestionAnswering,
AlbertForSequenceClassification,
AlbertForTokenClassification,
AlbertTokenizer,
AutoTokenizer,
)

import forge
Expand Down Expand Up @@ -162,3 +165,72 @@ def test_albert_token_classification_pytorch(record_forge_property, size, varian

print(f"Context: {sample_text}")
print(f"Answer: {predicted_tokens_classes}")


@pytest.mark.nightly
def test_albert_question_answering_pytorch(record_forge_property):
    """Compile the ALBERT question-answering checkpoint with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
    """
    checkpoint = "twmkn9/albert-base-v2-squad2"

    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="albert",
        task=Task.QA,
        source=Source.HUGGINGFACE,
    )
    record_forge_property("model_name", module_name)

    # Tokenizer and model come from the same HuggingFace checkpoint.
    tokenizer = download_model(AutoTokenizer.from_pretrained, checkpoint)
    framework_model = download_model(AlbertForQuestionAnswering.from_pretrained, checkpoint, return_dict=False)

    # Encode a (question, context) pair and keep only the tensors fed to the model.
    encoded = tokenizer("Who was Jim Henson?", "Jim Henson was a nice puppet", return_tensors="pt")
    inputs = [encoded["input_ids"], encoded["attention_mask"]]

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)


@pytest.mark.nightly
@pytest.mark.push
def test_albert_sequence_classification_pytorch(record_forge_property):
    """Compile the ALBERT sequence-classification checkpoint with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
    """
    checkpoint = "textattack/albert-base-v2-imdb"

    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="albert",
        task=Task.SEQUENCE_CLASSIFICATION,
        source=Source.HUGGINGFACE,
    )
    record_forge_property("model_name", module_name)

    # Tokenizer and model come from the same HuggingFace checkpoint.
    tokenizer = download_model(AlbertTokenizer.from_pretrained, checkpoint)
    framework_model = download_model(AlbertForSequenceClassification.from_pretrained, checkpoint, return_dict=False)

    # Encode a single sentence and keep only the tensors fed to the model.
    encoded = tokenizer("Hello, my dog is cute.", return_tensors="pt")
    inputs = [encoded["input_ids"], encoded["attention_mask"]]

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)
Empty file.
47 changes: 47 additions & 0 deletions forge/test/models/pytorch/text/bloom/test_bloom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import pytest
import torch

import forge
from forge.verify.verify import verify

from test.models.pytorch.text.bloom.utils.utils import load_input, load_model
from test.models.utils import Framework, Source, Task, build_module_name


class Wrapper(torch.nn.Module):
    """Two-input adapter around a model whose second positional argument must be skipped.

    ``forward`` accepts only ``input_ids`` and ``attention_mask`` and forwards
    them as the first and third positional arguments, passing ``None`` in
    between (the past-key-values slot, per the original author's note).
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids, attention_mask):
        # Return the wrapped model's output unchanged.
        return self.model(input_ids, None, attention_mask)


@pytest.mark.nightly
def test_bloom(record_forge_property):
    """Compile the wrapped BLOOM causal-LM with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
    """
    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="bloom",
        source=Source.HUGGINGFACE,
        task=Task.CAUSAL_LM,
    )
    record_forge_property("model_name", module_name)

    # Wrap the loaded model so it takes exactly the two tensors load_input() returns.
    framework_model = Wrapper(load_model())
    inputs = load_input()

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)
25 changes: 25 additions & 0 deletions forge/test/models/pytorch/text/bloom/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_model():
    """Return the ``bigscience/bloom-1b1`` causal LM in eval mode with ``use_cache`` turned off."""
    bloom = AutoModelForCausalLM.from_pretrained("bigscience/bloom-1b1")
    bloom.config.use_cache = False
    # nn.Module.eval() returns the module itself, so this yields the same object.
    return bloom.eval()


def load_input():
    """Tokenize a fixed prompt for ``bigscience/bloom-1b1``.

    The prompt is padded on the left (and truncated if needed) to 32 tokens.

    Returns:
        A two-element list: ``[input_ids, attention_mask]`` as PyTorch tensors.
    """
    tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-1b1", padding_side="left")
    encoded = tokenizer.encode_plus(
        "This is a sample text from ",
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        max_length=32,
        return_tensors="pt",
    )
    return [encoded[key] for key in ("input_ids", "attention_mask")]
Empty file.
58 changes: 58 additions & 0 deletions forge/test/models/pytorch/text/mamba/test_mamba.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC

# SPDX-License-Identifier: Apache-2.0
# Reference: https://huggingface.co/state-spaces/mamba-2.8b-hf

import pytest
import torch

import forge
from forge.verify.verify import verify

from test.models.pytorch.text.mamba.utils.utils import load_input, load_model
from test.models.utils import Framework, Source, Task, build_module_name


class Wrapper(torch.nn.Module):
    """Adapter that exposes only the first element of the wrapped model's output.

    Per the original author's note, this drops the cache and any additional
    outputs so that only the output tensor is returned.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, input_ids):
        # Index 0 selects the first entry of whatever the wrapped model returns.
        return self.model(input_ids)[0]


# Checkpoints the test is parametrized over; all but one are skipped inside the test body.
variants = [
    "state-spaces/mamba-790m-hf",
    "state-spaces/mamba-2.8b-hf",
    "state-spaces/mamba-1.4b-hf",
    "state-spaces/mamba-370m-hf",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants)
def test_mamba(record_forge_property, variant):
    """Compile a wrapped Mamba causal-LM variant with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
        variant: HuggingFace checkpoint id; every value other than
            ``state-spaces/mamba-790m-hf`` is skipped.
    """
    enabled_variant = "state-spaces/mamba-790m-hf"
    if variant != enabled_variant:
        pytest.skip("Skipping this variant; only testing the base model (mamba-790m-hf) for now.")

    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="mamba",
        variant=variant,
        task=Task.CAUSAL_LM,
        source=Source.HUGGINGFACE,
    )
    record_forge_property("model_name", module_name)

    # Wrap the loaded model so only the first output element is returned.
    framework_model = Wrapper(load_model(variant))
    inputs = load_input(variant)

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)
Empty file.
19 changes: 19 additions & 0 deletions forge/test/models/pytorch/text/mamba/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC

# SPDX-License-Identifier: Apache-2.0
# Reference: https://huggingface.co/state-spaces/mamba-2.8b-hf

from transformers import AutoTokenizer, MambaForCausalLM


def load_model(variant):
    """Return the ``MambaForCausalLM`` checkpoint named by ``variant``, switched to eval mode."""
    # nn.Module.eval() returns the module itself, so this yields the loaded model.
    return MambaForCausalLM.from_pretrained(variant).eval()


def load_input(variant):
    """Tokenize a fixed prompt with the tokenizer of ``variant``.

    Returns:
        A one-element list holding the ``input_ids`` PyTorch tensor.
    """
    tokenizer = AutoTokenizer.from_pretrained(variant)
    encoded = tokenizer("Hey how are you doing?", return_tensors="pt")
    return [encoded["input_ids"]]
34 changes: 33 additions & 1 deletion forge/test/models/pytorch/text/qwen/test_qwen_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
#
# SPDX-License-Identifier: Apache-2.0
import pytest
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
Qwen2ForTokenClassification,
)

import forge
from forge.verify.verify import verify
Expand Down Expand Up @@ -57,3 +61,31 @@ def test_qwen_clm(record_forge_property, variant):

# Model Verification
verify(inputs, framework_model, compiled_model)


@pytest.mark.nightly
def test_qwen2_token_classification(record_forge_property):
    """Compile ``Qwen2ForTokenClassification`` with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
    """
    checkpoint = "Qwen/Qwen2-7B"

    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="qwen_v2",
        task=Task.TOKEN_CLASSIFICATION,
        source=Source.HUGGINGFACE,
    )
    record_forge_property("model_name", module_name)

    # Model and tokenizer are loaded from the same checkpoint.
    framework_model = Qwen2ForTokenClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Encode the sample sentence without special tokens; keep only the tensors fed to the model.
    encoded = tokenizer(
        "HuggingFace is a company based in Paris and New York.",
        add_special_tokens=False,
        return_tensors="pt",
    )
    inputs = [encoded["input_ids"], encoded["attention_mask"]]

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)
Empty file.
39 changes: 39 additions & 0 deletions forge/test/models/pytorch/vision/beit/test_beit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC

# SPDX-License-Identifier: Apache-2.0
import pytest

import forge
from forge.verify.verify import verify

from test.models.pytorch.vision.beit.utils.utils import load_input, load_model
from test.models.utils import Framework, Source, Task, build_module_name

# HuggingFace BEiT checkpoints the test is parametrized over.
variants = [
    "microsoft/beit-base-patch16-224",
    "microsoft/beit-large-patch16-224",
]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants)
def test_beit_image_classification(record_forge_property, variant):
    """Compile a BEiT image-classification variant with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
        variant: HuggingFace checkpoint id passed to the model and input loaders.
    """
    # The generated module name is both recorded as a property and handed to the compiler.
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="beit",
        variant=variant,
        source=Source.HUGGINGFACE,
        task=Task.IMAGE_CLASSIFICATION,
    )
    record_forge_property("model_name", module_name)

    # Model and sample inputs are both derived from the same variant id.
    framework_model = load_model(variant)
    inputs = load_input(variant)

    # Compile, then hand the framework and compiled models to verify() with the same inputs.
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)
    verify(inputs, framework_model, compiled_model)
Empty file.
20 changes: 20 additions & 0 deletions forge/test/models/pytorch/vision/beit/utils/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import requests
from PIL import Image
from transformers import BeitForImageClassification, BeitImageProcessor


def load_model(variant):
    """Return the ``BeitForImageClassification`` checkpoint named by ``variant``, switched to eval mode."""
    # nn.Module.eval() returns the module itself, so this yields the loaded model.
    return BeitForImageClassification.from_pretrained(variant).eval()


def load_input(variant):
    """Download a sample image and preprocess it for the given BEiT variant.

    Args:
        variant: HuggingFace checkpoint id used to load the ``BeitImageProcessor``.

    Returns:
        A one-element list holding the ``pixel_values`` PyTorch tensor.

    Raises:
        requests.HTTPError: If the image server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"

    # Without a timeout a stalled download blocks the test run indefinitely, and
    # without a status check an HTTP error page would be handed to PIL as image data.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        image = Image.open(response.raw)
        # Image.open is lazy; read the pixel data before the connection is released.
        image.load()

    processor = BeitImageProcessor.from_pretrained(variant)
    inputs = processor(images=image, return_tensors="pt")
    return [inputs["pixel_values"]]
Empty file.
33 changes: 33 additions & 0 deletions forge/test/models/pytorch/vision/glpn_kitti/test_glpn_kitti.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC

# SPDX-License-Identifier: Apache-2.0
import pytest

import forge
from forge.verify.verify import verify

from test.models.pytorch.vision.glpn_kitti.utils.utils import load_input, load_model
from test.models.utils import Framework, Source, Task, build_module_name


@pytest.mark.nightly
def test_glpn_kitti(record_forge_property):
    """Compile the GLPN-KITTI depth-estimation model with Forge and run ``verify`` on it.

    Args:
        record_forge_property: Callable fixture used here to record the
            generated module name under the ``"model_name"`` key.
    """
    # Build Module Name
    module_name = build_module_name(
        framework=Framework.PYTORCH,
        model="glpn_kitti",
        source=Source.HUGGINGFACE,
        task=Task.DEPTH_ESTIMATION,
    )

    # Record Forge Property
    record_forge_property("model_name", module_name)

    # Load model and input
    framework_model = load_model()
    inputs = load_input()

    # Forge compile framework model
    compiled_model = forge.compile(framework_model, sample_inputs=inputs, module_name=module_name)

    # Model Verification
    verify(inputs, framework_model, compiled_model)
Empty file.
Loading

0 comments on commit 899912e

Please sign in to comment.