Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add phi-3-medium model #1235

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions forge/test/models/pytorch/text/phi3/test_phi3_medium.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import pytest
from transformers import (
AutoTokenizer,
Phi3Config,
Phi3ForCausalLM,
Phi3ForSequenceClassification,
Phi3ForTokenClassification,
)

import forge

from test.models.utils import Framework, Source, Task, build_module_name

variants = ["microsoft/Phi-3-medium-128k-instruct", "microsoft/Phi-3-mini-128k-instruct"]


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants)
def test_phi3_causal_lm(record_forge_property, variant):

# Build Module Name
module_name = build_module_name(variant, Source.HUGGINGFACE, Framework.PYTORCH, Task.CAUSAL_LM)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's update to

    # Build Module Name
     module_name = build_module_name(
         framework=Framework.PYTORCH,
         model="phi3",
         variant=variant,
         task=Task.CAUSAL_LM,
         source=Source.HUGGINGFACE,
     )


# Record Forge Property
record_forge_property("model_name", module_name)

# Phi3Config from pretrained variant, disable return_dict and caching.
config = Phi3Config.from_pretrained(variant)
config_dict = config.to_dict()
config_dict["return_dict"] = False
config_dict["use_cache"] = False
config = Phi3Config(**config_dict)

# Load tokenizer and model from HuggingFace
tokenizer = AutoTokenizer.from_pretrained(variant)
framework_model = Phi3ForCausalLM.from_pretrained(variant, config=config).to("cpu")
framework_model.eval()

# input_prompt
input_prompt = "Africa is an emerging economy because"

# Tokenize input
inputs = tokenizer(input_prompt, return_tensors="pt").to("cpu")

sample_inputs = [inputs["input_ids"], inputs["attention_mask"]]

# Forge compile framework model
compiled_model = forge.compile(framework_model, sample_inputs, module_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls add verify function



@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants)
def test_phi3_token_classification(record_forge_property, variant):

# Build Module Name
module_name = build_module_name(
framework=Framework.PYTORCH,
model="phi3",
variant=variant,
task=Task.TOKEN_CLASSIFICATION,
source=Source.HUGGINGFACE,
)

# Record Forge Property
record_forge_property("model_name", module_name)

# Phi3Config from pretrained variant, disable return_dict and caching.
config = Phi3Config.from_pretrained(variant)
config_dict = config.to_dict()
config_dict["return_dict"] = False
config_dict["use_cache"] = False
config = Phi3Config(**config_dict)

# Tokenize input
tokenizer = AutoTokenizer.from_pretrained(variant, trust_remote_code=True)
framework_model = Phi3ForTokenClassification.from_pretrained(variant, trust_remote_code=True, config=config)
framework_model.eval()

# input_prompt
input_prompt = "HuggingFace is a company based in Paris and New York"

# Tokenize input
inputs = tokenizer(input_prompt, return_tensors="pt")
inputs = [inputs["input_ids"]]

# Forge compile framework model
compiled_model = forge.compile(framework_model, inputs, module_name)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls add verify function


@pytest.mark.nightly
@pytest.mark.parametrize("variant", variants)
def test_phi3_sequence_classification(record_forge_property, variant):

# Build Module Name
module_name = build_module_name(
framework=Framework.PYTORCH,
model="phi3",
variant=variant,
task=Task.SEQUENCE_CLASSIFICATION,
source=Source.HUGGINGFACE,
)

# Record Forge Property
record_forge_property("model_name", module_name)

# Phi3Config from pretrained variant, disable return_dict and caching.
config = Phi3Config.from_pretrained(variant)
config_dict = config.to_dict()
config_dict["return_dict"] = False
config_dict["use_cache"] = False
config_dict["pad_token_id"] = None
config = Phi3Config(**config_dict)

# Load tokenizer and model from HuggingFace
tokenizer = AutoTokenizer.from_pretrained(variant, return_tensors="pt", trust_remote_code=True)
framework_model = Phi3ForSequenceClassification.from_pretrained(variant, trust_remote_code=True, config=config)
framework_model.eval()

# input_prompt
input_prompt = "the movie was great!"

# Tokenize input
inputs = tokenizer(
input_prompt,
return_tensors="pt",
max_length=256,
pad_to_max_length=True,
truncation=True,
)

inputs = [inputs["input_ids"]]

# Forge compile framework model
compiled_model = forge.compile(framework_model, inputs, module_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls add verify function

Loading