[QEff. Finetune]: Bypass qeff import and add trust_remote_code flag for samsum dataset (#206)

1. Bypass QEfficient's QAIC-dependent imports when the QAIC SDK is not installed
2. Add the trust_remote_code flag for the samsum dataset

---------

Signed-off-by: Mamta Singh <[email protected]>
quic-mamta authored Jan 13, 2025
1 parent 0cf32b0 commit b635803
Showing 6 changed files with 49 additions and 44 deletions.
61 changes: 42 additions & 19 deletions QEfficient/__init__.py
@@ -5,24 +5,47 @@
 #
 # -----------------------------------------------------------------------------
 
-from QEfficient.base import QEFFAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
-from QEfficient.compile.compile_helper import compile
-from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
-from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
-from QEfficient.peft import QEffAutoPeftModelForCausalLM
-from QEfficient.transformers.transform import transform
-
-# Users can use QEfficient.export for exporting models to ONNX
-export = qualcomm_efficient_converter
+def check_qaic_sdk():
+    """Check if QAIC SDK is installed"""
+    try:
+        import platform
+        import sys
+
+        sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
+        import qaicrt  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
+
+
+QAIC_INSTALLED = check_qaic_sdk()
+
+# Conditionally import QAIC-related modules if the SDK is installed
+__version__ = "0.0.1.dev0"
+if QAIC_INSTALLED:
+    from QEfficient.base import QEFFAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
+    from QEfficient.compile.compile_helper import compile
+    from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
+    from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
+    from QEfficient.peft import QEffAutoPeftModelForCausalLM
+    from QEfficient.transformers.transform import transform
+
+    # Users can use QEfficient.export for exporting models to ONNX
+    export = qualcomm_efficient_converter
 
-__all__ = [
-    "transform",
-    "export",
-    "compile",
-    "cloud_ai_100_exec_kv",
-    "QEFFAutoModel",
-    "QEFFAutoModelForCausalLM",
-    "QEffAutoPeftModelForCausalLM",
-    "QEFFCommonLoader",
-]
+    __all__ = [
+        "transform",
+        "export",
+        "compile",
+        "cloud_ai_100_exec_kv",
+        "QEFFAutoModel",
+        "QEFFAutoModelForCausalLM",
+        "QEffAutoPeftModelForCausalLM",
+        "QEFFCommonLoader",
+    ]
+
+    print("QAIC SDK is installed.")
+else:
+    print("QAIC SDK is not installed. Proceeding without it.")
3 changes: 0 additions & 3 deletions QEfficient/cloud/finetune.py
@@ -5,7 +5,6 @@
 #
 # -----------------------------------------------------------------------------
 
-import os
 import random
 import warnings
 
@@ -58,8 +57,6 @@ def main(**kwargs):
     update_config(train_config, **kwargs)
     device = train_config.device
 
-    os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "True"
-
     # dist init
     if train_config.enable_ddp:
         # TODO: may have to init qccl backend, next try run with torchrun command
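
Dropping the os.environ line is safe because trust is now granted per call rather than process-wide; the samsum loader below passes trust_remote_code=True itself. For reference, a sketch contrasting the two mechanisms (the old one affected every load_dataset call in the process):

    import os

    # Old, global opt-in (removed by this commit): every datasets.load_dataset
    # call in the process was allowed to run hosted loading scripts.
    os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "True"

    # New, scoped opt-in (see samsum_dataset.py below): trust is passed only to
    # the one call that needs it, via trust_remote_code=True.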
2 changes: 1 addition & 1 deletion QEfficient/finetune/configs/peft_config.py
@@ -20,7 +20,7 @@ class lora_config:
     bias = "none"
     task_type: str = "CAUSAL_LM"
     lora_dropout: float = 0.05
-    inference_mode: bool = False
+    inference_mode: bool = False  # should be False for finetuning
 
 
 # CAUTION prefix tuning is currently not supported
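
The new comment matters because PEFT treats inference_mode=True as "freeze the adapter weights", which would silently train nothing. A hedged sketch of how a dataclass like this typically feeds peft (the construction below is illustrative, not code from this repo):

    from peft import LoraConfig

    # Mirrors the lora_config defaults above; inference_mode=False keeps the
    # LoRA adapter weights trainable during finetuning.
    peft_config = LoraConfig(
        task_type="CAUSAL_LM",
        lora_dropout=0.05,
        bias="none",
        inference_mode=False,
    )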
2 changes: 1 addition & 1 deletion QEfficient/finetune/configs/training.py
@@ -38,7 +38,7 @@ class train_config:
     save_metrics: bool = True  # saves training metrics to a json file for later plotting
     intermediate_step_save: int = 1000
     batching_strategy: str = "packing"
-    enable_sorting_for_ddp: bool = "True"
+    enable_sorting_for_ddp: bool = True
 
     # TODO: vbaddi: Uncomment post adding qaic to Pytorch Profiler
     # flop_counter: bool = False  # Enable flop counter to measure model throughput, can not be used with pytorch profiler at the same time.
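
The old default was the string "True" rather than the boolean, which happens to pass truthiness checks but makes the flag impossible to switch off with a string, since every non-empty string is truthy. A two-line illustration of the pitfall the fix removes:

    # Non-empty strings are always truthy in Python, so a string-typed flag
    # cannot be disabled by assigning "False":
    assert bool("True") is True
    assert bool("False") is True  # still truthy; hence the bool annotation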
18 changes: 1 addition & 17 deletions QEfficient/finetune/dataset/samsum_dataset.py
@@ -5,27 +5,11 @@
 #
 # -----------------------------------------------------------------------------
 
-from unittest.mock import patch
-
 import datasets
 
 
-@patch("builtins.input", return_value="N")
-def load_samsum(split, _):
-    try:
-        ds = datasets.load_dataset("Samsung/samsum", split=split)
-    except ValueError as e:
-        if "trust_remote_code" in str(e):
-            raise ValueError(
-                "Loading Samsung/samsum requires you to execute the dataset script in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set HF_DATASETS_TRUST_REMOTE_CODE env variable to True."
-            ) from e
-        else:
-            raise e
-    return ds
-
-
 def get_preprocessed_samsum(dataset_config, tokenizer, split, context_length=None):
-    dataset = load_samsum(split)
+    dataset = datasets.load_dataset("Samsung/samsum", split=split, trust_remote_code=True)
 
     prompt = "Summarize this dialog:\n{dialog}\n---\nSummary:\n"
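
trust_remote_code=True is the datasets-library switch that lets Samsung/samsum's hosted loading script run, replacing both the mocked input() prompt and the HF_DATASETS_TRUST_REMOTE_CODE workaround. Typical usage, with the split name chosen for illustration:

    import datasets

    # Executes the dataset's loading script from the Hub; enable only for
    # repositories whose code you have reviewed.
    train_ds = datasets.load_dataset("Samsung/samsum", split="train", trust_remote_code=True)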
7 changes: 4 additions & 3 deletions scripts/finetune/run_ft_model.py
@@ -9,17 +9,18 @@
 import warnings
 
 import torch
-from configs.training import train_config as TRAIN_CONFIG
 from peft import AutoPeftModelForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG
+
 # Suppress all warnings
 warnings.filterwarnings("ignore")
 
 try:
     import torch_qaic  # noqa: F401
 
-    device = "qaic:1"
+    device = "qaic:0"
 except ImportError as e:
     print(f"Warning: {e}. Moving ahead without these qaic modules.")
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -29,7 +30,7 @@
     train_config.model_name,
     use_cache=False,
     attn_implementation="sdpa",
-    torch_dtype=torch.float16 if torch.cuda.is_available() or device == "qaic:1" else None,
+    torch_dtype=torch.float16 if torch.cuda.is_available() or device == "qaic:0" else None,
 )
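
Two fixes land here: the train_config import now resolves through the installed QEfficient package instead of a relative configs module, and the default accelerator becomes the first QAIC device. The resulting device and dtype selection, extracted as a standalone sketch (assumes torch is installed and torch_qaic is present only on Cloud AI 100 hosts):

    import torch

    try:
        import torch_qaic  # noqa: F401  # present only on QAIC-equipped hosts
        device = "qaic:0"  # first QAIC device, as in the diff above
    except ImportError:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Half precision on accelerators, framework default elsewhere.
    dtype = torch.float16 if torch.cuda.is_available() or device == "qaic:0" else None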
