Revert "Speed up GenAi-Perf's help call" #670

Merged · 2 commits · May 21, 2024
src/c++/perf_analyzer/genai-perf/genai_perf/main.py (48 changes: 11 additions & 37 deletions)

@@ -30,33 +30,17 @@
 import traceback
 from argparse import Namespace
 from pathlib import Path
-from typing import TYPE_CHECKING
 
 import genai_perf.logging as logging
+from genai_perf import parser
+from genai_perf.constants import DEFAULT_PARQUET_FILE
 from genai_perf.exceptions import GenAIPerfException
-
-# Import heavy modules to make type checker happy
-if TYPE_CHECKING:
-    from genai_perf import parser
-    from genai_perf.constants import DEFAULT_PARQUET_FILE
-    from genai_perf.export_data.json_exporter import JsonExporter
-    from genai_perf.llm_inputs.llm_inputs import LlmInputs
-    from genai_perf.llm_metrics import LLMProfileDataParser
-    from genai_perf.plots.plot_config_parser import PlotConfigParser
-    from genai_perf.plots.plot_manager import PlotManager
-    from genai_perf.tokenizer import Tokenizer, get_tokenizer
-
-
-def import_heavy_modules():
-    global parser, DEFAULT_PARQUET_FILE, JsonExporter, LlmInputs, LLMProfileDataParser, PlotConfigParser, PlotManager, get_tokenizer
-    from genai_perf import parser
-    from genai_perf.constants import DEFAULT_PARQUET_FILE
-    from genai_perf.export_data.json_exporter import JsonExporter
-    from genai_perf.llm_inputs.llm_inputs import LlmInputs
-    from genai_perf.llm_metrics import LLMProfileDataParser
-    from genai_perf.plots.plot_config_parser import PlotConfigParser
-    from genai_perf.plots.plot_manager import PlotManager
-    from genai_perf.tokenizer import get_tokenizer
+from genai_perf.export_data.json_exporter import JsonExporter
+from genai_perf.llm_inputs.llm_inputs import LlmInputs
+from genai_perf.llm_metrics import LLMProfileDataParser
+from genai_perf.plots.plot_config_parser import PlotConfigParser
+from genai_perf.plots.plot_manager import PlotManager
+from genai_perf.tokenizer import Tokenizer, get_tokenizer
 
 
 def create_artifacts_dirs(args: Namespace) -> None:

@@ -66,9 +50,8 @@ def create_artifacts_dirs(args: Namespace) -> None:
     os.makedirs(plot_dir, exist_ok=True)
 
 
-def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
+def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
     # TODO (TMA-1759): review if add_model_name is always true
-    import_heavy_modules()
     input_filename = Path(args.input_file.name) if args.input_file else None
     add_model_name = True
     try:

@@ -99,17 +82,14 @@ def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
     )
 
 
-def calculate_metrics(
-    args: Namespace, tokenizer: "Tokenizer"
-) -> "LLMProfileDataParser":
+def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataParser:
     return LLMProfileDataParser(
         filename=args.profile_export_file,
         tokenizer=tokenizer,
     )
 
 
-def report_output(data_parser: "LLMProfileDataParser", args: Namespace) -> None:
-    import_heavy_modules()
+def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
     if args.concurrency:
         infer_mode = "concurrency"
         load_level = f"{args.concurrency}"

@@ -152,12 +132,10 @@ def run():
     try:
         # TMA-1900: refactor CLI handler
         logging.init_logging()
-        import_heavy_modules()
        args, extra_args = parser.parse_args()
         if args.subcommand == "compare":
             args.func(args)
         else:
-            import_heavy_modules()
             create_artifacts_dirs(args)
             tokenizer = get_tokenizer(args.tokenizer)
             generate_inputs(args, tokenizer)

@@ -169,10 +147,6 @@ def run():
 
 
 def main():
-    # Check if help is requested early
-    if any(arg in sys.argv for arg in ("--help", "-h")):
-        return 0
-
     # Interactive use will catch exceptions and log formatted errors rather than
     # tracebacks.
     try:
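For context, the change being reverted deferred GenAI-Perf's heavy imports behind a `TYPE_CHECKING` guard plus an `import_heavy_modules()` helper, and short-circuited `--help` before any of them loaded, so help text could print without paying transformers' import cost. A minimal, runnable sketch of that pattern, with the standard-library `json` standing in for a heavy dependency (names and usage here are illustrative, not the repo's exact code):

```python
import sys
from typing import TYPE_CHECKING

# Type checkers resolve the real symbols here; the interpreter skips this block.
if TYPE_CHECKING:
    from json import JSONEncoder  # stand-in for an expensive import


def import_heavy_modules() -> None:
    # Bind the expensive names into module globals only when actually needed.
    global JSONEncoder
    from json import JSONEncoder


def main() -> int:
    # Handle --help before importing anything heavy.
    if any(arg in sys.argv for arg in ("--help", "-h")):
        print("usage: tool [-h] ...")
        return 0
    import_heavy_modules()
    print(JSONEncoder().encode({"ok": True}))
    return 0


if __name__ == "__main__":
    sys.exit(main())
```

The revert replaces all of this with plain top-level imports, dropping both the `--help` short-circuit and the `global` rebinding.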
src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py (19 changes: 10 additions & 9 deletions)

@@ -14,16 +14,21 @@
 import contextlib
 import io
-from typing import TYPE_CHECKING, Union
+from typing import Union
 
 from genai_perf.exceptions import GenAIPerfException
 
-Tokenizer = Union["PreTrainedTokenizer", "PreTrainedTokenizerFast"]
-DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"
+# Silence tokenizer warning on import
+with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
+    io.StringIO()
+) as stderr:
+    from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
+    from transformers import logging as token_logger
 
-if TYPE_CHECKING:
-    from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
+    token_logger.set_verbosity_error()
 
+Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"

@@ -37,10 +42,6 @@ def get_tokenizer(
         with contextlib.redirect_stdout(
             io.StringIO()
         ) as stdout, contextlib.redirect_stderr(io.StringIO()) as stderr:
-            from transformers import AutoTokenizer
-            from transformers import logging as token_logger
-
-            token_logger.set_verbosity_error()
             tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
     except Exception as e:
         raise GenAIPerfException(e)
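The tokenizer.py hunks move warning suppression back to import time: transformers is imported once at module load under redirected stdout/stderr, and its log verbosity is lowered so later calls stay quiet as well. Here is the redirection idiom in isolation, with the standard-library `this` module standing in for a package that prints on import (a sketch, not the repo's code):

```python
import contextlib
import io

# Anything the module prints while importing lands in these buffers
# instead of the user's terminal.
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
    io.StringIO()
) as stderr:
    import this  # prints the Zen of Python on first import

# The captured output remains available for inspection or logging.
assert "Zen of Python" in stdout.getvalue()
```

The redirection only captures output produced during the import itself, which is why the diff also calls `token_logger.set_verbosity_error()`: that quiets warnings transformers emits later through its own logger.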