Skip to content

Commit

Permalink
Revert "Speed up GenAi-Perf's help call (#669)" (#670)
Browse files · Browse the repository at this point in the history
dyastremsky authored May 21, 2024
1 parent f93f012 commit bba524d
Showing 2 changed files with 21 additions and 46 deletions.
48 changes: 11 additions & 37 deletions src/c++/perf_analyzer/genai-perf/genai_perf/main.py
Original file line number Diff line number Diff line change
@@ -30,33 +30,17 @@
import traceback
from argparse import Namespace
from pathlib import Path
from typing import TYPE_CHECKING

import genai_perf.logging as logging
from genai_perf import parser
from genai_perf.constants import DEFAULT_PARQUET_FILE
from genai_perf.exceptions import GenAIPerfException

# Import heavy modules to make type checker happy
if TYPE_CHECKING:
from genai_perf import parser
from genai_perf.constants import DEFAULT_PARQUET_FILE
from genai_perf.export_data.json_exporter import JsonExporter
from genai_perf.llm_inputs.llm_inputs import LlmInputs
from genai_perf.llm_metrics import LLMProfileDataParser
from genai_perf.plots.plot_config_parser import PlotConfigParser
from genai_perf.plots.plot_manager import PlotManager
from genai_perf.tokenizer import Tokenizer, get_tokenizer


def import_heavy_modules():
global parser, DEFAULT_PARQUET_FILE, JsonExporter, LlmInputs, LLMProfileDataParser, PlotConfigParser, PlotManager, get_tokenizer
from genai_perf import parser
from genai_perf.constants import DEFAULT_PARQUET_FILE
from genai_perf.export_data.json_exporter import JsonExporter
from genai_perf.llm_inputs.llm_inputs import LlmInputs
from genai_perf.llm_metrics import LLMProfileDataParser
from genai_perf.plots.plot_config_parser import PlotConfigParser
from genai_perf.plots.plot_manager import PlotManager
from genai_perf.tokenizer import get_tokenizer
from genai_perf.export_data.json_exporter import JsonExporter
from genai_perf.llm_inputs.llm_inputs import LlmInputs
from genai_perf.llm_metrics import LLMProfileDataParser
from genai_perf.plots.plot_config_parser import PlotConfigParser
from genai_perf.plots.plot_manager import PlotManager
from genai_perf.tokenizer import Tokenizer, get_tokenizer


def create_artifacts_dirs(args: Namespace) -> None:
@@ -66,9 +50,8 @@ def create_artifacts_dirs(args: Namespace) -> None:
os.makedirs(plot_dir, exist_ok=True)


def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
# TODO (TMA-1759): review if add_model_name is always true
import_heavy_modules()
input_filename = Path(args.input_file.name) if args.input_file else None
add_model_name = True
try:
@@ -99,17 +82,14 @@ def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
)


def calculate_metrics(
args: Namespace, tokenizer: "Tokenizer"
) -> "LLMProfileDataParser":
def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataParser:
return LLMProfileDataParser(
filename=args.profile_export_file,
tokenizer=tokenizer,
)


def report_output(data_parser: "LLMProfileDataParser", args: Namespace) -> None:
import_heavy_modules()
def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
if args.concurrency:
infer_mode = "concurrency"
load_level = f"{args.concurrency}"
@@ -152,12 +132,10 @@ def run():
try:
# TMA-1900: refactor CLI handler
logging.init_logging()
import_heavy_modules()
args, extra_args = parser.parse_args()
if args.subcommand == "compare":
args.func(args)
else:
import_heavy_modules()
create_artifacts_dirs(args)
tokenizer = get_tokenizer(args.tokenizer)
generate_inputs(args, tokenizer)
@@ -169,10 +147,6 @@ def run():


def main():
# Check if help is requested early
if any(arg in sys.argv for arg in ("--help", "-h")):
return 0

# Interactive use will catch exceptions and log formatted errors rather than
# tracebacks.
try:
19 changes: 10 additions & 9 deletions src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py
Original file line number Diff line number Diff line change
@@ -14,16 +14,21 @@

import contextlib
import io
from typing import TYPE_CHECKING, Union
from typing import Union

from genai_perf.exceptions import GenAIPerfException

Tokenizer = Union["PreTrainedTokenizer", "PreTrainedTokenizerFast"]
DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"
# Silence tokenizer warning on import
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
io.StringIO()
) as stderr:
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
from transformers import logging as token_logger

token_logger.set_verbosity_error()

if TYPE_CHECKING:
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"


def get_tokenizer(
@@ -37,10 +42,6 @@ def get_tokenizer(
with contextlib.redirect_stdout(
io.StringIO()
) as stdout, contextlib.redirect_stderr(io.StringIO()) as stderr:
from transformers import AutoTokenizer
from transformers import logging as token_logger

token_logger.set_verbosity_error()
tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
except Exception as e:
raise GenAIPerfException(e)

0 comments on commit bba524d

Please sign in to comment.