From bba524d4e6f2bf878d8504d61769710a8d423432 Mon Sep 17 00:00:00 2001
From: David Yastremsky <58150256+dyastremsky@users.noreply.github.com>
Date: Tue, 21 May 2024 09:48:07 -0700
Subject: [PATCH] Revert "Speed up GenAi-Perf's help call (#669)" (#670)

---
 .../genai-perf/genai_perf/main.py      | 48 +++++--------------
 .../genai-perf/genai_perf/tokenizer.py | 19 ++++----
 2 files changed, 21 insertions(+), 46 deletions(-)

diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py
index f80e1ea10..08bd3760c 100755
--- a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py
+++ b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py
@@ -30,33 +30,17 @@
 import traceback
 from argparse import Namespace
 from pathlib import Path
-from typing import TYPE_CHECKING
 
 import genai_perf.logging as logging
+from genai_perf import parser
+from genai_perf.constants import DEFAULT_PARQUET_FILE
 from genai_perf.exceptions import GenAIPerfException
-
-# Import heavy modules to make type checker happy
-if TYPE_CHECKING:
-    from genai_perf import parser
-    from genai_perf.constants import DEFAULT_PARQUET_FILE
-    from genai_perf.export_data.json_exporter import JsonExporter
-    from genai_perf.llm_inputs.llm_inputs import LlmInputs
-    from genai_perf.llm_metrics import LLMProfileDataParser
-    from genai_perf.plots.plot_config_parser import PlotConfigParser
-    from genai_perf.plots.plot_manager import PlotManager
-    from genai_perf.tokenizer import Tokenizer, get_tokenizer
-
-
-def import_heavy_modules():
-    global parser, DEFAULT_PARQUET_FILE, JsonExporter, LlmInputs, LLMProfileDataParser, PlotConfigParser, PlotManager, get_tokenizer
-    from genai_perf import parser
-    from genai_perf.constants import DEFAULT_PARQUET_FILE
-    from genai_perf.export_data.json_exporter import JsonExporter
-    from genai_perf.llm_inputs.llm_inputs import LlmInputs
-    from genai_perf.llm_metrics import LLMProfileDataParser
-    from genai_perf.plots.plot_config_parser import PlotConfigParser
-    from genai_perf.plots.plot_manager import PlotManager
-    from genai_perf.tokenizer import get_tokenizer
+from genai_perf.export_data.json_exporter import JsonExporter
+from genai_perf.llm_inputs.llm_inputs import LlmInputs
+from genai_perf.llm_metrics import LLMProfileDataParser
+from genai_perf.plots.plot_config_parser import PlotConfigParser
+from genai_perf.plots.plot_manager import PlotManager
+from genai_perf.tokenizer import Tokenizer, get_tokenizer
 
 
 def create_artifacts_dirs(args: Namespace) -> None:
@@ -66,9 +50,8 @@ def create_artifacts_dirs(args: Namespace) -> None:
         os.makedirs(plot_dir, exist_ok=True)
 
 
-def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
+def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
     # TODO (TMA-1759): review if add_model_name is always true
-    import_heavy_modules()
     input_filename = Path(args.input_file.name) if args.input_file else None
     add_model_name = True
     try:
@@ -99,17 +82,14 @@ def generate_inputs(args: Namespace, tokenizer: "Tokenizer") -> None:
     )
 
 
-def calculate_metrics(
-    args: Namespace, tokenizer: "Tokenizer"
-) -> "LLMProfileDataParser":
+def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataParser:
     return LLMProfileDataParser(
         filename=args.profile_export_file,
         tokenizer=tokenizer,
     )
 
 
-def report_output(data_parser: "LLMProfileDataParser", args: Namespace) -> None:
-    import_heavy_modules()
+def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
     if args.concurrency:
         infer_mode = "concurrency"
         load_level = f"{args.concurrency}"
@@ -152,12 +132,10 @@ def run():
     try:
         # TMA-1900: refactor CLI handler
         logging.init_logging()
-        import_heavy_modules()
         args, extra_args = parser.parse_args()
         if args.subcommand == "compare":
             args.func(args)
         else:
-            import_heavy_modules()
             create_artifacts_dirs(args)
             tokenizer = get_tokenizer(args.tokenizer)
             generate_inputs(args, tokenizer)
@@ -169,10 +147,6 @@ def run():
 
 
 def main():
-    # Check if help is requested early
-    if any(arg in sys.argv for arg in ("--help", "-h")):
-        return 0
-
     # Interactive use will catch exceptions and log formatted errors rather than
     # tracebacks.
     try:
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py b/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py
index f983e7914..a46a28aad 100644
--- a/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py
+++ b/src/c++/perf_analyzer/genai-perf/genai_perf/tokenizer.py
@@ -14,16 +14,21 @@
 
 import contextlib
 import io
-from typing import TYPE_CHECKING, Union
+from typing import Union
 
 from genai_perf.exceptions import GenAIPerfException
 
-Tokenizer = Union["PreTrainedTokenizer", "PreTrainedTokenizerFast"]
-DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"
+# Silence tokenizer warning on import
+with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
+    io.StringIO()
+) as stderr:
+    from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
+    from transformers import logging as token_logger
 
+    token_logger.set_verbosity_error()
 
-if TYPE_CHECKING:
-    from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
+Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+DEFAULT_TOKENIZER = "hf-internal-testing/llama-tokenizer"
 
 
 def get_tokenizer(
@@ -37,10 +42,6 @@ def get_tokenizer(
         with contextlib.redirect_stdout(
             io.StringIO()
         ) as stdout, contextlib.redirect_stderr(io.StringIO()) as stderr:
-            from transformers import AutoTokenizer
-            from transformers import logging as token_logger
-
-            token_logger.set_verbosity_error()
             tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
     except Exception as e:
         raise GenAIPerfException(e)
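
Note on the reverted approach: #669 made `genai-perf --help` fast by deferring the heavy imports (transformers, the plotting stack) until after CLI dispatch, and by short-circuiting `--help`/`-h` in `main()` before any of them load. A minimal, self-contained sketch of that lazy-import pattern, with hypothetical names (`lazy_cli.py`, `run_profiling`) that are not part of GenAI-Perf:

# lazy_cli.py -- illustrative sketch of the lazy-import pattern reverted above;
# module and function names here are hypothetical, not GenAI-Perf's.
import sys


def run_profiling() -> None:
    # The heavy dependency is imported inside the function, so its cost is
    # only paid on the code path that actually needs it.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
    print(f"tokenizer ready, vocab size {tokenizer.vocab_size}")


def main() -> int:
    # A bare --help/-h returns before any heavy import happens.
    if any(arg in sys.argv for arg in ("--help", "-h")):
        print("usage: lazy_cli.py [--help]")
        return 0
    run_profiling()
    return 0


if __name__ == "__main__":
    sys.exit(main())

The main.py hunks above show the cost of the pattern: every entry point had to remember to call `import_heavy_modules()`, and a parallel `TYPE_CHECKING` block was needed so the type checker could still see the deferred names.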
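
The tokenizer.py hunk restores an eager `transformers` import wrapped in redirected stdout/stderr so that import-time chatter never reaches the terminal. A generic sketch of that silencing idiom, assuming only that `transformers` is installed (`silence_import.py` is an illustrative file name):

# silence_import.py -- sketch of muting a noisy import; the idiom mirrors the
# restored tokenizer.py code above.
import contextlib
import io

# Anything printed to stdout or stderr while the module imports is captured
# in the StringIO buffers instead of reaching the terminal.
with contextlib.redirect_stdout(io.StringIO()) as stdout, contextlib.redirect_stderr(
    io.StringIO()
) as stderr:
    from transformers import AutoTokenizer
    from transformers import logging as hf_logging

    # Keep transformers' own logger quiet after import as well.
    hf_logging.set_verbosity_error()

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

Redirecting both streams catches one-off print/warning output emitted during import, while `set_verbosity_error()` keeps the library's logger quiet for subsequent calls such as `from_pretrained`.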