Skip to content

Commit

Permalink
MultiLoRA Support (#662)
Browse files Browse the repository at this point in the history
  • Loading branch information
IzzyPutterman authored May 24, 2024
1 parent eae8430 commit 9612fbe
Show file tree
Hide file tree
Showing 9 changed files with 784 additions and 497 deletions.
11 changes: 9 additions & 2 deletions src/c++/perf_analyzer/genai-perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,11 +342,18 @@ Show the help message and exit.

## Endpoint Options:

##### `-m <str>`
##### `--model <str>`
##### `-m <list>`
##### `--model <list>`

The name of the model(s) to benchmark. (default: `None`)

##### `--model-selection-strategy {round_robin, random}`

When multiple models are specified, this is how a specific model
is assigned to a prompt. `round_robin` means that the ith prompt in the
list gets assigned to the model at index i mod len(models). `random` means
that the assignment is uniformly random. (default: `round_robin`)

##### `--backend {tensorrtllm,vllm}`

When using the "triton" service-kind, this is the backend of the model. For the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
from requests import Response


class ModelSelectionStrategy(Enum):
    """Policy for assigning one of several model names to each prompt."""

    # The i-th prompt is assigned the model at position i mod len(models).
    ROUND_ROBIN = auto()
    # Each prompt is assigned a model drawn uniformly at random.
    RANDOM = auto()


class PromptSource(Enum):
SYNTHETIC = auto()
DATASET = auto()
Expand Down Expand Up @@ -78,7 +83,8 @@ def create_llm_inputs(
input_type: PromptSource,
output_format: OutputFormat,
dataset_name: str = "",
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
input_filename: Optional[Path] = Path(""),
starting_index: int = DEFAULT_STARTING_INDEX,
length: int = DEFAULT_LENGTH,
Expand Down Expand Up @@ -194,6 +200,7 @@ def create_llm_inputs(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
cls._write_json_to_file(json_in_pa_format, output_dir)

Expand Down Expand Up @@ -354,7 +361,8 @@ def _convert_generic_json_to_output_format(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
output_json = cls._convert_generic_json_to_openai_chat_completions_format(
Expand All @@ -366,6 +374,7 @@ def _convert_generic_json_to_output_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
elif output_format == OutputFormat.OPENAI_COMPLETIONS:
output_json = cls._convert_generic_json_to_openai_completions_format(
Expand All @@ -377,6 +386,7 @@ def _convert_generic_json_to_output_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
elif output_format == OutputFormat.VLLM:
output_json = cls._convert_generic_json_to_vllm_format(
Expand All @@ -388,6 +398,7 @@ def _convert_generic_json_to_output_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
elif output_format == OutputFormat.TENSORRTLLM:
output_json = cls._convert_generic_json_to_trtllm_format(
Expand All @@ -399,6 +410,7 @@ def _convert_generic_json_to_output_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
else:
raise GenAIPerfException(
Expand All @@ -417,7 +429,8 @@ def _convert_generic_json_to_openai_chat_completions_format(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
# TODO (TMA-1757): Implement a way to select a role for `text_input`
(
Expand All @@ -436,6 +449,7 @@ def _convert_generic_json_to_openai_chat_completions_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)

return pa_json
Expand All @@ -450,7 +464,8 @@ def _convert_generic_json_to_openai_completions_format(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
(
system_role_headers,
Expand All @@ -469,6 +484,7 @@ def _convert_generic_json_to_openai_completions_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)

return pa_json
Expand All @@ -483,7 +499,8 @@ def _convert_generic_json_to_vllm_format(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
(
system_role_headers,
Expand All @@ -503,6 +520,7 @@ def _convert_generic_json_to_vllm_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)

return pa_json
Expand All @@ -517,7 +535,8 @@ def _convert_generic_json_to_trtllm_format(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
(
system_role_headers,
Expand All @@ -537,6 +556,7 @@ def _convert_generic_json_to_trtllm_format(
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)

return pa_json
Expand Down Expand Up @@ -577,6 +597,17 @@ def _determine_json_feature_roles(

return system_role_headers, user_role_headers, text_input_headers

@classmethod
def _select_model_name(cls, model_name, index, model_selection_strategy):
    """Pick the model name to use for the prompt at position ``index``.

    Args:
        model_name: Non-empty sequence of candidate model names.
        index: Zero-based position of the prompt being populated.
        model_selection_strategy: ``ModelSelectionStrategy`` member that
            decides how a candidate is chosen.

    Returns:
        One element of ``model_name``.

    Raises:
        GenAIPerfException: If ``model_name`` is empty or the strategy is
            not a supported ``ModelSelectionStrategy`` member.
    """
    # Without this guard an empty list surfaces as ZeroDivisionError
    # (round robin) or IndexError (random.choice), neither of which tells
    # the user what actually went wrong.
    if not model_name:
        raise GenAIPerfException(
            "At least one model name is required to select a model for a prompt"
        )

    if model_selection_strategy == ModelSelectionStrategy.ROUND_ROBIN:
        # Cycle through the candidates in order, wrapping around at the end.
        return model_name[index % len(model_name)]
    if model_selection_strategy == ModelSelectionStrategy.RANDOM:
        return random.choice(model_name)

    raise GenAIPerfException(
        f"Model selection strategy '{model_selection_strategy}' is unsupported"
    )

@classmethod
def _populate_openai_chat_completions_output_json(
cls,
Expand All @@ -589,11 +620,15 @@ def _populate_openai_chat_completions_output_json(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
pa_json = cls._create_empty_openai_pa_json()

for index, entry in enumerate(dataset_json["rows"]):
iter_model_name = cls._select_model_name(
model_name, index, model_selection_strategy
)
pa_json["data"].append({"payload": []})
pa_json["data"][index]["payload"].append({"messages": []})

Expand All @@ -613,7 +648,7 @@ def _populate_openai_chat_completions_output_json(
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
iter_model_name,
)

return pa_json
Expand All @@ -631,11 +666,15 @@ def _populate_openai_completions_output_json(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
pa_json = cls._create_empty_openai_pa_json()

for index, entry in enumerate(dataset_json["rows"]):
iter_model_name = cls._select_model_name(
model_name, index, model_selection_strategy
)
pa_json["data"].append({"payload": []})
pa_json["data"][index]["payload"].append({"prompt": ""})

Expand All @@ -659,7 +698,7 @@ def _populate_openai_completions_output_json(
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
iter_model_name,
)

return pa_json
Expand All @@ -677,11 +716,15 @@ def _populate_vllm_output_json(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
pa_json = cls._create_empty_vllm_pa_json()

for index, entry in enumerate(dataset_json["rows"]):
iter_model_name = cls._select_model_name(
model_name, index, model_selection_strategy
)
pa_json["data"].append({"text_input": [""]})

for header, content in entry.items():
Expand All @@ -706,7 +749,7 @@ def _populate_vllm_output_json(
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
iter_model_name,
)

return pa_json
Expand All @@ -724,7 +767,8 @@ def _populate_trtllm_output_json(
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: str = "",
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
pa_json = cls._create_empty_trtllm_pa_json()
default_max_tokens = (
Expand All @@ -733,6 +777,9 @@ def _populate_trtllm_output_json(
)

for index, entry in enumerate(dataset_json["rows"]):
iter_model_name = cls._select_model_name(
model_name, index, model_selection_strategy
)
pa_json["data"].append({"text_input": [""]})

for header, content in entry.items():
Expand Down Expand Up @@ -760,7 +807,7 @@ def _populate_trtllm_output_json(
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
iter_model_name,
)

return pa_json
Expand Down
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
output_format=args.output_format,
dataset_name=args.input_dataset,
model_name=args.model,
model_selection_strategy=args.model_selection_strategy,
input_filename=input_filename,
starting_index=LlmInputs.DEFAULT_STARTING_INDEX,
length=args.num_prompts,
Expand Down
46 changes: 39 additions & 7 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@
DEFAULT_COMPARE_DIR,
OPEN_ORCA,
)
from genai_perf.llm_inputs.llm_inputs import LlmInputs, OutputFormat, PromptSource
from genai_perf.llm_inputs.llm_inputs import (
LlmInputs,
ModelSelectionStrategy,
OutputFormat,
PromptSource,
)
from genai_perf.plots.plot_config_parser import PlotConfigParser
from genai_perf.plots.plot_manager import PlotManager
from genai_perf.tokenizer import DEFAULT_TOKENIZER
Expand All @@ -57,9 +62,23 @@ def _check_model_args(
"""
if not args.subcommand and not args.model:
parser.error("The -m/--model option is required and cannot be empty.")
args = _convert_str_to_enum_entry(
args, "model_selection_strategy", ModelSelectionStrategy
)
_generate_formatted_model_name(args)
return args


def _generate_formatted_model_name(args: argparse.Namespace) -> None:
if len(args.model) == 1:
args.formatted_model_name = args.model[0]
elif len(args.model) == 0:
args.model = None
args.formatted_model_name = None
else:
args.formatted_model_name = args.model[0] + "_multi"


def _check_compare_args(
parser: argparse.ArgumentParser, args: argparse.Namespace
) -> argparse.Namespace:
Expand Down Expand Up @@ -140,15 +159,17 @@ def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
"""
if args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR):
# Preprocess Huggingface model names that include '/' in their model name.
if (args.model is not None) and ("/" in args.model):
filtered_name = "_".join(args.model.split("/"))
if (args.formatted_model_name is not None) and (
"/" in args.formatted_model_name
):
filtered_name = "_".join(args.formatted_model_name.split("/"))
logger.info(
f"Model name '{args.model}' cannot be used to create artifact "
f"Model name '{args.formatted_model_name}' cannot be used to create artifact "
f"directory. Instead, '{filtered_name}' will be used."
)
name = [f"{filtered_name}"]
else:
name = [f"{args.model}"]
name = [f"{args.formatted_model_name}"]

if args.service_kind == "openai":
name += [f"{args.service_kind}-{args.endpoint_type}"]
Expand Down Expand Up @@ -340,9 +361,20 @@ def _add_endpoint_args(parser):
endpoint_group.add_argument(
"-m",
"--model",
nargs="+",
default=[],
help=f"The name of the model(s) to benchmark.",
)
endpoint_group.add_argument(
"--model-selection-strategy",
type=str,
default=None,
help=f"The name of the model to benchmark.",
choices=utils.get_enum_names(ModelSelectionStrategy),
default="round_robin",
required=False,
help=f"When multiple model are specified, this is how a specific model "
"should be assigned to a prompt. round_robin means that ith prompt in the "
"list gets assigned to i mod len(models). random means that assignment is "
"uniformly random",
)

endpoint_group.add_argument(
Expand Down
Loading

0 comments on commit 9612fbe

Please sign in to comment.