Skip to content

Commit

Permalink
benchmark and evals changes for Llama 3.1 70B v0 drop testing
Browse files Browse the repository at this point in the history
Change log:
- add benchmark_summary.py to generate a readable Markdown summary of the benchmark stats and store them as a .csv file
- update benchmark scripts for stats calculation and context-length pairs
- add setup to evals/run_evals.sh
- update documentation for the new v0 drop
  • Loading branch information
tstescoTT committed Jan 14, 2025
1 parent 1a15966 commit 4ae94dd
Show file tree
Hide file tree
Showing 16 changed files with 538 additions and 282 deletions.
43 changes: 15 additions & 28 deletions benchmarking/benchmark_serving.patch
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 4813fde2..0cb3e72e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -235,9 +235,7 @@ async def async_request_openai_completions(
"model": request_func_input.model,
"prompt": request_func_input.prompt,
"temperature": 0.0,
- "best_of": request_func_input.best_of,
"max_tokens": request_func_input.output_len,
- "logprobs": request_func_input.logprobs,
"stream": True,
"ignore_eos": request_func_input.ignore_eos,
}
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index c1a396c8..463e0e93 100644
index c1a396c8..74f75a15 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -22,6 +22,12 @@ On the client side, run:
Expand All @@ -24,30 +38,3 @@ index c1a396c8..463e0e93 100644
multi_modal_content=test_mm_content,
ignore_eos=ignore_eos,
)
@@ -458,7 +464,7 @@ async def benchmark(
prompt_len=prompt_len,
output_len=output_len,
logprobs=logprobs,
- best_of=best_of,
+ best_of=None,
multi_modal_content=mm_content,
ignore_eos=ignore_eos)
tasks.append(
diff --git a/vllm/worker/tt_model_runner.py b/vllm/worker/tt_model_runner.py
index 1c586dd3..2e77bf72 100644
--- a/vllm/worker/tt_model_runner.py
+++ b/vllm/worker/tt_model_runner.py
@@ -425,12 +425,7 @@ class TTModelRunner(ModelRunnerBase[TTModelInput]):
)

def _validate_sampling_params(self, sampling_params):
- assert sampling_params.n == 1, "Currently only supporting n=1"
- assert sampling_params.best_of is None, "Currently not supporting best_of"
- assert sampling_params.logprobs is None, "Currently not supporting logprobs"
- assert sampling_params.prompt_logprobs is None, "Currently not supporting prompt_logprobs"
-
- ## Destructor (used to delete ttnn trace if using trace mode)
+ return

def __del__(self):
if self.trace_mode and self.execute_trace_kwargs is not None:
Loading

0 comments on commit 4ae94dd

Please sign in to comment.