Commit

test dashboard
Ubuntu committed May 22, 2024
1 parent dd4de27 commit 159547e
Showing 3 changed files with 30 additions and 83 deletions.
23 changes: 1 addition & 22 deletions .github/workflows/benchmark_nightly_lmi.yml
@@ -56,25 +56,4 @@ jobs:
- name: Clean up weights
run: |
rm -rf /home/ubuntu/Llama-2-7b-chat-hf
- name: Save benchmark artifacts
uses: actions/upload-artifact@v2
with:
name: nightly ${{ matrix.hardware }} artifact
path: /tmp/ts_benchmark
- name: Download benchmark artifacts for auto validation
uses: dawidd6/action-download-artifact@v2
with:
workflow: ${{ github.event.workflow_run.workflow_id }}
workflow_conclusion: success
if_no_artifact_found: ignore
path: /tmp/ts_artifacts
name: ${{ matrix.hardware }}_benchmark_validation
- name: Validate Benchmark result
run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Update benchmark artifacts for auto validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Upload the updated benchmark artifacts for auto validation
uses: actions/upload-artifact@v2
with:
name: ${{ matrix.hardware }}_benchmark_validation
path: /tmp/ts_artifacts
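
The steps removed above formed the nightly auto-validation loop: upload the fresh report from /tmp/ts_benchmark, fetch the previous successful run's artifacts, validate the new numbers against them, then store the refreshed baseline for the next run. As a sketch, on a matrix entry where matrix.hardware resolves to "gpu" (a hypothetical value, chosen here only for illustration), the two Python steps expand to:

    # Compare the fresh report with the last known-good artifacts
    # downloaded to /tmp/ts_artifacts by the preceding step.
    python benchmarks/validate_report.py \
        --input-artifacts-dir /tmp/ts_artifacts/gpu_benchmark_validation

    # Overwrite the stored baseline with the new results so the next
    # nightly run validates against them.
    python benchmarks/utils/update_artifacts.py \
        --output /tmp/ts_artifacts/gpu_benchmark_validation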
15 changes: 7 additions & 8 deletions benchmarks/benchmark_config_lmi.yaml
@@ -9,9 +9,8 @@ ts_version:
# a list of model configure yaml files defined in benchmarks/models_config
# or a list of model configure yaml files with full path
models:
#- "llama-2-7b.yaml"
#- "llama-2-13b.yaml"
- "llama-2-70b.yaml"
- "llama-2-7b.yaml"
#- "llama-2-70b.yaml"

# benchmark on "cpu" or "gpu".
# "cpu" is set if "hardware" is not specified
@@ -24,11 +23,11 @@ hardware: &hardware "gpu"
# - keep the values order as the same as the command definition.
# - set up the command before enabling `metrics_cmd`.
# For example, aws client and AWS credentials need to be setup before trying this example.
# metrics_cmd:
# - "cmd": "aws cloudwatch put-metric-data"
# - "--namespace": ["torchserve_benchmark_nightly_", *hardware]
# - "--region": "us-east-2"
# - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'
metrics_cmd:
- "cmd": "aws cloudwatch put-metric-data"
- "--namespace": ["torchserve_benchmark_nightly_lmi"]
- "--region": "us-east-2"
- "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'

# load report to remote storage or local different path if "report_cmd" is set.
# the command line to load report to remote storage.
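
With metrics_cmd now enabled, the benchmark harness shells out to the AWS CLI after each run to publish the collected stats. Assuming the runner has AWS credentials configured and the stats file has been written (the prerequisites the comments above call out), the list form assembles into one command along these lines:

    # Push the nightly stats to CloudWatch under the LMI namespace;
    # file:// tells the AWS CLI to read the metric data from a local file.
    aws cloudwatch put-metric-data \
        --namespace torchserve_benchmark_nightly_lmi \
        --region us-east-2 \
        --metric-data file:///tmp/benchmark/logs/stats_metrics.json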
75 changes: 22 additions & 53 deletions benchmarks/models_config/llama-2-7b.yaml
@@ -1,40 +1,8 @@
---
llama-2-7b:
int8:
benchmark_engine: "ab"
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
workers:
- 1
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 1000
concurrency: 1
backend_profiling: False
exec_env: "local"
processors:
- "gpus": "all"
stream: "false"
int4:
benchmark_engine: "ab"
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
workers:
- 1
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 1000
concurrency: 1
backend_profiling: False
exec_env: "local"
processors:
- "gpus": "all"
stream: "false"
# base:
# int8:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
# workers:
# - 1
# batch_delay: 100
@@ -48,36 +16,37 @@ llama-2-7b:
# processors:
# - "gpus": "all"
# stream: "false"
# int8-tp:
# int4:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
# workers:
# - 4
# batch_delay: 100
# batch_size:
# - 1
# input: "./examples/large_models/gpt_fast/request.json"
# requests: 1000
# concurrency: 4
# backend_profiling: False
# exec_env: "local"
# processors:
# - "gpus": "all"
# stream: "false"
# int4-tp:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int4-tp.mar
# workers:
# - 4
# batch_delay: 100
# batch_size:
# - 1
# input: "./examples/large_models/gpt_fast/request.json"
# requests: 1000
# concurrency: 4
# concurrency: 1
# backend_profiling: False
# exec_env: "local"
# processors:
# - "gpus": "all"
# stream: "false"
base:
benchmark_engine: "ab"
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
workers:
- 1
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 1000
concurrency: 1
backend_profiling: False
exec_env: "local"
processors:
- "gpus": "all"
stream: "false"


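For reference, the now-active base entry drives an Apache Bench ("ab") run against a locally served copy of the model. As a sketch only — assuming the harness maps requests, concurrency, and input to ab's -n, -c, and -p flags, and that the .mar registers under the model name llama-2-7b-base on TorchServe's default inference port (all assumptions for illustration) — the generated load looks roughly like:

    # 1000 POST requests, one at a time, each sending the sample prompt.
    ab -n 1000 -c 1 \
       -p ./examples/large_models/gpt_fast/request.json \
       -T application/json \
       http://127.0.0.1:8080/predictions/llama-2-7b-base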