# run_benchmarks.sh
# Origin: forked from octoml/mlc-llm.
#!/usr/bin/env bash
# Run the vicuna-v1-7b (q0f16) benchmark matrix: three backends
# (torch-eager, torch-inductor, tvm) at three input/output token
# lengths (32, 128, 512), with 5 warm-up runs and 20 measurements each.
#
# Requires: python3 and tests/benchmark.py (run from the repo root).
set -euo pipefail

readonly MODEL=vicuna-v1-7b
readonly QUANTIZATION=q0f16
readonly NUM_WARM_UP=5
readonly NUM_MEASUREMENTS=20

# Loop order matches the original script: all three backends at one
# token length before moving to the next length.
for tokens in 32 128 512; do
  for mode in torch-eager torch-inductor tvm; do
    python3 tests/benchmark.py \
      --model "$MODEL" \
      --quantization "$QUANTIZATION" \
      --num-warm-up="$NUM_WARM_UP" \
      --num-measurements="$NUM_MEASUREMENTS" \
      --benchmark-mode="$mode" \
      --num-input-tokens="$tokens" \
      --num-output-tokens="$tokens"
  done
done