-
Notifications
You must be signed in to change notification settings - Fork 36
130 lines (123 loc) · 5.52 KB
/
gpu-bench.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# Run regression check only when attempting to merge, shown as skipped status check beforehand
name: GPU benchmark regression test
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
branches: [dev]
merge_group:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
# Run comparative benchmark against dev, open issue on regression
gpu-benchmark:
if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
name: Run benchmarks on GPU
runs-on: [self-hosted, gpu-bench]
steps:
- uses: actions/checkout@v4
with:
repository: lurk-lab/ci-workflows
- uses: ./.github/actions/gpu-setup
with:
gpu-framework: 'cuda'
- uses: ./.github/actions/ci-env
- uses: actions/checkout@v4
# Install dependencies
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: [email protected]
- name: Install criterion
run: |
cargo install cargo-criterion
cargo install criterion-table
- name: Set bench output format and base SHA
run: |
echo "ARECIBO_BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
echo "ARECIBO_BENCH_NUM_CONS=16384,1038732" | tee -a $GITHUB_ENV
echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
# Checkout base branch for comparative bench
- uses: actions/checkout@v4
with:
ref: dev
path: dev
# Copy the script so the base can bench with the same parameters
- name: Run GPU bench on base branch
run: |
# Copy justfile to dev, overwriting existing config with that of PR branch
cp ../benches/justfile .
# Run benchmark
just gpu-bench-ci supernova-ci
# Copy bench output to PR branch
cp supernova-ci-${{ env.BASE_COMMIT }}.json ..
working-directory: ${{ github.workspace }}/dev
- name: Run GPU bench on PR branch
run: |
just gpu-bench-ci supernova-ci
cp supernova-ci-${{ github.sha }}.json ..
working-directory: ${{ github.workspace }}/benches
- name: copy the benchmark template and prepare it with data
run: |
cp .github/tables.toml .
# Get CPU model
CPU_MODEL=$(grep '^model name' /proc/cpuinfo | head -1 | awk -F ': ' '{ print $2 }')
# Get vCPU count
NUM_VCPUS=$(nproc --all)
# Get total RAM in GB
TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Use conditionals to ensure that only non-empty variables are inserted
[[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
[[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
[[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS vCPUs" tables.toml
[[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml
sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml
echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
working-directory: ${{ github.workspace }}
# Create a `criterion-table` and write in commit comment
- name: Run `criterion-table`
run: |
cat supernova-ci-${{ env.BASE_COMMIT }}.json | criterion-table > BENCHMARKS.md
- name: Write bench on commit comment
uses: peter-evans/commit-comment@v3
with:
body-path: BENCHMARKS.md
# Check for a slowdown >= `$ARECIBO_BENCH_NOISE_THRESHOLD` (fallback is 30%/1.3x). If so, open an issue but don't block merge
# Since we are parsing for slowdowns, we simply add 1 to the noise threshold decimal to get the regression factor
- name: Check for perf regression
id: regression-check
run: |
REGRESSIONS=$(grep -o '[0-9.]*x slower' BENCHMARKS.md | cut -d 'x' -f1)
echo $REGRESSIONS
if [ ! -z "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD}}" ]; then
REGRESSION_FACTOR=$(echo "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD }}+1" | bc)
else
REGRESSION_FACTOR=1.3
fi
for r in $REGRESSIONS
do
if (( $(echo "$r >= $REGRESSION_FACTOR" | bc -l) ))
then
exit 1
fi
done
echo "NOISE_THRESHOLD=$("(REGRESSION_FACTOR-1)*100" | bc) | tee -a $GITHUB_ENV
continue-on-error: true
# Not possible to use ${{ github.event.number }} with the `merge_group` trigger
- name: Get PR number from merge branch
run: |
echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
- uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ env.PR_NUMBER }}
GIT_SHA: ${{ github.sha }}
WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
NOISE_THRESHOLD: ${{ env.NOISE_THRESHOLD }}
with:
filename: .github/PERF_REGRESSION.md