# Workflow: Execute ZKVM-Perf (Matrix)
name: Execute ZKVM-Perf (Matrix)

# Started by hand only. Every input is optional; when left blank the default
# listed with it is used.
on:
  workflow_dispatch:
    inputs:
      # Which provers to benchmark.
      provers:
        type: string
        required: false
        default: 'sp1'
        description: 'Provers to use (comma-separated)'
      # Which guest programs to benchmark.
      programs:
        type: string
        required: false
        default: 'loop,fibonacci,tendermint,reth1,reth2'
        description: 'Programs to benchmark (comma-separated)'
      # Base name of the results file; the jobs below append the instance type.
      filename:
        type: string
        required: false
        default: 'benchmark'
        description: 'Filename for the benchmark'
      # Passed along as a string, not a number.
      trials:
        type: string
        required: false
        default: '1'
        description: 'Number of trials to run'
      sp1_ref:
        type: string
        required: false
        default: 'dev'
        description: 'SP1 reference (commit hash or branch name)'
      # Free-form JSON object, forwarded verbatim as a string.
      additional_params:
        type: string
        required: false
        default: '{"hashfns":"poseidon","shard_sizes":"22"}'
        description: 'Additional parameters as JSON'
jobs:
  # One leg per matrix entry. Each leg boots an EC2 self-hosted runner,
  # dispatches `run-on-runner.yml` with the runner's label, waits for that run,
  # fetches its CSV artifact, re-publishes it on this run and finally stops
  # the instance.
  run-benchmarks:
    name: Run on ${{ matrix.instance_type }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        include:
          - instance_type: g6.16xlarge
            enable_gpu: true
            ami_id: ami-079a6a210557ef0e4
          - instance_type: r7i.16xlarge
            enable_gpu: false
            ami_id: ami-079a6a210557ef0e4
    steps:
      - name: Configure AWS credentials
        # v1 targets a Node runtime GitHub no longer ships; the input names
        # used here are unchanged in v4.
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        # NOTE(review): this third-party action follows a moving branch while
        # being handed the PAT and AWS credentials; consider pinning it to a
        # reviewed commit SHA.
        uses: xJonathanLEI/ec2-github-runner@main
        with:
          mode: start
          # Must use personal access token here as `GITHUB_TOKEN` does not
          # have access to runners. Use a fine-grained token with these
          # permissions to at least this repository:
          #   - Administration: Read and write
          #   - Contents: Read and write
          #   - Metadata: Read-only
          #   - Workflows: Read and write
          #   - Actions: Read and write
          github-token: ${{ secrets.GH_PAT }}
          ec2-image-id: ${{ matrix.ami_id }}
          ec2-instance-type: ${{ matrix.instance_type }}
          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
          security-group-id: ${{ secrets.AWS_SG_ID }}
          storage-size: 1024

      - name: Run benchmarks
        id: run-benchmarks
        uses: actions/github-script@v7
        # User-supplied inputs reach the script through the environment
        # instead of being spliced into JavaScript string literals, so a quote
        # or backslash in an input can neither break nor inject code.
        env:
          BENCH_RUNNER_LABEL: ${{ steps.start-ec2-runner.outputs.label }}
          BENCH_INSTANCE_TYPE: ${{ matrix.instance_type }}
          BENCH_ENABLE_GPU: ${{ matrix.enable_gpu }}
          BENCH_PROVERS: ${{ inputs.provers }}
          BENCH_PROGRAMS: ${{ inputs.programs }}
          BENCH_FILENAME: ${{ inputs.filename }}_${{ matrix.instance_type }}
          BENCH_TRIALS: ${{ inputs.trials }}
          BENCH_SP1_REF: ${{ inputs.sp1_ref }}
          BENCH_ADDITIONAL_PARAMS: ${{ inputs.additional_params }}
        with:
          github-token: ${{ secrets.GH_PAT }}
          script: |
            // Dispatches run-on-runner.yml, then polls until the run it
            // created can be identified. On success the run id is exported
            // as TRIGGERED_RUN_ID for the following steps.
            const { owner, repo } = context.repo;
            const workflowId = 'run-on-runner.yml';
            const runnerName = process.env.BENCH_RUNNER_LABEL || '';
            const dispatchInputs = {
              runner_name: runnerName,
              instance_type: process.env.BENCH_INSTANCE_TYPE || '',
              enable_gpu: process.env.BENCH_ENABLE_GPU || '',
              provers: process.env.BENCH_PROVERS || '',
              programs: process.env.BENCH_PROGRAMS || '',
              filename: process.env.BENCH_FILENAME || '',
              trials: process.env.BENCH_TRIALS || '',
              sp1_ref: process.env.BENCH_SP1_REF || '',
              additional_params: process.env.BENCH_ADDITIONAL_PARAMS || ''
            };
            const maxAttempts = 30;
            const pollInterval = 10000; // 10 seconds
            // Runs created before this point cannot be ours. One minute of
            // slack absorbs clock skew between this machine and GitHub.
            const notBefore = Date.now() - 60000;
            let triggeredRunId = null;

            if (!runnerName) {
              core.setFailed('No runner label available from the start-ec2-runner step');
              return;
            }

            console.log('Triggering benchmark workflow');
            try {
              await github.rest.actions.createWorkflowDispatch({
                owner,
                repo,
                workflow_id: workflowId,
                ref: context.ref,
                inputs: dispatchInputs
              });
              console.log('Benchmark workflow triggered successfully');
            } catch (error) {
              core.setFailed(`Failed to trigger workflow: ${error.message}`);
              return;
            }

            // True when every dispatched input equals the reported one.
            const matchesInputs = (actual) =>
              Object.entries(dispatchInputs).every(([key, value]) => actual[key] === value);

            // True when any job of the run asks for this leg's runner label.
            // Assumes run-on-runner.yml uses its `runner_name` input as the
            // `runs-on` label -- TODO confirm against that workflow.
            const requestsOurRunner = async (runId) => {
              const jobs = await github.rest.actions.listJobsForWorkflowRun({
                owner,
                repo,
                run_id: runId
              });
              return jobs.data.jobs.some(job => (job.labels || []).includes(runnerName));
            };

            console.log('Polling for the triggered run');
            for (let attempt = 1; attempt <= maxAttempts && !triggeredRunId; attempt++) {
              await new Promise(resolve => setTimeout(resolve, pollInterval));
              try {
                // No status filter: the run may still be queued or may
                // already have completed by the time it is listed.
                const runs = await github.rest.actions.listWorkflowRuns({
                  owner,
                  repo,
                  workflow_id: workflowId,
                  event: 'workflow_dispatch'
                });
                for (const run of runs.data.workflow_runs) {
                  if (new Date(run.created_at).getTime() < notBefore) continue;
                  try {
                    const runDetails = await github.rest.actions.getWorkflowRun({
                      owner,
                      repo,
                      run_id: run.id
                    });
                    // The REST workflow-run payload does not document an
                    // `inputs` field. Use it when present, otherwise
                    // identify the run through its jobs' runner label.
                    const reported = runDetails.data.inputs;
                    const matched = reported
                      ? matchesInputs(reported)
                      : await requestsOurRunner(run.id);
                    if (matched) {
                      triggeredRunId = run.id;
                      console.log(`Found matching run. Triggered run ID: ${triggeredRunId}`);
                      break;
                    }
                  } catch (error) {
                    // A failure on one candidate must not stop the scan.
                    console.log(`Error checking inputs for run ${run.id}: ${error.message}`);
                  }
                }
                if (!triggeredRunId) {
                  console.log(`Attempt ${attempt}: Matching run not found yet. Continuing to poll...`);
                }
              } catch (error) {
                console.log(`Error while polling: ${error.message}`);
              }
            }

            if (!triggeredRunId) {
              core.setFailed('Failed to find the triggered workflow run with matching inputs after maximum attempts');
              return;
            }
            core.exportVariable('TRIGGERED_RUN_ID', triggeredRunId);
            console.log(`Triggered run ID: ${triggeredRunId}`);

      - name: Wait for benchmark completion
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GH_PAT }}
          script: |
            // Polls the dispatched run once a minute until it completes or
            // one hour has passed; fails this step unless it succeeded.
            const triggeredRunId = process.env.TRIGGERED_RUN_ID;
            if (!triggeredRunId) {
              core.setFailed('No triggered run ID found');
              return;
            }
            const maxWaitTime = 3600000; // 1 hour in milliseconds
            const checkInterval = 60000; // 1 minute in milliseconds
            // A single failed status request should not abandon a benchmark
            // that is still running; give up only after several in a row.
            const maxConsecutiveErrors = 5;
            const startTime = Date.now();
            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${triggeredRunId}`;
            console.log(`Waiting for benchmark job to complete. Job URL: ${runUrl}`);
            let consecutiveErrors = 0;
            while (true) {
              let run = null;
              try {
                run = await github.rest.actions.getWorkflowRun({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  run_id: triggeredRunId
                });
                consecutiveErrors = 0;
              } catch (error) {
                consecutiveErrors++;
                console.log(`Error fetching run status (${consecutiveErrors}/${maxConsecutiveErrors}): ${error.message}`);
                if (consecutiveErrors >= maxConsecutiveErrors) {
                  core.setFailed(`Could not read the benchmark workflow status. Job URL: ${runUrl}`);
                  break;
                }
              }
              if (run && run.data.status === 'completed') {
                console.log(`Benchmark workflow completed with conclusion: ${run.data.conclusion}`);
                if (run.data.conclusion !== 'success') {
                  core.setFailed(`Benchmark workflow failed with conclusion: ${run.data.conclusion}. Job URL: ${runUrl}`);
                }
                break;
              }
              if (Date.now() - startTime > maxWaitTime) {
                core.setFailed(`Benchmark workflow did not complete within the maximum wait time. Job URL: ${runUrl}`);
                break;
              }
              console.log(`Waiting for benchmark to complete... Current status: ${run ? run.data.status : 'unknown'}. Job URL: ${runUrl}`);
              await new Promise(resolve => setTimeout(resolve, checkInterval));
            }

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ matrix.instance_type }}-${{ env.TRIGGERED_RUN_ID }}
          path: ./benchmark-results-${{ matrix.instance_type }}
          # The artifact belongs to the dispatched run, not to this one, so
          # the source run and a token allowed to read it must be named.
          # NOTE(review): only artifacts written by upload-artifact v4 or
          # later can be fetched this way -- confirm in run-on-runner.yml.
          run-id: ${{ env.TRIGGERED_RUN_ID }}
          github-token: ${{ secrets.GH_PAT }}

      - name: Process benchmark results
        run: |
          echo "Results for ${{ matrix.instance_type }}:"
          cat ./benchmark-results-${{ matrix.instance_type }}/*.csv

      # Attach the CSVs to this run as well: the combine-results job only
      # sees artifacts that were uploaded by this workflow run.
      - name: Publish benchmark results to this run
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ matrix.instance_type }}
          path: ./benchmark-results-${{ matrix.instance_type }}
          if-no-files-found: error

      - name: Stop EC2 runner
        # Runs even after a failure or cancellation, but only when the start
        # step actually reported an instance to stop.
        if: ${{ always() && steps.start-ec2-runner.outputs.ec2-instance-id != '' }}
        uses: xJonathanLEI/ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PAT }}
          label: ${{ steps.start-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}

  # Merges the per-instance CSV files into one file, uploads it and prints it
  # in the run summary. Runs after every run-benchmarks leg has finished.
  combine-results:
    needs: run-benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          # Each matching artifact is extracted into a directory named after
          # it, which is the layout the glob in the next step relies on.
          pattern: benchmark-results-*

      - name: Combine CSV files
        run: |
          echo "Combining CSV files:"
          # With nullglob an unmatched pattern yields an empty list instead
          # of the literal pattern text.
          shopt -s nullglob
          files=(benchmark-results-*/*.csv)
          if [ "${#files[@]}" -eq 0 ]; then
            echo "No benchmark CSV files were downloaded" >&2
            exit 1
          fi
          # Get the header from the first CSV file
          head -n 1 "${files[0]}" > combined_results.csv
          # Append data from all CSV files, skipping the header
          for file in "${files[@]}"; do
            tail -n +2 "$file" >> combined_results.csv
          done
          cat combined_results.csv

      - name: Upload combined results
        uses: actions/upload-artifact@v4
        with:
          name: combined-benchmark-results
          path: combined_results.csv

      - name: Create summary
        run: |
          {
            echo "## Benchmark Results Summary"
            echo '```'
            cat combined_results.csv
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"