From 7b5a92dbcf3a9feb3c61d456a3ff48f7afa8d42a Mon Sep 17 00:00:00 2001
From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com>
Date: Fri, 5 Jan 2024 13:17:58 -0500
Subject: [PATCH 1/3] ci: Add comparative benchmarks on `merge_group` (WIP)

---
 .github/workflows/gpu-bench.yml | 155 ++++++++++++++++++++++++++++++++
 Cargo.toml                      |   4 +
 benches/justfile                |  36 ++++++++
 benches/recursive-snark.rs      |  73 +++++++++++++--
 build.rs                        |   8 ++
 5 files changed, 267 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/gpu-bench.yml
 create mode 100644 benches/justfile
 create mode 100644 build.rs

diff --git a/.github/workflows/gpu-bench.yml b/.github/workflows/gpu-bench.yml
new file mode 100644
index 000000000..9e2fa10b2
--- /dev/null
+++ b/.github/workflows/gpu-bench.yml
@@ -0,0 +1,155 @@
+# Run final tests only when attempting to merge, shown as skipped status checks beforehand
+name: GPU benchmark regression test
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    branches: [dev]
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  # Run comparative benchmark against dev, open issue on regression
+  gpu-benchmark:
+    if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
+    name: Run benchmarks on GPU
+    runs-on: [self-hosted, gpu-bench]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          repository: lurk-lab/ci-workflows
+      - uses: ./.github/actions/gpu-setup
+        with:
+          gpu-framework: 'cuda'
+      - uses: ./.github/actions/ci-env
+      - uses: actions/checkout@v4
+      # Install dependencies
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+      - uses: taiki-e/install-action@v2
+        with:
+          tool: just@1.22
+      - name: Install criterion
+        run: |
+          cargo install cargo-criterion
+          cargo install criterion-table
+      - name: Set bench output format and base SHA
+        run: |
+          echo "ARECIBO_BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
+          echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
+          GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
+          echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
+          echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
+      # Checkout gh-pages to check for cached bench result
+      - name: Checkout gh-pages
+        uses: actions/checkout@v4
+        with:
+          ref: gh-pages
+          path: gh-pages
+      - name: Check for cached bench result
+        id: cached-bench
+        run: |
+          if [ -f "${{ env.BASE_COMMIT }}-${{ env.GPU_ID }}.json" ]
+          then
+            echo "cached=true" | tee -a $GITHUB_OUTPUT
+            cp ${{ env.BASE_COMMIT }}-${{ env.GPU_ID }}.json ../${{ env.BASE_COMMIT }}.json
+          else
+            echo "cached=false" | tee -a $GITHUB_OUTPUT
+          fi
+        working-directory: ${{ github.workspace }}/gh-pages
+      # Checkout base branch for comparative bench
+      - uses: actions/checkout@v4
+        if: steps.cached-bench.outputs.cached == 'false'
+        with:
+          ref: dev
+          path: dev
+      # Copy the script so the base can bench with the same parameters
+      - name: Run GPU bench on base branch
+        if: steps.cached-bench.outputs.cached == 'false'
+        run: |
+          # Copy justfile & env to dev, overwriting existing config with that of PR branch
+          cp ../benches/justfile ../benches/bench.env .
+          # Run benchmark
+          just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
+          # Copy bench output to PR branch
+          cp ${{ env.BASE_COMMIT }}.json ..
+        working-directory: ${{ github.workspace }}/dev
+      - name: Run GPU bench on PR branch
+        run: |
+          just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
+          cp ${{ github.sha }}.json ..
+        working-directory: ${{ github.workspace }}/benches
+      - name: copy the benchmark template and prepare it with data
+        run: |
+          cp .github/tables.toml .
+          # Get CPU model
+          CPU_MODEL=$(grep '^model name' /proc/cpuinfo | head -1 | awk -F ': ' '{ print $2 }')
+          # Get vCPU count
+          NUM_VCPUS=$(nproc --all)
+          # Get total RAM in GB
+          TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
+          
+          # Use conditionals to ensure that only non-empty variables are inserted
+          [[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
+          [[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
+          [[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS" tables.toml
+          [[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml          
+          sed -i "/^\"\"\"$/i Workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" tables.toml
+        working-directory: ${{ github.workspace }}
+      # Create a `criterion-table` and write in commit comment
+      - name: Run `criterion-table`
+        run: cat ${{ env.BASE_COMMIT }}.json ${{ github.sha }}.json | criterion-table > BENCHMARKS.md
+      - name: Write bench on commit comment
+        uses: peter-evans/commit-comment@v3
+        with:
+          body-path: BENCHMARKS.md
+      # Check for a slowdown >= 10%. If so, open an issue but don't block merge
+      - name: Check for perf regression
+        id: regression-check
+        run: |
+          regressions=$(awk -F'[*x]' '/slower/{print $12}' BENCHMARKS.md)
+
+          echo $regressions
+
+          for r in $regressions
+          do
+            if (( $(echo "$r >= 1.10" | bc -l) ))
+            then
+              exit 1
+            fi
+          done
+        continue-on-error: true
+      # Not possible to use ${{ github.event.number }} with the `merge_group` trigger
+      - name: Get PR number from merge branch
+        run: |
+          echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
+      - name: Create file for issue
+        if: steps.regression-check.outcome == 'failure'
+        run: |
+          printf '%s\n' "Regression >= 10% found during merge for PR #${{ env.PR_NUMBER }}
+          Commit: ${{ github.sha }}
+          Workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" > ./_body.md
+      - name: Open issue on regression
+        if: steps.regression-check.outcome == 'failure'
+        uses: peter-evans/create-issue-from-file@v4
+        with:
+          title: ':rotating_light: Performance regression detected for PR #${{ env.PR_NUMBER }}'
+          content-filepath: ./_body.md
+          labels: |
+            P-Performance
+            automated issue
+      - name: Remove old dev bench
+        run: |
+          rm ${{ env.BASE_COMMIT }}.json
+          mv ${{ github.sha }}.json ${{ github.sha }}-${{ env.GPU_ID }}.json
+        working-directory: ${{ github.workspace }}
+      - name: Commit bench result to `gh-pages` branch if no regression
+        if: steps.regression-check.outcome != 'failure'
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          branch: gh-pages
+          commit_message: '[automated] GPU Benchmark from PR #${{ env.PR_NUMBER }}'
+          file_pattern: '${{ github.sha }}-${{ env.GPU_ID }}.json'
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index b6ff8187d..fe5f16c88 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -74,6 +74,10 @@ hex = "0.4.3"
 sha2 = "0.10.7"
 tracing-test = "0.2.4"
 expect-test = "1.4.1"
+anyhow = "1.0.72"
+
+[build-dependencies]
+vergen = { version = "8", features = ["build", "git", "gitcl"] }
 
 [[bench]]
 name = "recursive-snark"
diff --git a/benches/justfile b/benches/justfile
new file mode 100644
index 000000000..0e7873379
--- /dev/null
+++ b/benches/justfile
@@ -0,0 +1,36 @@
+# Install with `cargo install just`
+# Usage: `just <bench|gpu-bench|gpu-bench-ci> <args>`
+set dotenv-load
+set dotenv-filename := "bench.env"
+set ignore-comments := true
+
+commit := `git rev-parse HEAD`
+
+# Run CPU benchmarks
+bench +benches:
+  #!/bin/sh
+  for bench in {{benches}}; do
+    cargo criterion --bench $bench
+  done
+
+gpu-env: 
+  # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
+  # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
+  # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
+  export CUDA_ARCH := `nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g'`
+  export EC_GPU_CUDA_NVCC_ARGS := "--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
+  export EC_GPU_FRAMEWORK := "cuda"
+
+# Run CUDA benchmarks on GPU
+gpu-bench +benches: gpu-env
+  #!/bin/sh
+  for bench in {{benches}}; do
+    cargo criterion --bench $bench --features "cuda"
+  done
+
+# Run CUDA benchmarks on GPU, tuned for CI
+gpu-bench-ci +benches:
+  #!/bin/sh
+  for bench in {{benches}}; do
+    cargo criterion --bench $bench --features "cuda" --message-format=json > {{commit}}.json
+  done
\ No newline at end of file
diff --git a/benches/recursive-snark.rs b/benches/recursive-snark.rs
index 353b7fa56..815b5cf7d 100644
--- a/benches/recursive-snark.rs
+++ b/benches/recursive-snark.rs
@@ -1,4 +1,5 @@
 #![allow(non_snake_case)]
+use anyhow::anyhow;
 use arecibo::{
   provider::{PallasEngine, VestaEngine},
   traits::{
@@ -45,25 +46,63 @@ criterion_main!(recursive_snark);
 const NUM_CONS_VERIFIER_CIRCUIT_PRIMARY: usize = 9825;
 const NUM_SAMPLES: usize = 10;
 
+// TODO: Why Copy and &'static str over String?
+#[derive(Clone, Debug, Copy)]
+struct BenchParams {
+  name: &'static str,
+  step_size: usize,
+  // TODO: Is this useful?
+  //date: &'static str,
+  sha: &'static str,
+}
+impl BenchParams {
+  fn name_params(&self) -> (String, String) {
+    let output_type = bench_parameters_env().unwrap_or("stdout".into());
+    match output_type.as_ref() {
+      "pr-comment" => todo!(),
+      "commit-comment" => (
+        format!("ref={}", self.sha),
+        format!("{}-StepCircuitSize-{}", self.name, self.step_size),
+      ),
+      // TODO: refine "gh-pages" and "stdout"
+      _ => todo!(),
+    }
+  }
+}
+
+fn bench_parameters_env() -> anyhow::Result<String> {
+  std::env::var("ARECIBO_BENCH_OUTPUT").map_err(|e| anyhow!("Bench output env var isn't set: {e}"))
+}
+
+fn noise_threshold_env() -> anyhow::Result<f64> {
+  std::env::var("ARECIBO_BENCH_NOISE_THRESHOLD")
+    .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
+    .and_then(|nt| {
+      nt.parse::<f64>()
+        .map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
+    })
+}
+
 fn bench_recursive_snark(c: &mut Criterion) {
   // we vary the number of constraints in the step circuit
   for &num_cons_in_augmented_circuit in [
     NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
     16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
+    //32768,
+    //65536,
+    //131072,
+    //262144,
+    //524288,
+    //1048576,
   ]
   .iter()
   {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!("RecursiveSNARK-StepCircuitSize-{num_cons}"));
+    let mut group = c.benchmark_group("RecursiveSNARK");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     let c_primary = NonTrivialCircuit::new(num_cons);
     let c_secondary = TrivialCircuit::default();
@@ -104,7 +143,15 @@ fn bench_recursive_snark(c: &mut Criterion) {
       assert!(res.is_ok());
     }
 
-    group.bench_function("Prove", |b| {
+    let prove_params = BenchParams {
+      name: "Prove",
+      step_size: num_cons,
+      sha: env!("VERGEN_GIT_SHA"),
+    };
+    let (name, params) = prove_params.name_params();
+    let bench_id = BenchmarkId::new(name, params);
+
+    group.bench_function(bench_id, |b| {
       b.iter(|| {
         // produce a recursive SNARK for a step of the recursion
         assert!(black_box(&mut recursive_snark.clone())
@@ -117,8 +164,16 @@ fn bench_recursive_snark(c: &mut Criterion) {
       })
     });
 
+    let verify_params = BenchParams {
+      name: "Verify",
+      step_size: num_cons,
+      sha: env!("VERGEN_GIT_SHA"),
+    };
+    let (name, params) = verify_params.name_params();
+    let bench_id = BenchmarkId::new(name, params);
+
     // Benchmark the verification time
-    group.bench_function("Verify", |b| {
+    group.bench_function(bench_id, |b| {
       b.iter(|| {
         assert!(black_box(&recursive_snark)
           .verify(
diff --git a/build.rs b/build.rs
new file mode 100644
index 000000000..e39d2522e
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,8 @@
+use std::error::Error;
+use vergen::EmitBuilder;
+
+fn main() -> Result<(), Box<dyn Error>> {
+  // Emit the instructions
+  EmitBuilder::builder().all_git().emit()?;
+  Ok(())
+}

From 3b5e43c672a43d2bc49c753fdb8790cd82434a36 Mon Sep 17 00:00:00 2001
From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com>
Date: Fri, 5 Jan 2024 21:58:15 -0500
Subject: [PATCH 2/3] Add other benchmarks and refactor

---
 .github/PERF_REGRESSION.md            |  7 +++
 .github/tables.toml                   |  6 +++
 .github/workflows/gpu-bench.yml       | 25 ++++------
 benches/common/mod.rs                 | 37 ++++++++++++++
 benches/compressed-snark-supernova.rs | 31 +++++++-----
 benches/compressed-snark.rs           | 25 ++++++----
 benches/justfile                      |  2 +-
 benches/recursive-snark-supernova.rs  | 22 ++++++---
 benches/recursive-snark.rs            | 70 +++++----------------------
 9 files changed, 121 insertions(+), 104 deletions(-)
 create mode 100644 .github/PERF_REGRESSION.md
 create mode 100644 .github/tables.toml
 create mode 100644 benches/common/mod.rs

diff --git a/.github/PERF_REGRESSION.md b/.github/PERF_REGRESSION.md
new file mode 100644
index 000000000..9f9b04115
--- /dev/null
+++ b/.github/PERF_REGRESSION.md
@@ -0,0 +1,7 @@
+---
+title: ":rotating_light: Performance regression in #{{ env.PR_NUMBER }}"
+labels: P-Performance, automated issue
+---
+Regression >= 5% found during merge of: #{{ env.PR_NUMBER }}
+Commit: {{ env.GIT_SHA }}
+Triggered by: {{ env.WORKFLOW_URL }}
\ No newline at end of file
diff --git a/.github/tables.toml b/.github/tables.toml
new file mode 100644
index 000000000..cb4e6595e
--- /dev/null
+++ b/.github/tables.toml
@@ -0,0 +1,6 @@
+[table_comments]
+
+[top_comments]
+Overview = """
+This benchmark report shows the Arecibo GPU benchmarks.
+"""
\ No newline at end of file
diff --git a/.github/workflows/gpu-bench.yml b/.github/workflows/gpu-bench.yml
index 9e2fa10b2..ec0f9ca02 100644
--- a/.github/workflows/gpu-bench.yml
+++ b/.github/workflows/gpu-bench.yml
@@ -91,13 +91,15 @@ jobs:
           NUM_VCPUS=$(nproc --all)
           # Get total RAM in GB
           TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
+          WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
           
           # Use conditionals to ensure that only non-empty variables are inserted
           [[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
           [[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
           [[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS" tables.toml
           [[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml          
-          sed -i "/^\"\"\"$/i Workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" tables.toml
+          sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml
+          echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
         working-directory: ${{ github.workspace }}
       # Create a `criterion-table` and write in commit comment
       - name: Run `criterion-table`
@@ -126,21 +128,14 @@ jobs:
       - name: Get PR number from merge branch
         run: |
           echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
-      - name: Create file for issue
-        if: steps.regression-check.outcome == 'failure'
-        run: |
-          printf '%s\n' "Regression >= 10% found during merge for PR #${{ env.PR_NUMBER }}
-          Commit: ${{ github.sha }}
-          Workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" > ./_body.md
-      - name: Open issue on regression
-        if: steps.regression-check.outcome == 'failure'
-        uses: peter-evans/create-issue-from-file@v4
+      - uses: JasonEtco/create-an-issue@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ env.PR_NUMBER }}
+          GIT_SHA: ${{ github.sha }}
+          WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
         with:
-          title: ':rotating_light: Performance regression detected for PR #${{ env.PR_NUMBER }}'
-          content-filepath: ./_body.md
-          labels: |
-            P-Performance
-            automated issue
+          filename: .github/PERF_REGRESSION.md
       - name: Remove old dev bench
         run: |
           rm ${{ env.BASE_COMMIT }}.json
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
new file mode 100644
index 000000000..43afd0959
--- /dev/null
+++ b/benches/common/mod.rs
@@ -0,0 +1,37 @@
+use anyhow::anyhow;
+use criterion::BenchmarkId;
+
+// TODO: Why Copy and &'static str over String?
+#[derive(Clone, Debug, Copy)]
+pub(crate) struct BenchParams {
+  pub step_size: usize,
+  pub date: &'static str,
+  pub sha: &'static str,
+}
+impl BenchParams {
+  pub(crate) fn bench_id(&self, name: &str) -> BenchmarkId {
+    let output_type = bench_output_env().unwrap_or("stdout".into());
+    match output_type.as_ref() {
+      "pr-comment" => BenchmarkId::new(name, format!("StepCircuitSize-{}", self.step_size)),
+      "commit-comment" => BenchmarkId::new(
+        format!("ref={}", self.sha),
+        format!("{}-StepCircuitSize-{}", name, self.step_size),
+      ),
+      // TODO: refine "gh-pages"
+      _ => BenchmarkId::new(name, format!("StepCircuitSize-{}-{}", self.sha, self.date)),
+    }
+  }
+}
+
+fn bench_output_env() -> anyhow::Result<String> {
+  std::env::var("ARECIBO_BENCH_OUTPUT").map_err(|e| anyhow!("Bench output env var isn't set: {e}"))
+}
+
+pub(crate) fn noise_threshold_env() -> anyhow::Result<f64> {
+  std::env::var("ARECIBO_BENCH_NOISE_THRESHOLD")
+    .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
+    .and_then(|nt| {
+      nt.parse::<f64>()
+        .map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
+    })
+}
diff --git a/benches/compressed-snark-supernova.rs b/benches/compressed-snark-supernova.rs
index 9c1e73484..7d9dd83ef 100644
--- a/benches/compressed-snark-supernova.rs
+++ b/benches/compressed-snark-supernova.rs
@@ -11,14 +11,17 @@ use criterion::{measurement::WallTime, *};
 use ff::PrimeField;
 use std::time::Duration;
 
+mod common;
+use common::{noise_threshold_env, BenchParams};
+
 type E1 = arecibo::provider::PallasEngine;
 type E2 = arecibo::provider::VestaEngine;
 type EE1 = arecibo::provider::ipa_pc::EvaluationEngine<E1>;
 type EE2 = arecibo::provider::ipa_pc::EvaluationEngine<E2>;
-// SNARKs without computation commitmnets
+// SNARKs without computation commitments
 type S1 = arecibo::spartan::batched::BatchedRelaxedR1CSSNARK<E1, EE1>;
 type S2 = arecibo::spartan::snark::RelaxedR1CSSNARK<E2, EE2>;
-// SNARKs with computation commitmnets
+// SNARKs with computation commitments
 type SS1 = arecibo::spartan::batched_ppsnark::BatchedRelaxedR1CSSNARK<E1, EE1>;
 type SS2 = arecibo::spartan::ppsnark::RelaxedR1CSSNARK<E2, EE2>;
 
@@ -162,8 +165,13 @@ fn bench_compressed_snark_internal_with_arity<
 
   let (prover_key, verifier_key) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap();
 
+  let bench_params = BenchParams {
+    step_size: num_cons,
+    sha: env!("VERGEN_GIT_SHA"),
+  };
+
   // Benchmark the prove time
-  group.bench_function("Prove", |b| {
+  group.bench_function(bench_params.bench_id("Prove"), |b| {
     b.iter(|| {
       assert!(CompressedSNARK::<_, _, _, _, S1, S2>::prove(
         black_box(&pp),
@@ -180,7 +188,7 @@ fn bench_compressed_snark_internal_with_arity<
   let compressed_snark = res.unwrap();
 
   // Benchmark the verification time
-  group.bench_function("Verify", |b| {
+  group.bench_function(bench_params.bench_id("Verify"), |b| {
     b.iter(|| {
       assert!(black_box(&compressed_snark)
         .verify(
@@ -211,10 +219,9 @@ fn bench_one_augmented_circuit_compressed_snark(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "CompressedSNARKSuperNova-1circuit-StepCircuitSize-{num_cons}"
-    ));
+    let mut group = c.benchmark_group("CompressedSNARKSuperNova-1circuit");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_compressed_snark_internal_with_arity::<S1, S2>(&mut group, 1, num_cons);
 
@@ -239,10 +246,9 @@ fn bench_two_augmented_circuit_compressed_snark(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "CompressedSNARKSuperNova-2circuit-StepCircuitSize-{num_cons}"
-    ));
+    let mut group = c.benchmark_group("CompressedSNARKSuperNova-2circuit");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_compressed_snark_internal_with_arity::<S1, S2>(&mut group, 2, num_cons);
 
@@ -267,10 +273,9 @@ fn bench_two_augmented_circuit_compressed_snark_with_computational_commitments(c
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "CompressedSNARKSuperNova-Commitments-2circuit-StepCircuitSize-{num_cons}"
-    ));
+    let mut group = c.benchmark_group("CompressedSNARKSuperNova-Commitments-2circuit");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_compressed_snark_internal_with_arity::<SS1, SS2>(&mut group, 2, num_cons);
 
diff --git a/benches/compressed-snark.rs b/benches/compressed-snark.rs
index a620c93cb..f7751c45b 100644
--- a/benches/compressed-snark.rs
+++ b/benches/compressed-snark.rs
@@ -14,6 +14,9 @@ use criterion::{measurement::WallTime, *};
 use ff::PrimeField;
 use std::time::Duration;
 
+mod common;
+use common::{noise_threshold_env, BenchParams};
+
 type E1 = PallasEngine;
 type E2 = VestaEngine;
 type EE1 = arecibo::provider::ipa_pc::EvaluationEngine<E1>;
@@ -101,8 +104,13 @@ fn bench_compressed_snark_internal<S1: RelaxedR1CSSNARKTrait<E1>, S2: RelaxedR1C
     assert!(res.is_ok());
   }
 
+  let bench_params = BenchParams {
+    step_size: num_cons,
+    sha: env!("VERGEN_GIT_SHA"),
+  };
+
   // Bench time to produce a compressed SNARK
-  group.bench_function("Prove", |b| {
+  group.bench_function(bench_params.bench_id("Prove"), |b| {
     b.iter(|| {
       assert!(CompressedSNARK::<_, _, _, _, S1, S2>::prove(
         black_box(&pp),
@@ -117,7 +125,7 @@ fn bench_compressed_snark_internal<S1: RelaxedR1CSSNARKTrait<E1>, S2: RelaxedR1C
   let compressed_snark = res.unwrap();
 
   // Benchmark the verification time
-  group.bench_function("Verify", |b| {
+  group.bench_function(bench_params.bench_id("Verify"), |b| {
     b.iter(|| {
       assert!(black_box(&compressed_snark)
         .verify(
@@ -148,8 +156,9 @@ fn bench_compressed_snark(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!("CompressedSNARK-StepCircuitSize-{num_cons}"));
+    let mut group = c.benchmark_group("CompressedSNARK");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_compressed_snark_internal::<S1, S2>(&mut group, num_cons);
 
@@ -172,12 +181,10 @@ fn bench_compressed_snark_with_computational_commitments(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "CompressedSNARK-Commitments-StepCircuitSize-{num_cons}"
-    ));
-    group
-      .sampling_mode(SamplingMode::Flat)
-      .sample_size(NUM_SAMPLES);
+    let mut group = c.benchmark_group("CompressedSNARK-Commitments");
+    group.sampling_mode(SamplingMode::Flat);
+    group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_compressed_snark_internal::<SS1, SS2>(&mut group, num_cons);
 
diff --git a/benches/justfile b/benches/justfile
index 0e7873379..4e74be610 100644
--- a/benches/justfile
+++ b/benches/justfile
@@ -32,5 +32,5 @@ gpu-bench +benches: gpu-env
 gpu-bench-ci +benches:
   #!/bin/sh
   for bench in {{benches}}; do
-    cargo criterion --bench $bench --features "cuda" --message-format=json > {{commit}}.json
+    cargo criterion --bench $bench --features "cuda" --message-format=json > "$bench-{{commit}}".json
   done
\ No newline at end of file
diff --git a/benches/recursive-snark-supernova.rs b/benches/recursive-snark-supernova.rs
index e2bd83e11..7050a0019 100644
--- a/benches/recursive-snark-supernova.rs
+++ b/benches/recursive-snark-supernova.rs
@@ -12,6 +12,9 @@ use criterion::{measurement::WallTime, *};
 use ff::PrimeField;
 use std::time::Duration;
 
+mod common;
+use common::{noise_threshold_env, BenchParams};
+
 // To run these benchmarks, first download `criterion` with `cargo install cargo-criterion`.
 // Then `cargo criterion --bench recursive-snark-supernova`. The results are located in `target/criterion/data/<name-of-benchmark>`.
 // For flamegraphs, run `cargo criterion --bench recursive-snark-supernova --features flamegraph -- --profile-time <secs>`.
@@ -152,8 +155,13 @@ fn bench_recursive_snark_internal_with_arity(
   assert!(recursive_snark_option.is_some());
   let recursive_snark = recursive_snark_option.unwrap();
 
+  let bench_params = BenchParams {
+    step_size: num_cons,
+    sha: env!("VERGEN_GIT_SHA"),
+  };
+
   // Benchmark the prove time
-  group.bench_function("Prove", |b| {
+  group.bench_function(bench_params.bench_id("Prove"), |b| {
     b.iter(|| {
       // produce a recursive SNARK for a step of the recursion
       assert!(black_box(&mut recursive_snark.clone())
@@ -167,7 +175,7 @@ fn bench_recursive_snark_internal_with_arity(
   });
 
   // Benchmark the verification time
-  group.bench_function("Verify", |b| {
+  group.bench_function(bench_params.bench_id("Verify"), |b| {
     b.iter(|| {
       assert!(black_box(&mut recursive_snark.clone())
         .verify(
@@ -197,10 +205,9 @@ fn bench_one_augmented_circuit_recursive_snark(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "RecursiveSNARKSuperNova-1circuit-StepCircuitSize-{num_cons}"
-    ));
+    let mut group = c.benchmark_group("RecursiveSNARKSuperNova-1circuit");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_recursive_snark_internal_with_arity(&mut group, 1, num_cons);
     group.finish();
@@ -224,10 +231,9 @@ fn bench_two_augmented_circuit_recursive_snark(c: &mut Criterion) {
     // number of constraints in the step circuit
     let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
 
-    let mut group = c.benchmark_group(format!(
-      "RecursiveSNARKSuperNova-2circuit-StepCircuitSize-{num_cons}"
-    ));
+    let mut group = c.benchmark_group("RecursiveSNARKSuperNova-2circuit");
     group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
 
     bench_recursive_snark_internal_with_arity(&mut group, 2, num_cons);
     group.finish();
diff --git a/benches/recursive-snark.rs b/benches/recursive-snark.rs
index 815b5cf7d..40d5c6b8f 100644
--- a/benches/recursive-snark.rs
+++ b/benches/recursive-snark.rs
@@ -1,5 +1,4 @@
 #![allow(non_snake_case)]
-use anyhow::anyhow;
 use arecibo::{
   provider::{PallasEngine, VestaEngine},
   traits::{
@@ -15,6 +14,9 @@ use criterion::*;
 use ff::PrimeField;
 use std::time::Duration;
 
+mod common;
+use common::{noise_threshold_env, BenchParams};
+
 type E1 = PallasEngine;
 type E2 = VestaEngine;
 type C1 = NonTrivialCircuit<<E1 as Engine>::Scalar>;
@@ -46,54 +48,17 @@ criterion_main!(recursive_snark);
 const NUM_CONS_VERIFIER_CIRCUIT_PRIMARY: usize = 9825;
 const NUM_SAMPLES: usize = 10;
 
-// TODO: Why Copy and &'static str over String?
-#[derive(Clone, Debug, Copy)]
-struct BenchParams {
-  name: &'static str,
-  step_size: usize,
-  // TODO: Is this useful?
-  //date: &'static str,
-  sha: &'static str,
-}
-impl BenchParams {
-  fn name_params(&self) -> (String, String) {
-    let output_type = bench_parameters_env().unwrap_or("stdout".into());
-    match output_type.as_ref() {
-      "pr-comment" => todo!(),
-      "commit-comment" => (
-        format!("ref={}", self.sha),
-        format!("{}-StepCircuitSize-{}", self.name, self.step_size),
-      ),
-      // TODO: refine "gh-pages" and "stdout"
-      _ => todo!(),
-    }
-  }
-}
-
-fn bench_parameters_env() -> anyhow::Result<String> {
-  std::env::var("ARECIBO_BENCH_OUTPUT").map_err(|e| anyhow!("Bench output env var isn't set: {e}"))
-}
-
-fn noise_threshold_env() -> anyhow::Result<f64> {
-  std::env::var("ARECIBO_BENCH_NOISE_THRESHOLD")
-    .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
-    .and_then(|nt| {
-      nt.parse::<f64>()
-        .map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
-    })
-}
-
 fn bench_recursive_snark(c: &mut Criterion) {
   // we vary the number of constraints in the step circuit
   for &num_cons_in_augmented_circuit in [
     NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
     16384,
-    //32768,
-    //65536,
-    //131072,
-    //262144,
-    //524288,
-    //1048576,
+    32768,
+    65536,
+    131072,
+    262144,
+    524288,
+    1048576,
   ]
   .iter()
   {
@@ -143,15 +108,12 @@ fn bench_recursive_snark(c: &mut Criterion) {
       assert!(res.is_ok());
     }
 
-    let prove_params = BenchParams {
-      name: "Prove",
+    let bench_params = BenchParams {
       step_size: num_cons,
       sha: env!("VERGEN_GIT_SHA"),
     };
-    let (name, params) = prove_params.name_params();
-    let bench_id = BenchmarkId::new(name, params);
 
-    group.bench_function(bench_id, |b| {
+    group.bench_function(bench_params.bench_id("Prove"), |b| {
       b.iter(|| {
         // produce a recursive SNARK for a step of the recursion
         assert!(black_box(&mut recursive_snark.clone())
@@ -164,16 +126,8 @@ fn bench_recursive_snark(c: &mut Criterion) {
       })
     });
 
-    let verify_params = BenchParams {
-      name: "Verify",
-      step_size: num_cons,
-      sha: env!("VERGEN_GIT_SHA"),
-    };
-    let (name, params) = verify_params.name_params();
-    let bench_id = BenchmarkId::new(name, params);
-
     // Benchmark the verification time
-    group.bench_function(bench_id, |b| {
+    group.bench_function(bench_params.bench_id("Verify"), |b| {
       b.iter(|| {
         assert!(black_box(&recursive_snark)
           .verify(

From ef6253d2c337b28668b02bb22b89605f3bd6c01b Mon Sep 17 00:00:00 2001
From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com>
Date: Mon, 8 Jan 2024 16:50:43 -0500
Subject: [PATCH 3/3] Remove caching for now and test action on `pull_request`

---
 .github/PERF_REGRESSION.md            |  2 +-
 .github/workflows/gpu-bench.yml       | 65 ++++++++++-----------------
 benches/common/mod.rs                 |  8 +++-
 benches/compressed-snark-supernova.rs |  1 +
 benches/compressed-snark.rs           |  1 +
 benches/recursive-snark-supernova.rs  |  1 +
 benches/recursive-snark.rs            |  1 +
 7 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/.github/PERF_REGRESSION.md b/.github/PERF_REGRESSION.md
index 9f9b04115..090be25bd 100644
--- a/.github/PERF_REGRESSION.md
+++ b/.github/PERF_REGRESSION.md
@@ -2,6 +2,6 @@
 title: ":rotating_light: Performance regression in #{{ env.PR_NUMBER }}"
 labels: P-Performance, automated issue
 ---
-Regression >= 5% found during merge of: #{{ env.PR_NUMBER }}
+Regression >= {{ env.NOISE_THRESHOLD }} found during merge of: #{{ env.PR_NUMBER }}
 Commit: {{ env.GIT_SHA }}
 Triggered by: {{ env.WORKFLOW_URL }}
\ No newline at end of file
diff --git a/.github/workflows/gpu-bench.yml b/.github/workflows/gpu-bench.yml
index ec0f9ca02..db826474f 100644
--- a/.github/workflows/gpu-bench.yml
+++ b/.github/workflows/gpu-bench.yml
@@ -43,44 +43,25 @@ jobs:
           GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
           echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
           echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
-      # Checkout gh-pages to check for cached bench result
-      - name: Checkout gh-pages
-        uses: actions/checkout@v4
-        with:
-          ref: gh-pages
-          path: gh-pages
-      - name: Check for cached bench result
-        id: cached-bench
-        run: |
-          if [ -f "${{ env.BASE_COMMIT }}-${{ env.GPU_ID }}.json" ]
-          then
-            echo "cached=true" | tee -a $GITHUB_OUTPUT
-            cp ${{ env.BASE_COMMIT }}-${{ env.GPU_ID }}.json ../${{ env.BASE_COMMIT }}.json
-          else
-            echo "cached=false" | tee -a $GITHUB_OUTPUT
-          fi
-        working-directory: ${{ github.workspace }}/gh-pages
       # Checkout base branch for comparative bench
       - uses: actions/checkout@v4
-        if: steps.cached-bench.outputs.cached == 'false'
         with:
           ref: dev
           path: dev
       # Copy the script so the base can bench with the same parameters
       - name: Run GPU bench on base branch
-        if: steps.cached-bench.outputs.cached == 'false'
         run: |
-          # Copy justfile & env to dev, overwriting existing config with that of PR branch
-          cp ../benches/justfile ../benches/bench.env .
+          # Copy justfile to dev, overwriting existing config with that of PR branch
+          cp ../benches/justfile .
           # Run benchmark
           just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
           # Copy bench output to PR branch
-          cp ${{ env.BASE_COMMIT }}.json ..
+          cp *-${{ env.BASE_COMMIT }}.json ..
         working-directory: ${{ github.workspace }}/dev
       - name: Run GPU bench on PR branch
         run: |
           just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
-          cp ${{ github.sha }}.json ..
+          cp *-${{ github.sha }}.json ..
         working-directory: ${{ github.workspace }}/benches
       - name: copy the benchmark template and prepare it with data
         run: |
@@ -103,26 +84,37 @@ jobs:
         working-directory: ${{ github.workspace }}
       # Create a `criterion-table` and write in commit comment
       - name: Run `criterion-table`
-        run: cat ${{ env.BASE_COMMIT }}.json ${{ github.sha }}.json | criterion-table > BENCHMARKS.md
+        run: |
+          cat recursive-snark-${{ env.BASE_COMMIT }}.json recursive-snark-${{ github.sha }}.json \
+          recursive-snark-supernova-${{ env.BASE_COMMIT }}.json recursive-snark-supernova- ${{ github.sha }}.json \
+          compressed-snark-${{ env.BASE_COMMIT }}.json compressed-snark-${{ github.sha }}.json \
+          compressed-snark-supernova-${{ env.BASE_COMMIT }}.json compressed-snark-supernova- ${{ github.sha }}.json \
+          | criterion-table > BENCHMARKS.md
       - name: Write bench on commit comment
         uses: peter-evans/commit-comment@v3
         with:
           body-path: BENCHMARKS.md
-      # Check for a slowdown >= 10%. If so, open an issue but don't block merge
+      # Check for a slowdown >= `$ARECIBO_NOISE_THRESHOLD` (fallback is 5%). If so, open an issue but don't block merge
       - name: Check for perf regression
         id: regression-check
         run: |
-          regressions=$(awk -F'[*x]' '/slower/{print $12}' BENCHMARKS.md)
-
+          REGRESSIONS=$(awk -F'[*x]' '/slower/{print $12}' BENCHMARKS.md)
           echo $regressions
 
-          for r in $regressions
+          if [ ! -z "${{ env.ARECIBO_NOISE_THRESHOLD}}" ]; then
+            NOISE_THRESHOLD=$(echo "1+${{ env.ARECIBO_NOISE_THRESHOLD }}" | bc)
+          else
+            NOISE_THRESHOLD=1.05
+          fi
+
+          for r in $REGRESSIONS
           do
-            if (( $(echo "$r >= 1.10" | bc -l) ))
+            if (( $(echo "$r >= $NOISE_THRESHOLD" | bc -l) ))
             then
               exit 1
             fi
           done
+          echo "NOISE_THRESHOLD=$NOISE_THRESHOLD" | tee -a $GITHUB_ENV
         continue-on-error: true
       # Not possible to use ${{ github.event.number }} with the `merge_group` trigger
       - name: Get PR number from merge branch
@@ -134,17 +126,6 @@ jobs:
           PR_NUMBER: ${{ env.PR_NUMBER }}
           GIT_SHA: ${{ github.sha }}
           WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
+          NOISE_THRESHOLD: $${{ env.NOISE_THRESHOLD }}
         with:
-          filename: .github/PERF_REGRESSION.md
-      - name: Remove old dev bench
-        run: |
-          rm ${{ env.BASE_COMMIT }}.json
-          mv ${{ github.sha }}.json ${{ github.sha }}-${{ env.GPU_ID }}.json
-        working-directory: ${{ github.workspace }}
-      - name: Commit bench result to `gh-pages` branch if no regression
-        if: steps.regression-check.outcome != 'failure'
-        uses: stefanzweifel/git-auto-commit-action@v5
-        with:
-          branch: gh-pages
-          commit_message: '[automated] GPU Benchmark from PR #${{ env.PR_NUMBER }}'
-          file_pattern: '${{ github.sha }}-${{ env.GPU_ID }}.json'
\ No newline at end of file
+          filename: .github/PERF_REGRESSION.md
\ No newline at end of file
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
index 43afd0959..43ec5ac13 100644
--- a/benches/common/mod.rs
+++ b/benches/common/mod.rs
@@ -18,7 +18,13 @@ impl BenchParams {
         format!("{}-StepCircuitSize-{}", name, self.step_size),
       ),
       // TODO: refine "gh-pages"
-      _ => BenchmarkId::new(name, format!("StepCircuitSize-{}-{}", self.sha, self.date)),
+      _ => BenchmarkId::new(
+        name,
+        format!(
+          "StepCircuitSize-{}-{}-{}",
+          self.step_size, self.sha, self.date
+        ),
+      ),
     }
   }
 }
diff --git a/benches/compressed-snark-supernova.rs b/benches/compressed-snark-supernova.rs
index 7d9dd83ef..44a4607b6 100644
--- a/benches/compressed-snark-supernova.rs
+++ b/benches/compressed-snark-supernova.rs
@@ -167,6 +167,7 @@ fn bench_compressed_snark_internal_with_arity<
 
   let bench_params = BenchParams {
     step_size: num_cons,
+    date: env!("VERGEN_GIT_COMMIT_DATE"),
     sha: env!("VERGEN_GIT_SHA"),
   };
 
diff --git a/benches/compressed-snark.rs b/benches/compressed-snark.rs
index f7751c45b..c3eab2a17 100644
--- a/benches/compressed-snark.rs
+++ b/benches/compressed-snark.rs
@@ -106,6 +106,7 @@ fn bench_compressed_snark_internal<S1: RelaxedR1CSSNARKTrait<E1>, S2: RelaxedR1C
 
   let bench_params = BenchParams {
     step_size: num_cons,
+    date: env!("VERGEN_GIT_COMMIT_DATE"),
     sha: env!("VERGEN_GIT_SHA"),
   };
 
diff --git a/benches/recursive-snark-supernova.rs b/benches/recursive-snark-supernova.rs
index 7050a0019..23b0d4c2b 100644
--- a/benches/recursive-snark-supernova.rs
+++ b/benches/recursive-snark-supernova.rs
@@ -157,6 +157,7 @@ fn bench_recursive_snark_internal_with_arity(
 
   let bench_params = BenchParams {
     step_size: num_cons,
+    date: env!("VERGEN_GIT_COMMIT_DATE"),
     sha: env!("VERGEN_GIT_SHA"),
   };
 
diff --git a/benches/recursive-snark.rs b/benches/recursive-snark.rs
index 40d5c6b8f..08d3ffac4 100644
--- a/benches/recursive-snark.rs
+++ b/benches/recursive-snark.rs
@@ -110,6 +110,7 @@ fn bench_recursive_snark(c: &mut Criterion) {
 
     let bench_params = BenchParams {
       step_size: num_cons,
+      date: env!("VERGEN_GIT_COMMIT_DATE"),
       sha: env!("VERGEN_GIT_SHA"),
     };