Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(msm-gpu-exploration): integrate zprize msm #128

Merged
merged 30 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
48de8c1
feat(msm_benchmark): integrate zprize2022 TrapdoorTech msm algo on Rust
moven0831 Mar 24, 2024
210bae2
refactor(msm_benchmark): separate arkworks_pippenger as baseline from…
moven0831 Mar 24, 2024
6fd6b72
refactor(benchmark): rewrite scalars and points gen to preprocess
moven0831 Mar 31, 2024
6ea2223
refactor(baseline): rewrite benchmark method to 2^10 x 2^16 instance …
moven0831 Apr 8, 2024
3a5c7ab
refactor: modify benchmark standard to match zprize works
moven0831 Apr 11, 2024
e025f08
feat(baseline): adopt zprize benchmarking method and enable multi ben…
moven0831 Apr 11, 2024
3df7313
feat(mopro-ffi): enable arkworks_pippenger to benchmark through uniffi
moven0831 Apr 12, 2024
44198b6
feat(ffi): integrate trapdoor tech msm in mopro-ffi
FoodChain1028 Apr 11, 2024
ea0d7f1
feat(ffi): add test of trapdoor tech msm
FoodChain1028 Apr 11, 2024
6d35c02
delete benchmark report
FoodChain1028 Apr 12, 2024
b874f82
fix(ffi): modify input for trapdoor msm
FoodChain1028 Apr 12, 2024
a1f6adb
fix(ffi): add trapdoor in udl file
FoodChain1028 Apr 12, 2024
28fcb3f
feat(ffi-test): add swift test for arkworks_pippenger and trapdoorMsm
FoodChain1028 Apr 12, 2024
8c33717
feat(ios): add benchmark functions
FoodChain1028 Apr 13, 2024
a34426b
fix(ios): add todo and fix the baseline calculation
FoodChain1028 Apr 14, 2024
f776532
fix: update arkowrks pippenger input/ ouput
FoodChain1028 Apr 15, 2024
de1225e
fix: update trapdoor tech zprize msm input/ ouput
FoodChain1028 Apr 15, 2024
c852071
update: add printing for dx
FoodChain1028 Apr 15, 2024
6dd48a6
fix: add the feature flag back
FoodChain1028 Apr 15, 2024
70ba264
feat(ios): add points and scalar download action + add path
FoodChain1028 Apr 15, 2024
fb0bec3
fix: delete redundant parameter
FoodChain1028 Apr 15, 2024
88d0f75
fix: modify msm functions input
FoodChain1028 Apr 15, 2024
0431400
fix: modify msm functions input
FoodChain1028 Apr 15, 2024
665294b
fix(ios): modify run benchmark inputs
FoodChain1028 Apr 15, 2024
52a94d8
fix: lint
FoodChain1028 Apr 15, 2024
2a25cdf
fix: add not feature functions to prevent udl compiling error
FoodChain1028 Apr 16, 2024
66b0ae2
feat(benchmark data): accelerate the benchmark data generation. 2^20 …
moven0831 Apr 16, 2024
a279c1d
feat: add a README file for gpu-exploration
FoodChain1028 Apr 16, 2024
fc0c972
fix(gpu-explorations): add feature flags for root toml and modify docs
moven0831 Apr 17, 2024
7103cc5
fix(gpu-benchmark): comment the download of instance from api server,…
moven0831 Apr 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 225 additions & 26 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,10 @@ exclude = ["mopro-cli-example"]
# NOTE: Forked wasmer to work around memory limits
# See https://github.com/wasmerio/wasmer/commit/09c7070
wasmer = { git = "https://github.com/oskarth/wasmer.git", rev = "09c7070" }

# NOTE: For gpu exploration on zprize works, will only compile when `gpu-benchmarks` feature is enabled
ark-bls12-377-3 = { git = 'https://github.com/arkworks-rs/curves.git', package = 'ark-bls12-377', tag = 'v0.3.0', optional = true}
ark-ec-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-ec', tag = 'v0.3.0', features = ["parallel"], optional = true }
ark-ff-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-ff', tag = 'v0.3.0', features = ["parallel"], optional = true }
ark-serialize-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-serialize', tag = 'v0.3.0', optional = true }
ark-std-3 = { git = 'https://github.com/arkworks-rs/std.git', package = 'ark-std', tag = 'v0.3.0', optional = true }
3 changes: 3 additions & 0 deletions mopro-core/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

# GPU exploration - preprocessed vectors
src/middleware/gpu_explorations/utils/vectors
29 changes: 24 additions & 5 deletions mopro-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ edition = "2021"
[features]
default = ["wasmer/dylib"]
dylib = [] # NOTE: can probably remove this if we use env config instead
gpu-benchmarks = []
gpu-benchmarks = ["ark-bls12-377", "ark-bls12-381", "ark-ed-on-bls12-377", "ark-ed-on-bls12-381", "ark-poly", "ark-poly-commit", "ark-sponge", "duration-string", "rand", "rand_chacha", "lazy_static", "ark-ec-3", "ark-ff-3", "ark-serialize-3", "ark-std-3", "ark-bls12-377-3"]
calc-native-witness = ["witness"] # experimental feature to calculate witness with witness graph
build-native-witness = ["witness/build-witness"] # only enable build-native-witness feature when building the witness graph

Expand Down Expand Up @@ -46,6 +46,29 @@ thiserror = "=1.0.39"
color-eyre = "=0.6.2"
criterion = "=0.3.6"

# GPU explorations
ark-bls12-377 = { version = "0.4", optional = true }
ark-bls12-381 = { version = "0.3", optional = true }
ark-ed-on-bls12-377 = { version = "0.3", optional = true }
ark-ed-on-bls12-381 = { version = "0.3", optional = true }
ark-poly = { version = "0.3", optional = true }
ark-poly-commit = { version = "0.3", optional = true }
ark-sponge = { version = "0.3", optional = true }
duration-string = { version = "0.0.6", optional = true }
rand = { version = "0.8.0", optional = true }
rand_chacha = { version = "0.3.1", optional = true }
lazy_static = { version = "1.4.0", optional = true }
ark-ff = { version = "=0.4.1", default-features = false, features = [
"parallel",
] }

# GPU explorations from mopro/Cargo.toml patch
ark-bls12-377-3 = { git = 'https://github.com/arkworks-rs/curves.git', package = 'ark-bls12-377', tag = 'v0.3.0', optional = true}
ark-ec-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-ec', tag = 'v0.3.0', features = ["parallel"], optional = true}
ark-ff-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-ff', tag = 'v0.3.0', features = ["parallel"], optional = true }
ark-serialize-3 = { git = 'https://github.com/arkworks-rs/algebra.git', package = 'ark-serialize', tag = 'v0.3.0', optional = true }
ark-std-3 = { git = 'https://github.com/arkworks-rs/std.git', package = 'ark-std', tag = 'v0.3.0', optional = true }

[build-dependencies]
color-eyre = "0.6"
enumset = "1.0.8"
Expand All @@ -54,7 +77,3 @@ toml = "0.8"
serde = "1.0"
serde_derive = "1.0"
witness = { git = "https://github.com/philsippl/circom-witness-rs.git", optional = true}

[[bin]]
name = "generate_benchmark_report"
path = "src/middleware/gpu_explorations/bin/generate_benchmark_report.rs"
22 changes: 0 additions & 22 deletions mopro-core/benchmarks/gpu_explorations/msm_bench_rust_laptop.csv

This file was deleted.

22 changes: 0 additions & 22 deletions mopro-core/benchmarks/gpu_explorations/msm_bench_swift_laptop.csv

This file was deleted.

65 changes: 65 additions & 0 deletions mopro-core/gpu_explorations/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# GPU exploration

## Report links

- [Benchmarking TrapdoorTechZprize MSM & arkworks(0.4) MSM](https://hackmd.io/ZCxFpQ8AROyYGTl5GLqAGQ)

## Steps to run the benchmarks

There are currently 2 algorithms for benchmarking:

- `arkworks_pippenger`
- `trapdoortech_zprize_msm`

### mopro-core tests

To run the benchmarks of the instance size of $2^{16}$ on BLS12_377 curve in `mopro-core`, replace `<algorithm_you_want_to_test>` with the algorithm name listed above.

```bash
cargo test --release --features gpu-benchmarks --package mopro-core --lib -- middleware::gpu_explorations::<algorithm_you_want_to_test>::tests::test_run_benchmark --exact --nocapture
```

Run the benchmarks for multiple instance sizes. You can customize your own benchmark parameters in the module files under `mopro-core/src/middleware/gpu_explorations`
```bash
cargo test --release --features gpu-benchmarks --package mopro-core --lib -- middleware::gpu_explorations::<algorithm_you_want_to_test>::tests::test_run_multi_benchmarks --exact --nocapture
```

More context about the benchmark:
- It generates instances on the BLS12-377 curve with 32-byte scalars (i.e. able to represent unsigned integers from 0 to $2^{256}-1$) in `mopro-core/src/middleware/gpu_explorations/utils/vectors/`
- The instance size means the number of points and scalars.
- The MSM time is linear in the instance size.

The results are as below:

```bash
Vectors already generated
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 195.635083ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 206.639791ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 205.1675ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 197.742167ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 207.147166ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 199.729459ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 203.080416ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 198.15875ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 201.636916ms
Average time to execute MSM with 65536 points and scalars in 1 iterations is: 210.273792ms
Done running benchmark. Check the result at: "../mopro-core/benchmarks/gpu_explorations"
16x10 result: BenchmarkResult {
instance_size: 16,
num_instance: 10,
avg_processing_time: 202.52110399999998,
}
...
```

### mopro-ios benchmarking

1. cd to the `mopro/` directory.
2. run `./scripts/build_ios.sh config-example.toml` (remember to change your ios_device_type `simulator`/`device`) to build and update the bindings.
3. open `mopro-ios/MoproKit/Example/MoproKit.xcworkspace` in Xcode.
4. choose your simulator/mobile device and build the project (can also use `cmd + R` as hot key).
5. choose `MSMBenchmark` and choose the algorithms and click the button below you want to start benchmark.

### `ExampleGpuExploration` in templates

> The example project would be created soon.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
msm_size,num_msm,avg_processing_time(ms)
8,5,3.302555666666667
8,10,1.7059585000000002
12,5,11.544680666666668
12,10,11.7898874
16,5,128.50465099999997
16,10,139.1740167
18,5,472.9359916
18,10,477.0808459000001
7 changes: 7 additions & 0 deletions mopro-core/gpu_explorations/benchmarks/trapdoor_benchmark.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
msm_size,num_msm,avg_processing_time(ms)
8,5,5.758712888888889
8,10,5.595629300000001
12,5,48.66521755555556
12,10,48.5190125
16,5,539.597666625
16,10,527.8981749000001
149 changes: 149 additions & 0 deletions mopro-core/src/middleware/gpu_explorations/arkworks_pippenger.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
use ark_bls12_377::{Fr as ScalarField, G1Affine, G1Projective};
// use ark_bn254::{Fr as ScalarField, FrConfig, G1Affine as GAffine, G1Projective as G};
use ark_ec::VariableBaseMSM;
use ark_ff::BigInt;
use std::time::{Duration, Instant};

use crate::middleware::gpu_explorations::utils::{benchmark::BenchmarkResult, preprocess};

/// Runs the MSM benchmark for every instance yielded by `instances`.
///
/// Each instance is a `(points, scalars)` pair serialized in the arkworks 0.3
/// format; both are converted to arkworks 0.4 types before timing so the
/// conversion cost is excluded from the measurement. Returns one averaged
/// duration per instance (averaged over `iterations` MSM runs).
///
/// # Panics
/// Panics if `iterations` is zero (the per-instance average would divide by
/// zero) or if the underlying MSM rejects the inputs (e.g. point/scalar
/// length mismatch).
pub fn benchmark_msm<I>(
    instances: I,
    iterations: u32,
) -> Result<Vec<Duration>, preprocess::HarnessError>
where
    I: Iterator<Item = preprocess::Instance>,
{
    // Fail loudly up front instead of panicking on `Duration / 0` below.
    assert!(iterations > 0, "iterations must be non-zero");

    let mut instance_durations = Vec::new();

    for instance in instances {
        let points = &instance.0;
        let scalars = &instance.1;

        // Parse points and scalars from the arkworks 0.3 compatible format to
        // the 0.4 compatible one. `collect` preallocates from the iterator's
        // size hint, avoiding repeated Vec growth.
        let parsed_points: Vec<G1Affine> = points
            .iter()
            .map(|p| {
                G1Affine::new_unchecked(BigInt::new(p.x.0 .0).into(), BigInt::new(p.y.0 .0).into())
            })
            .collect();
        let parsed_scalars: Vec<ScalarField> = scalars
            .iter()
            .map(|s| ScalarField::new(BigInt::new(s.0)))
            .collect();

        let mut instance_total_duration = Duration::ZERO;
        for _ in 0..iterations {
            let start = Instant::now();
            let _result = <G1Projective as VariableBaseMSM>::msm(&parsed_points, &parsed_scalars)
                .expect("MSM failed: point and scalar slices should have equal length");

            instance_total_duration += start.elapsed();
        }
        let instance_avg_duration = instance_total_duration / iterations;

        println!(
            "Average time to execute MSM with {} points and scalars in {} iterations is: {:?}",
            points.len(),
            iterations,
            instance_avg_duration,
        );
        instance_durations.push(instance_avg_duration);
    }
    Ok(instance_durations)
}

/// Benchmarks MSM over `num_instance` instances of `2^instance_size` points
/// and scalars, generating the input vectors under `utils_dir` first if they
/// are not already present on disk.
///
/// Returns a [`BenchmarkResult`] with the average processing time per
/// instance in milliseconds.
///
/// # Errors
/// Returns a `HarnessError` if the vector files cannot be opened after
/// generation or if the benchmark run itself fails.
pub fn run_benchmark(
    instance_size: u32,
    num_instance: u32,
    utils_dir: &str,
) -> Result<BenchmarkResult, preprocess::HarnessError> {
    // Generate the input vectors only when they are missing; an Ok open
    // means a previous run already produced them.
    if preprocess::FileInputIterator::open(utils_dir).is_ok() {
        println!("Vectors already generated");
    } else {
        preprocess::gen_vectors(instance_size, num_instance, utils_dir);
    }

    // Propagate failures instead of panicking: this function already
    // returns Result<_, HarnessError>.
    let benchmark_data = preprocess::FileInputIterator::open(utils_dir)?;
    let instance_durations = benchmark_msm(benchmark_data, 1)?;

    // Average over all instances, converted to milliseconds.
    let avg_processing_time: f64 = instance_durations
        .iter()
        .map(|d| d.as_secs_f64() * 1000.0)
        .sum::<f64>()
        / instance_durations.len() as f64;

    println!("Done running benchmark.");
    Ok(BenchmarkResult {
        instance_size,
        num_instance,
        avg_processing_time,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    // Use the std trait directly rather than ark_serialize's re-export of
    // std::io::Write — same trait, less surprising to readers.
    use std::fs::File;
    use std::io::Write;

    const INSTANCE_SIZE: u32 = 16;
    const NUM_INSTANCE: u32 = 10;
    const UTILSPATH: &str = "../mopro-core/src/middleware/gpu_explorations/utils/vectors";
    const BENCHMARKSPATH: &str = "../mopro-core/gpu_explorations/benchmarks";

    #[test]
    fn test_benchmark_msm() {
        let dir = format!("{}/{}x{}", UTILSPATH, INSTANCE_SIZE, NUM_INSTANCE);

        // Generate the input vectors only when they are missing.
        if preprocess::FileInputIterator::open(&dir).is_ok() {
            println!("Vectors already generated");
        } else {
            preprocess::gen_vectors(INSTANCE_SIZE, NUM_INSTANCE, &dir);
        }

        let benchmark_data = preprocess::FileInputIterator::open(&dir).unwrap();
        let result = benchmark_msm(benchmark_data, 1);
        println!("Done running benchmark: {:?}", result);
    }

    #[test]
    fn test_run_benchmark() {
        let utils_path = format!("{}/{}x{}", UTILSPATH, INSTANCE_SIZE, NUM_INSTANCE);
        let result = run_benchmark(INSTANCE_SIZE, NUM_INSTANCE, &utils_path).unwrap();
        println!("Benchmark result: {:#?}", result);
    }

    #[test]
    fn test_run_multi_benchmarks() {
        let output_path = format!("{}/{}_benchmark.txt", BENCHMARKSPATH, "arkworks_pippenger");
        let mut output_file = File::create(output_path).expect("output file creation failed");
        // Don't ignore write errors: a silently truncated report is worse
        // than a failed test.
        writeln!(output_file, "msm_size,num_msm,avg_processing_time(ms)")
            .expect("failed to write CSV header");

        let instance_sizes = [8, 12, 16, 18, 20];
        let num_instances = [5, 10];
        for &size in &instance_sizes {
            for &num in &num_instances {
                let utils_path = format!("{}/{}x{}", UTILSPATH, size, num);
                let result = run_benchmark(size, num, &utils_path).unwrap();
                println!("{}x{} result: {:#?}", size, num, result);
                writeln!(
                    output_file,
                    "{},{},{}",
                    result.instance_size, result.num_instance, result.avg_processing_time
                )
                .expect("failed to write benchmark row");
            }
        }
    }
}

This file was deleted.

Loading
Loading