From 714195a142b9a69d8d279df8edde7d8570f5f32c Mon Sep 17 00:00:00 2001 From: Alin Dima Date: Tue, 28 Nov 2023 13:08:06 +0200 Subject: [PATCH] random cleanups and fix clippy (#25) * random cleanups * fix clippy * fix gitlab yaml * fmt --- .github/workflows/ci.yml | 31 ++++++----- .gitignore | 3 ++ .vscode/launch.json | 45 ---------------- Cargo.lock | 1 - README.md | 21 ++++++-- reed-solomon-benches/Cargo.toml | 1 + reed-solomon-benches/README.md | 52 ------------------- reed-solomon-benches/benches/criterion.rs | 30 ++++++----- reed-solomon-benches/src/naive/mod.rs | 2 +- reed-solomon-novelpoly-fuzzit/.gitignore | 2 - reed-solomon-novelpoly/Cargo.toml | 5 +- reed-solomon-novelpoly/src/cxx.rs | 1 + .../src/field/inc_encode.rs | 2 +- .../src/field/inc_log_mul.rs | 1 + reed-solomon-novelpoly/src/lib.rs | 7 +-- .../src/novel_poly_basis/mod.rs | 13 +++-- .../src/novel_poly_basis/reconstruct.rs | 2 +- reed-solomon-tester/src/lib.rs | 22 ++++---- reed-solomon-tester/src/utils.rs | 1 - 19 files changed, 79 insertions(+), 163 deletions(-) delete mode 100644 .vscode/launch.json delete mode 100644 reed-solomon-benches/README.md delete mode 100644 reed-solomon-novelpoly-fuzzit/.gitignore delete mode 100644 reed-solomon-tester/src/utils.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a254272..f6cffe0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: - uses: actions-rs/cargo@v1 with: command: check - args: --workspace + args: --workspace --all-features test: name: Test @@ -85,18 +85,17 @@ jobs: command: fmt args: --all -- --check - # clippy: - # name: Clippy - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # - uses: actions-rs/toolchain@v1 - # with: - # profile: minimal - # toolchain: stable - # override: true - # - ru - # - uses: actions-rs/cargo@v1 - # with: - # command: clippy - # args: --workspace + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: clippy + args: --workspace --all-features diff --git a/.gitignore b/.gitignore index 0592392..4d85063 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ /target .DS_Store +.vscode +**/hfuzz_target +**/hfuzz_workspace diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 9cf971a..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "lldb", - "request": "launch", - "name": "Debug executable 'rsc-perf'", - "cargo": { - "args": [ - "build", - "--bin=rsc-perf", - "--package=rsc-perf" - ], - "filter": { - "name": "rsc-perf", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in executable 'rsc-perf'", - "cargo": { - "args": [ - "test", - "--no-run", - "--bin=rsc-perf", - "--package=rsc-perf" - ], - "filter": { - "name": "rsc-perf", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - } - ] -} diff --git a/Cargo.lock b/Cargo.lock index e95892b..7deb58e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -927,7 +927,6 @@ dependencies = [ "fs-err", "itertools 0.11.0", "rand", - "reed-solomon-erasure", "reed-solomon-tester", "static_init", "thiserror", diff --git a/README.md b/README.md index 9821369..57ef3e4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # reed-solomon-novelpoly -An implementation of Novel Polynomial Basis and its Application to Reed-Solomon Erasure Codes [1] [2] . +An implementation of Novel Polynomial Basis and its Application to Reed-Solomon Erasure Codes [1] [2] [3]. Runs encoding and reconstruction in `O(n lg(n))`. Note that for small number `n` there is a static offset due to a walsh transform over the full domain in reconstruction. @@ -8,9 +8,24 @@ Runs encoding and reconstruction in `O(n lg(n))`. Note that for small number `n` Be really fast for `n > 100`. -## Non-goals +## Benchmarking -TODO +For benchmarking the implementation against itself and the naive implementation, `criterion` is used. + +```sh +cargo bench +``` + +## Fuzzing + +Currently `honggfuzz` is used. + +Install `cargo install cargo-hongg` and run with: + +```sh +cargo-hongg fuzz --bin +``` [1]: https://www.citi.sinica.edu.tw/papers/whc/4454-F.pdf [2]: https://arxiv.org/abs/1404.3458 +[3]: https://www.citi.sinica.edu.tw/papers/whc/5524-F.pdf diff --git a/reed-solomon-benches/Cargo.toml b/reed-solomon-benches/Cargo.toml index a240af1..b3c7ddb 100644 --- a/reed-solomon-benches/Cargo.toml +++ b/reed-solomon-benches/Cargo.toml @@ -31,3 +31,4 @@ default = [] novelpoly-cxx = ["reed-solomon-novelpoly/with-alt-cxx-impl"] naive = ["reed-solomon-erasure"] upperbounds = ["naive"] +avx = ["reed-solomon-novelpoly/avx"] diff --git a/reed-solomon-benches/README.md b/reed-solomon-benches/README.md deleted file mode 100644 index 00578f7..0000000 --- a/reed-solomon-benches/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# reed-solomon-novelpoly - -An implementation of [reed solomon erasure encoding in a novel polynomial base (README)](../reed-solomon-novelpoly/README.md) for speed. - - -# Validation and Verification -## erasure coding - -This repo only exists, to check various erasure coding implementation algorithms in order to determin the most time and space efficient ones. - -### test - -All benches are also tests with smaller data samples to verify integrity. - -```sh -cargo test -``` - -must always pass. - - -### criterion - -For benchmarking the implementation against itself and the naive implementation, -`cargo criterion` is used. - -### bench - -```sh -cargo bench -``` - -will use `valgrind` to run the bench binaries, which will show various metrics, and their changes relative to the previous run. - -### flamegraph - -```sh -cargo run -``` - -runs a test case with 10 MB of randomly sampled data which is the recommended way to retrieve a `flamegraph` via `cargo flamegraph` (`cargo install flamegraph` to install). - - -### fuzzing - -Currently `honggfuzz` is used. - -To build that a `clang` based toolchain is required. - -Install `cargo install hongg` and run with - -Run the fuzzer with `cargo-hongg run fuzzit`. diff --git a/reed-solomon-benches/benches/criterion.rs b/reed-solomon-benches/benches/criterion.rs index 8252abe..f22abcb 100644 --- a/reed-solomon-benches/benches/criterion.rs +++ b/reed-solomon-benches/benches/criterion.rs @@ -233,6 +233,7 @@ pub mod parameterized { ) { use reed_solomon_novelpoly::f2e16::Additive; use rand::Rng; + #[cfg(feature = "avx")] { group.bench_with_input( BenchmarkId::new("novel-poly-guts-encode-faster8", param.to_string()), @@ -267,6 +268,7 @@ pub mod parameterized { }) }); } + #[cfg(feature = "avx")] { group.bench_with_input( BenchmarkId::new("novel-poly-encode-sub-faster8", param.to_string()), @@ -285,7 +287,8 @@ pub mod parameterized { let dist = rand::distributions::Uniform::new_inclusive(u8::MIN, u8::MAX); let data = Vec::from_iter(rng.sample_iter::(dist).take(k * 2)); b.iter(|| { - reed_solomon_novelpoly::f2e16::encode_sub_plain(black_box(&data), black_box(n), black_box(k)); + reed_solomon_novelpoly::f2e16::encode_sub_plain(black_box(&data), black_box(n), black_box(k)) + .unwrap(); }) }); } @@ -293,7 +296,6 @@ pub mod parameterized { pub fn bench_encode_guts(crit: &mut Criterion) { let mut rng = SmallRng::from_seed(SMALL_RNG_SEED); - use reed_solomon_novelpoly::f2e16::{Additive8x}; // factors of 1/2..1/8 are reasonable for f_exp in 1..3 { @@ -303,8 +305,12 @@ pub mod parameterized { let k = 1 << k_exp; let n = k * f; assert!(n > k); - assert_eq!(n % Additive8x::LANE, 0); - assert_eq!(k % Additive8x::LANE, 0); + #[cfg(feature = "avx")] + { + use reed_solomon_novelpoly::f2e16::{Additive8x}; + assert_eq!(n % Additive8x::LANE, 0); + assert_eq!(k % Additive8x::LANE, 0); + } let param = format!("n={n} k={k} (n/k = {f})"); encode_guts_add_to_group(&mut group, param, n, k, &mut rng); } @@ -318,14 +324,14 @@ fn parameterized_criterion() -> Criterion { } criterion_group!( -name = plot_parameterized; -config = parameterized_criterion(); -targets = - parameterized::bench_encode_2d, - parameterized::bench_reconstruct_2d, - parameterized::bench_encode_fixed_1mb_payload, - parameterized::bench_reconstruct_fixed_1mb_payload, - parameterized::bench_encode_guts, + name = plot_parameterized; + config = parameterized_criterion(); + targets = + parameterized::bench_encode_2d, + parameterized::bench_reconstruct_2d, + parameterized::bench_encode_fixed_1mb_payload, + parameterized::bench_reconstruct_fixed_1mb_payload, + parameterized::bench_encode_guts, ); #[cfg(feature = "upperbounds")] diff --git a/reed-solomon-benches/src/naive/mod.rs b/reed-solomon-benches/src/naive/mod.rs index d0d29fe..c54b499 100644 --- a/reed-solomon-benches/src/naive/mod.rs +++ b/reed-solomon-benches/src/naive/mod.rs @@ -49,7 +49,7 @@ pub fn reconstruct( // Try to reconstruct missing shards r.reconstruct_data(&mut received_shards).expect("Sufficient shards must be received. qed"); - let result = received_shards.into_iter().filter_map(|x| x).take(r.data_shard_count()).fold( + let result = received_shards.into_iter().flatten().take(r.data_shard_count()).fold( Vec::with_capacity(12 << 20), |mut acc, reconstructed_shard| { acc.extend_from_slice(AsRef::<[u8]>::as_ref(&reconstructed_shard)); diff --git a/reed-solomon-novelpoly-fuzzit/.gitignore b/reed-solomon-novelpoly-fuzzit/.gitignore deleted file mode 100644 index bf05941..0000000 --- a/reed-solomon-novelpoly-fuzzit/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -hfuzz_target/* -hfuzz_workspace/* \ No newline at end of file diff --git a/reed-solomon-novelpoly/Cargo.toml b/reed-solomon-novelpoly/Cargo.toml index 42c5a72..a5a8e1f 100644 --- a/reed-solomon-novelpoly/Cargo.toml +++ b/reed-solomon-novelpoly/Cargo.toml @@ -19,9 +19,6 @@ bindgen = { version = "0.66.1", optional = true } cc = { version = "1.0.67", features = ["parallel"], optional = true } [dependencies] -reed-solomon-erasure = { version = "6.0", features = [ - "simd-accel", -], optional = true } static_init = "1" thiserror = "1.0.23" @@ -42,7 +39,7 @@ assert_matches = "1.5.0" [features] default = [] with-alt-cxx-impl = ["cc", "bindgen"] -naive = ["reed-solomon-erasure"] mock = ["rand", "reed-solomon-tester"] # enable the small field, experimental support f256 = [] +avx = [] diff --git a/reed-solomon-novelpoly/src/cxx.rs b/reed-solomon-novelpoly/src/cxx.rs index 6e78a78..806eee1 100644 --- a/reed-solomon-novelpoly/src/cxx.rs +++ b/reed-solomon-novelpoly/src/cxx.rs @@ -3,6 +3,7 @@ #![allow(non_snake_case)] #![allow(dead_code)] +#[allow(clippy::module_inception)] mod cxx { include!(concat!(env!("OUT_DIR"), "/bindings.rs")); } diff --git a/reed-solomon-novelpoly/src/field/inc_encode.rs b/reed-solomon-novelpoly/src/field/inc_encode.rs index 4a4acd1..b934854 100644 --- a/reed-solomon-novelpoly/src/field/inc_encode.rs +++ b/reed-solomon-novelpoly/src/field/inc_encode.rs @@ -202,7 +202,7 @@ pub fn encode_sub_plain(bytes: &[u8], n: usize, k: usize) -> Result Additive { if self == Self::ZERO { return Self::ZERO; diff --git a/reed-solomon-novelpoly/src/lib.rs b/reed-solomon-novelpoly/src/lib.rs index a251604..894ac8c 100644 --- a/reed-solomon-novelpoly/src/lib.rs +++ b/reed-solomon-novelpoly/src/lib.rs @@ -1,4 +1,5 @@ #![forbid(unused_crate_dependencies)] +#![allow(clippy::needless_range_loop)] pub mod errors; pub use errors::*; @@ -33,12 +34,6 @@ mod test { use super::*; use reed_solomon_tester::{roundtrip, BYTES, N_SHARDS}; - #[cfg(feature = "naive")] - #[test] - fn status_quo_roundtrip() -> Result<()> { - roundtrip(status_quo::encode::, status_quo::reconstruct::, &BYTES[..1337], N_SHARDS) - } - #[test] fn novel_poly_basis_roundtrip() -> Result<()> { roundtrip( diff --git a/reed-solomon-novelpoly/src/novel_poly_basis/mod.rs b/reed-solomon-novelpoly/src/novel_poly_basis/mod.rs index 646d391..f027c8c 100644 --- a/reed-solomon-novelpoly/src/novel_poly_basis/mod.rs +++ b/reed-solomon-novelpoly/src/novel_poly_basis/mod.rs @@ -130,11 +130,14 @@ impl ReedSolomon { let k2 = self.k * 2; // prepare one wrapped shard per validator let mut shards = vec![ - >>::from({ - let mut v = Vec::::with_capacity(shard_len); - unsafe { v.set_len(shard_len) } - v - }); + >>::from( + #[allow(clippy::uninit_vec)] + { + let mut v = Vec::::with_capacity(shard_len); + unsafe { v.set_len(shard_len) } + v + } + ); validator_count ]; diff --git a/reed-solomon-novelpoly/src/novel_poly_basis/reconstruct.rs b/reed-solomon-novelpoly/src/novel_poly_basis/reconstruct.rs index 7089590..a9e401a 100644 --- a/reed-solomon-novelpoly/src/novel_poly_basis/reconstruct.rs +++ b/reed-solomon-novelpoly/src/novel_poly_basis/reconstruct.rs @@ -1,7 +1,7 @@ use super::*; /// each shard contains one symbol of one run of erasure coding -pub fn reconstruct<'a, S: Shard>(received_shards: Vec>, validator_count: usize) -> Result> { +pub fn reconstruct(received_shards: Vec>, validator_count: usize) -> Result> { let params = CodeParams::derive_parameters(validator_count, recoverablity_subset_size(validator_count))?; let rs = params.make_encoder(); diff --git a/reed-solomon-tester/src/lib.rs b/reed-solomon-tester/src/lib.rs index 3e1b2c2..16cabd8 100644 --- a/reed-solomon-tester/src/lib.rs +++ b/reed-solomon-tester/src/lib.rs @@ -4,11 +4,8 @@ use std::error; use std::iter; use std::result; -mod utils; -pub use crate::utils::*; - pub static SMALL_RNG_SEED: [u8; 32] = [ - 0, 6, 0xFA, 0, 0x37, 3, 19, 89, 32, 032, 0x37, 0x77, 77, 0b11, 112, 52, 12, 40, 82, 34, 0, 0, 0, 1, 4, 4, 1, 4, 99, + 0, 6, 0xFA, 0, 0x37, 3, 19, 89, 32, 32, 0x37, 0x77, 77, 0b11, 112, 52, 12, 40, 82, 34, 0, 0, 0, 1, 4, 4, 1, 4, 99, 127, 121, 107, ]; @@ -63,16 +60,16 @@ pub fn deterministic_drop_shards( let mut v = Vec::with_capacity(n - k); // k is a power of 2 let half = (n - k) >> 1; - for i in 0..half { - codewords[i] = None; + for (i, codeword) in codewords.iter_mut().enumerate().take(half) { + *codeword = None; v.push(i); } // if the codewords is shorter than n // the remaining ones were // already dropped implicitly - for i in n - half..n { + for (i, codeword) in codewords.iter_mut().enumerate().take(n).skip(n - half) { if i < l { - codewords[i] = None; + *codeword = None; v.push(i); } } @@ -85,7 +82,7 @@ pub fn deterministic_drop_shards_clone( k: usize, ) -> (Vec>, IndexVec) { let mut rng = SmallRng::from_seed(SMALL_RNG_SEED); - let mut codewords = codewords.into_iter().map(|x| Some(x.clone())).collect::>>(); + let mut codewords = codewords.iter().map(|x| Some(x.clone())).collect::>>(); let idx = deterministic_drop_shards::(&mut codewords, n, k, &mut rng); assert!(idx.len() <= n - k); (codewords, idx) @@ -121,14 +118,13 @@ where E: error::Error + Send + Sync + 'static, S: Clone + AsRef<[u8]> + AsMut<[[u8; 2]]> + AsRef<[[u8; 2]]> + iter::FromIterator<[u8; 2]> + From>, { - let v = roundtrip_w_drop_closure::<'s, Enc, Recon, _, SmallRng, S, E>( + roundtrip_w_drop_closure::<'s, Enc, Recon, _, SmallRng, S, E>( encode, reconstruct, payload, target_shard_count, drop_random_max, - )?; - Ok(v) + ) } const fn recoverablity_subset_size(n_wanted_shards: usize) -> usize { @@ -166,6 +162,6 @@ where let recovered_payload = reconstruct(received_shards, target_shard_count)?; - assert_recovery(&payload[..], &recovered_payload[..], dropped_indices, target_n, target_k); + assert_recovery(payload, &recovered_payload[..], dropped_indices, target_n, target_k); Ok(()) } diff --git a/reed-solomon-tester/src/utils.rs b/reed-solomon-tester/src/utils.rs deleted file mode 100644 index 8b13789..0000000 --- a/reed-solomon-tester/src/utils.rs +++ /dev/null @@ -1 +0,0 @@ -