From 2fb4bd4f8f1e7162c3a06b33e61bb3a55be45ce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kh=E1=BA=A3i?= Date: Sat, 4 Nov 2023 20:01:01 +0700 Subject: [PATCH] refactor: parse integrity eagerly (#180) * refactor: parse integrity eagerly The parsing of integrity has been moved from tarball to serde * docs: clarify * docs: remove the 3rd option * refactor: remove unnecessary `pipe_ref` --- Cargo.lock | 3 ++ crates/lockfile/Cargo.toml | 1 + crates/lockfile/src/resolution.rs | 23 ++++++++----- .../src/install_package_by_snapshot.rs | 4 +-- crates/registry/Cargo.toml | 1 + crates/registry/src/package_distribution.rs | 3 +- crates/tarball/src/lib.rs | 33 +++++++------------ tasks/micro-benchmark/Cargo.toml | 1 + tasks/micro-benchmark/src/main.rs | 25 ++++++++------ 9 files changed, 51 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 071bc4041..05d751cb1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1382,6 +1382,7 @@ dependencies = [ "serde", "serde_yaml", "split-first-char", + "ssri", "text-block-macros", ] @@ -1399,6 +1400,7 @@ dependencies = [ "pipe-trait", "project-root", "reqwest", + "ssri", "tempfile", "tokio", ] @@ -1473,6 +1475,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "ssri", "tempfile", "tokio", ] diff --git a/crates/lockfile/Cargo.toml b/crates/lockfile/Cargo.toml index 2b1dcd40d..9ad49ecb8 100644 --- a/crates/lockfile/Cargo.toml +++ b/crates/lockfile/Cargo.toml @@ -19,6 +19,7 @@ node-semver = { workspace = true } pipe-trait = { workspace = true } serde = { workspace = true } serde_yaml = { workspace = true } +ssri = { workspace = true } split-first-char = { workspace = true } [dev-dependencies] diff --git a/crates/lockfile/src/resolution.rs b/crates/lockfile/src/resolution.rs index 6b3c2e63b..7c5e607b7 100644 --- a/crates/lockfile/src/resolution.rs +++ b/crates/lockfile/src/resolution.rs @@ -1,6 +1,7 @@ use derive_more::{From, TryInto}; use pipe_trait::Pipe; use serde::{Deserialize, Serialize}; +use ssri::Integrity; /// For tarball hosted remotely or locally. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] @@ -8,14 +9,14 @@ use serde::{Deserialize, Serialize}; pub struct TarballResolution { pub tarball: String, #[serde(skip_serializing_if = "Option::is_none")] - pub integrity: Option, + pub integrity: Option, } /// For standard package specification, with package name and version range. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] #[serde(deny_unknown_fields, rename_all = "camelCase")] pub struct RegistryResolution { - pub integrity: String, + pub integrity: Integrity, } /// For local directory on a filesystem. @@ -45,10 +46,10 @@ pub enum LockfileResolution { impl LockfileResolution { /// Get the integrity field if available. - pub fn integrity(&self) -> Option<&'_ str> { + pub fn integrity(&self) -> Option<&'_ Integrity> { match self { - LockfileResolution::Tarball(resolution) => resolution.integrity.as_deref(), - LockfileResolution::Registry(resolution) => resolution.integrity.as_str().pipe(Some), + LockfileResolution::Tarball(resolution) => resolution.integrity.as_ref(), + LockfileResolution::Registry(resolution) => Some(&resolution.integrity), LockfileResolution::Directory(_) | LockfileResolution::Git(_) => None, } } @@ -101,6 +102,10 @@ mod tests { use pretty_assertions::assert_eq; use text_block_macros::text_block; + fn integrity(integrity_str: &str) -> Integrity { + integrity_str.parse().expect("parse integrity string") + } + #[test] fn deserialize_tarball_resolution() { eprintln!("CASE: without integrity"); @@ -124,7 +129,7 @@ mod tests { dbg!(&received); let expected = LockfileResolution::Tarball(TarballResolution { tarball: "file:ts-pipe-compose-0.2.1.tgz".to_string(), - integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string().into() + integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==").into() }); assert_eq!(received, expected); } @@ -147,7 +152,7 @@ mod tests { eprintln!("CASE: with integrity"); let resolution = LockfileResolution::Tarball(TarballResolution { tarball: "file:ts-pipe-compose-0.2.1.tgz".to_string(), - integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string().into() + integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==").into() }); let received = serde_yaml::to_string(&resolution).unwrap(); let received = received.trim(); @@ -167,7 +172,7 @@ mod tests { let received: LockfileResolution = serde_yaml::from_str(yaml).unwrap(); dbg!(&received); let expected = LockfileResolution::Registry(RegistryResolution { - integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string() + integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==") }); assert_eq!(received, expected); } @@ -175,7 +180,7 @@ mod tests { #[test] fn serialize_registry_resolution() { let resolution = LockfileResolution::Registry(RegistryResolution { - integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string() + integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==") }); let received = serde_yaml::to_string(&resolution).unwrap(); let received = received.trim(); diff --git a/crates/package-manager/src/install_package_by_snapshot.rs b/crates/package-manager/src/install_package_by_snapshot.rs index 677c2b9d5..a8c27406b 100644 --- a/crates/package-manager/src/install_package_by_snapshot.rs +++ b/crates/package-manager/src/install_package_by_snapshot.rs @@ -41,7 +41,7 @@ impl<'a> InstallPackageBySnapshot<'a> { let (tarball_url, integrity) = match resolution { LockfileResolution::Tarball(tarball_resolution) => { - let integrity = tarball_resolution.integrity.as_deref().unwrap_or_else(|| { + let integrity = tarball_resolution.integrity.as_ref().unwrap_or_else(|| { // TODO: how to handle the absent of integrity field? panic!("Current implementation requires integrity, but {dependency_path} doesn't have it"); }); @@ -54,7 +54,7 @@ impl<'a> InstallPackageBySnapshot<'a> { let version = ver_peer.version(); let bare_name = name.bare.as_str(); let tarball_url = format!("{registry}/{name}/-/{bare_name}-{version}.tgz"); - let integrity = registry_resolution.integrity.as_str(); + let integrity = ®istry_resolution.integrity; (Cow::Owned(tarball_url), integrity) } LockfileResolution::Directory(_) | LockfileResolution::Git(_) => { diff --git a/crates/registry/Cargo.toml b/crates/registry/Cargo.toml index c230413d8..5982971af 100644 --- a/crates/registry/Cargo.toml +++ b/crates/registry/Cargo.toml @@ -19,6 +19,7 @@ node-semver = { workspace = true } pipe-trait = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +ssri = { workspace = true } tokio = { workspace = true } miette = { workspace = true } diff --git a/crates/registry/src/package_distribution.rs b/crates/registry/src/package_distribution.rs index 2f865eedb..b6447b9fd 100644 --- a/crates/registry/src/package_distribution.rs +++ b/crates/registry/src/package_distribution.rs @@ -1,9 +1,10 @@ use serde::{Deserialize, Serialize}; +use ssri::Integrity; #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq)] #[serde(rename_all = "camelCase")] pub struct PackageDistribution { - pub integrity: Option, + pub integrity: Option, pub shasum: Option, pub tarball: String, pub file_count: Option, diff --git a/crates/tarball/src/lib.rs b/crates/tarball/src/lib.rs index 26adfa7f9..cb285b929 100644 --- a/crates/tarball/src/lib.rs +++ b/crates/tarball/src/lib.rs @@ -30,15 +30,6 @@ pub struct NetworkError { pub error: reqwest::Error, } -#[derive(Debug, Display, Error, Diagnostic)] -#[display("Cannot parse {integrity:?} from {url} as an integrity: {error}")] -pub struct ParseIntegrityError { - pub url: String, - pub integrity: String, - #[error(source)] - pub error: ssri::Error, -} - #[derive(Debug, Display, Error, Diagnostic)] #[display("Failed to verify the integrity of {url}: {error}")] pub struct VerifyChecksumError { @@ -57,9 +48,6 @@ pub enum TarballError { #[diagnostic(code(pacquet_tarball::io_error))] ReadTarballEntries(std::io::Error), - #[diagnostic(code(pacquet_tarball::parse_integrity_error))] - ParseIntegrity(ParseIntegrityError), - #[diagnostic(code(pacquet_tarball::verify_checksum_error))] Checksum(VerifyChecksumError), @@ -128,7 +116,7 @@ pub struct DownloadTarballToStore<'a> { pub tarball_cache: &'a Cache, pub http_client: &'a Client, pub store_dir: &'static StoreDir, - pub package_integrity: &'a str, + pub package_integrity: &'a Integrity, pub package_unpacked_size: Option, pub package_url: &'a str, } @@ -199,12 +187,11 @@ impl<'a> DownloadTarballToStore<'a> { tracing::info!(target: "pacquet::download", ?package_url, "Download completed"); - let package_integrity: Integrity = - package_integrity.parse().map_err(|error| ParseIntegrityError { - url: package_url.to_string(), - integrity: package_integrity.to_string(), - error, - })?; + // TODO: Cloning here is less than desirable, there are 2 possible solutions for this problem: + // 1. Use an Arc and convert this line to Arc::clone. + // 2. Replace ssri with base64 and serde magic (which supports Copy). + let package_integrity = package_integrity.clone(); + #[derive(Debug, From)] enum TaskError { Checksum(ssri::Error), @@ -303,6 +290,10 @@ mod tests { use super::*; + fn integrity(integrity_str: &str) -> Integrity { + integrity_str.parse().expect("parse integrity string") + } + /// **Problem:** /// The tested function requires `'static` paths, leaking would prevent /// temporary files from being cleaned up. @@ -328,7 +319,7 @@ mod tests { tarball_cache: &Default::default(), http_client: &Default::default(), store_dir: store_path, - package_integrity: "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==", + package_integrity: &integrity("sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w=="), package_unpacked_size: Some(16697), package_url: "https://registry.npmjs.org/@fastify/error/-/error-3.3.0.tgz" } @@ -368,7 +359,7 @@ mod tests { tarball_cache: &Default::default(), http_client: &Default::default(), store_dir: store_path, - package_integrity: "sha512-aaaan1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==", + package_integrity: &integrity("sha512-aaaan1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w=="), package_unpacked_size: Some(16697), package_url: "https://registry.npmjs.org/@fastify/error/-/error-3.3.0.tgz", } diff --git a/tasks/micro-benchmark/Cargo.toml b/tasks/micro-benchmark/Cargo.toml index 02057066d..840636917 100644 --- a/tasks/micro-benchmark/Cargo.toml +++ b/tasks/micro-benchmark/Cargo.toml @@ -27,4 +27,5 @@ tempfile = { workspace = true } pipe-trait = { workspace = true } project-root = { workspace = true } reqwest = { workspace = true } +ssri = { workspace = true } node-semver = { workspace = true } diff --git a/tasks/micro-benchmark/src/main.rs b/tasks/micro-benchmark/src/main.rs index 2b8fb5e92..9246a1210 100644 --- a/tasks/micro-benchmark/src/main.rs +++ b/tasks/micro-benchmark/src/main.rs @@ -8,6 +8,7 @@ use pacquet_tarball::DownloadTarballToStore; use pipe_trait::Pipe; use project_root::get_project_root; use reqwest::Client; +use ssri::Integrity; use tempfile::tempdir; #[derive(Debug, Parser)] @@ -24,24 +25,28 @@ fn bench_tarball(c: &mut Criterion, server: &mut ServerGuard, fixtures_folder: & let rt = tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap(); let url = &format!("{0}/@fastify+error-3.3.0.tgz", server.url()); + let package_integrity: Integrity = "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==".parse().expect("parse integrity string"); group.throughput(Throughput::Bytes(file.len() as u64)); group.bench_function("download_dependency", |b| { b.to_async(&rt).iter(|| async { // NOTE: the tempdir is being leaked, meaning the cleanup would be postponed until the end of the benchmark let dir = tempdir().unwrap(); - let store_dir = dir.path().to_path_buf().pipe(StoreDir::from).pipe(Box::new).pipe(Box::leak); + let store_dir = + dir.path().to_path_buf().pipe(StoreDir::from).pipe(Box::new).pipe(Box::leak); let http_client = Client::new(); - let cas_map = - DownloadTarballToStore{ - tarball_cache: &Default::default(), - http_client: &http_client, - store_dir, - package_integrity: "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==", - package_unpacked_size: Some(16697), - package_url: url, - }.run().await.unwrap(); + let cas_map = DownloadTarballToStore { + tarball_cache: &Default::default(), + http_client: &http_client, + store_dir, + package_integrity: &package_integrity, + package_unpacked_size: Some(16697), + package_url: url, + } + .run() + .await + .unwrap(); cas_map.len() }); });