diff --git a/crates/cargo-test-support/src/lib.rs b/crates/cargo-test-support/src/lib.rs index 476e185dd73..689f7208cf4 100644 --- a/crates/cargo-test-support/src/lib.rs +++ b/crates/cargo-test-support/src/lib.rs @@ -1597,6 +1597,12 @@ pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, assert!(files.next().is_none(), "expected only 1 dep-info file"); let dep_info = fs::read(&info_path).unwrap(); let dep_info = &mut &dep_info[..]; + + // Consume the magic marker and version. Here they don't really matter. + read_usize(dep_info); + read_u8(dep_info); + read_u8(dep_info); + let deps = (0..read_usize(dep_info)) .map(|_| { let ty = read_u8(dep_info); diff --git a/src/cargo/core/compiler/fingerprint/dep_info.rs b/src/cargo/core/compiler/fingerprint/dep_info.rs index 2d092f43eb7..e8628f34ebf 100644 --- a/src/cargo/core/compiler/fingerprint/dep_info.rs +++ b/src/cargo/core/compiler/fingerprint/dep_info.rs @@ -22,6 +22,9 @@ use cargo_util::Sha256; use crate::CargoResult; use crate::CARGO_ENV; +/// The current format version of [`EncodedDepInfo`]. +const CURRENT_ENCODED_DEP_INFO_VERSION: u8 = 1; + /// The representation of the `.d` dep-info file generated by rustc #[derive(Default)] pub struct RustcDepInfo { @@ -61,20 +64,36 @@ pub enum DepInfoPathType { /// Currently the format looks like: /// /// ```text -/// +------------+------------+---------------+---------------+ -/// | # of files | file paths | # of env vars | env var pairs | -/// +------------+------------+---------------+---------------+ +/// +--------+---------+------------+------------+---------------+---------------+ +/// | marker | version | # of files | file paths | # of env vars | env var pairs | +/// +--------+---------+------------+------------+---------------+---------------+ /// ``` /// /// Each field represents /// +/// * _Marker_ --- A magic marker to ensure that older Cargoes, which only +/// recognize format v0 (prior to checksum support in [`f4ca7390`]), do not +/// proceed with parsing newer formats. Since [`EncodedDepInfo`] is merely +/// an optimization, and to avoid adding complexity, Cargo recognizes only +/// one version of [`CURRENT_ENCODED_DEP_INFO_VERSION`]. +/// The current layout looks like this +/// ```text +/// +----------------------------+ +/// | [0x01 0x00 0x00 0x00 0xff] | +/// +----------------------------+ +/// ``` +/// These bytes will be interpreted as "one file tracked and an invalid +/// [`DepInfoPathType`] variant with 255" by older Cargoes, causing them to +/// stop parsing. This could prevent problematic parsing as noted in +/// rust-lang/cargo#14712. +/// * _Version_ --- The current format version. /// * _Number of files/envs_ --- A `u32` representing the number of things. /// * _File paths_ --- Zero or more paths of files the dep-info file depends on. /// Each path is encoded as the following: /// /// ```text /// +-----------+-------------+------------+---------------+-----------+-------+ -/// | Path type | len of path | path bytes | cksum exists? | file size | cksum | +/// | path type | len of path | path bytes | cksum exists? | file size | cksum | /// +-----------+-------------+------------+---------------+-----------+-------+ /// ``` /// * _Env var pairs_ --- Zero or more env vars the dep-info file depends on. @@ -84,7 +103,9 @@ pub enum DepInfoPathType { /// | len of key | key bytes | value exists? | len of value | value bytes | /// +------------+-----------+---------------+--------------+-------------+ /// ``` -#[derive(Default)] +/// +/// [`f4ca7390`]: https://github.com/rust-lang/cargo/commit/f4ca739073185ea5e1148ff100bb4a06d3bf721d +#[derive(Default, Debug, PartialEq, Eq)] pub struct EncodedDepInfo { pub files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>, pub env: Vec<(String, Option)>, @@ -93,6 +114,12 @@ pub struct EncodedDepInfo { impl EncodedDepInfo { pub fn parse(mut bytes: &[u8]) -> Option { let bytes = &mut bytes; + read_magic_marker(bytes)?; + let version = read_u8(bytes)?; + if version != CURRENT_ENCODED_DEP_INFO_VERSION { + return None; + } + let nfiles = read_usize(bytes)?; let mut files = Vec::with_capacity(nfiles); for _ in 0..nfiles { @@ -129,6 +156,18 @@ impl EncodedDepInfo { } return Some(EncodedDepInfo { files, env }); + /// See [`EncodedDepInfo`] for why a magic marker exists. + fn read_magic_marker(bytes: &mut &[u8]) -> Option<()> { + let _size = read_usize(bytes)?; + let path_type = read_u8(bytes)?; + if path_type != u8::MAX { + // Old depinfo. Give up parsing it. + None + } else { + Some(()) + } + } + fn read_usize(bytes: &mut &[u8]) -> Option { let ret = bytes.get(..4)?; *bytes = &bytes[4..]; @@ -162,6 +201,10 @@ impl EncodedDepInfo { pub fn serialize(&self) -> CargoResult> { let mut ret = Vec::new(); let dst = &mut ret; + + write_magic_marker(dst); + dst.push(CURRENT_ENCODED_DEP_INFO_VERSION); + write_usize(dst, self.files.len()); for (ty, file, checksum_info) in self.files.iter() { match ty { @@ -189,6 +232,14 @@ impl EncodedDepInfo { } return Ok(ret); + /// See [`EncodedDepInfo`] for why a magic marker exists. + /// + /// There is an assumption that there is always at least a file. + fn write_magic_marker(dst: &mut Vec) { + write_usize(dst, 1); + dst.push(u8::MAX); + } + fn write_bytes(dst: &mut Vec, val: impl AsRef<[u8]>) { let val = val.as_ref(); write_usize(dst, val.len()); @@ -614,3 +665,94 @@ pub enum InvalidChecksum { #[error("expected a string with format \"algorithm=hex_checksum\"")] InvalidFormat, } + +#[cfg(test)] +mod encoded_dep_info { + use super::*; + + #[track_caller] + fn gen_test(checksum: bool) { + let checksum = checksum.then_some((768, "c01efc669f09508b55eced32d3c88702578a7c3e".into())); + let lib_rs = ( + DepInfoPathType::TargetRootRelative, + PathBuf::from("src/lib.rs"), + checksum.clone(), + ); + + let depinfo = EncodedDepInfo { + files: vec![lib_rs.clone()], + env: Vec::new(), + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + + let mod_rs = ( + DepInfoPathType::TargetRootRelative, + PathBuf::from("src/mod.rs"), + checksum.clone(), + ); + let depinfo = EncodedDepInfo { + files: vec![lib_rs.clone(), mod_rs.clone()], + env: Vec::new(), + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + + let depinfo = EncodedDepInfo { + files: vec![lib_rs, mod_rs], + env: vec![ + ("Gimli".into(), Some("Legolas".into())), + ("Beren".into(), Some("LĂșthien".into())), + ], + }; + let data = depinfo.serialize().unwrap(); + assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo); + } + + #[test] + fn round_trip() { + gen_test(false); + } + + #[test] + fn round_trip_with_checksums() { + gen_test(true); + } + + #[test] + fn path_type_is_u8_max() { + #[rustfmt::skip] + let data = [ + 0x01, 0x00, 0x00, 0x00, 0xff, // magic marker + CURRENT_ENCODED_DEP_INFO_VERSION, // version + 0x01, 0x00, 0x00, 0x00, // # of files + 0x00, // path type + 0x04, 0x00, 0x00, 0x00, // len of path + 0x72, 0x75, 0x73, 0x74, // path bytes ("rust") + 0x00, // cksum exists? + 0x00, 0x00, 0x00, 0x00, // # of env vars + ]; + // The current cargo doesn't recognize the magic marker. + assert_eq!( + EncodedDepInfo::parse(&data).unwrap(), + EncodedDepInfo { + files: vec![(DepInfoPathType::PackageRootRelative, "rust".into(), None)], + env: Vec::new(), + } + ); + } + + #[test] + fn parse_v0_fingerprint_dep_info() { + #[rustfmt::skip] + let data = [ + 0x01, 0x00, 0x00, 0x00, // # of files + 0x00, // path type + 0x04, 0x00, 0x00, 0x00, // len of path + 0x72, 0x75, 0x73, 0x74, // path bytes: "rust" + 0x00, 0x00, 0x00, 0x00, // # of env vars + ]; + // Cargo can't recognize v0 after `-Zchecksum-freshess` added. + assert!(EncodedDepInfo::parse(&data).is_none()); + } +}