From 8b18e62e783a4ef8a51bf53c023bccdaa25bb997 Mon Sep 17 00:00:00 2001 From: aawsome <37850842+aawsome@users.noreply.github.com> Date: Mon, 2 Dec 2024 23:58:43 +0100 Subject: [PATCH] feat(commands): More dump options (#1339) Adds output as targz and zip. Also adds the options `--archive` to choose the output format and `--file` to directly specify a file to dump into. When a file is specified, the output format is automatically chosen from the file extension, if given. --------- Signed-off-by: simonsan <14062932+simonsan@users.noreply.github.com> Co-authored-by: simonsan <14062932+simonsan@users.noreply.github.com> --- Cargo.lock | 65 ++++++++++++ Cargo.toml | 2 + deny.toml | 1 + src/commands/dump.rs | 213 ++++++++++++++++++++++++++++++++++++++-- tests/backup_restore.rs | 30 ++++-- tests/repositories.rs | 4 +- 6 files changed, 296 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68b997783..77b7b312e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,15 @@ version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -1051,6 +1060,17 @@ dependencies = [ "serde", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "derive_destructure2" version = "0.1.3" @@ -2499,6 +2519,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "lockfree-object-pool" +version = "0.1.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" + [[package]] name = "log" version = "0.4.22" @@ -3921,6 +3947,7 @@ dependencies = [ "toml", "tui-textarea", "warp", + "zip", ] [[package]] @@ -4474,6 +4501,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "similar" version = "2.6.0" @@ -5880,6 +5913,24 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "zip" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d52293fc86ea7cf13971b3bb81eb21683636e7ae24c729cdaf1b7c4157a352" +dependencies = [ + "arbitrary", + "chrono", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "flate2", + "indexmap 2.6.0", + "memchr", + "thiserror 2.0.3", + "zopfli", +] + [[package]] name = "zipsign-api" version = "0.1.2" @@ -5891,6 +5942,20 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "zopfli" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946" +dependencies = [ + "bumpalo", + "crc32fast", + "lockfree-object-pool", + "log", + "once_cell", + "simd-adler32", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 10f1d9d7e..ce686f70f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ dateparser = "0.2.1" derive_more = { version = "1", features = ["debug"] } dialoguer = "0.11.0" directories = "5" +flate2 = "1.0.34" fuse_mt = { version = "0.6", optional = true } futures = { version = "0.3.31", optional = true } gethostname = "0.5" @@ -120,6 +121,7 @@ open = "5.3.1" self_update = { version = "=0.39.0", default-features = false, optional = true, features = ["rustls", 
"archive-tar", "compression-flate2"] } # FIXME: Downgraded to 0.39.0 due to https://github.com/jaemk/self_update/issues/136 tar = "0.4.43" toml = "0.8" +zip = { version = "2.2.0", default-features = false, features = ["deflate", "chrono"] } # filtering jaq-core = { version = "2", optional = true } diff --git a/deny.toml b/deny.toml index 14753b951..804993957 100644 --- a/deny.toml +++ b/deny.toml @@ -106,6 +106,7 @@ allow = [ "CC0-1.0", "Zlib", "Unicode-3.0", + "BSL-1.0", ] # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the diff --git a/src/commands/dump.rs b/src/commands/dump.rs index 496edb2f5..fdf8c40c8 100644 --- a/src/commands/dump.rs +++ b/src/commands/dump.rs @@ -1,11 +1,17 @@ //! `dump` subcommand -use std::io::{Read, Write}; +use std::{ + fs::File, + io::{copy, Cursor, Read, Seek, SeekFrom, Write}, + path::PathBuf, +}; use crate::{repository::CliIndexedRepo, status_err, Application, RUSTIC_APP}; use abscissa_core::{Command, Runnable, Shutdown}; use anyhow::Result; +use derive_more::FromStr; +use flate2::{write::GzEncoder, Compression}; use log::warn; use rustic_core::{ repofile::{Node, NodeType}, @@ -13,6 +19,7 @@ use rustic_core::{ LsOptions, }; use tar::{Builder, EntryType, Header}; +use zip::{write::SimpleFileOptions, ZipWriter}; /// `dump` subcommand #[derive(clap::Parser, Command, Debug)] @@ -21,9 +28,38 @@ pub(crate) struct DumpCmd { #[clap(value_name = "SNAPSHOT[:PATH]")] snap: String, - /// Listing options - #[clap(flatten)] - ls_opts: LsOptions, + /// set archive format to use. Possible values: auto, content, tar, targz, zip. For "auto" format is dertermined by file extension (if given) or "tar" for dirs. + #[clap(long, value_name = "FORMAT", default_value = "auto")] + archive: ArchiveKind, + + /// dump output to the given file. Use this instead of redirecting stdout to a file. 
+ #[clap(long)] + file: Option, + + /// Glob pattern to exclude/include (can be specified multiple times) + #[clap(long, help_heading = "Exclude options")] + glob: Vec, + + /// Same as --glob pattern but ignores the casing of filenames + #[clap(long, value_name = "GLOB", help_heading = "Exclude options")] + iglob: Vec, + + /// Read glob patterns to exclude/include from this file (can be specified multiple times) + #[clap(long, value_name = "FILE", help_heading = "Exclude options")] + glob_file: Vec, + + /// Same as --glob-file but ignores the casing of filenames in patterns + #[clap(long, value_name = "FILE", help_heading = "Exclude options")] + iglob_file: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, FromStr)] +enum ArchiveKind { + Auto, + Content, + Tar, + TarGz, + Zip, } impl Runnable for DumpCmd { @@ -46,17 +82,77 @@ impl DumpCmd { let node = repo.node_from_snapshot_path(&self.snap, |sn| config.snapshot_filter.matches(sn))?; - let mut stdout = std::io::stdout(); - if node.is_file() { - repo.dump(&node, &mut stdout)?; + let stdout = std::io::stdout(); + + let ls_opts = LsOptions::default() + .glob(self.glob.clone()) + .glob_file(self.glob_file.clone()) + .iglob(self.iglob.clone()) + .iglob_file(self.iglob_file.clone()) + .recursive(true); + + let ext = self + .file + .as_ref() + .and_then(|f| f.extension().map(|s| s.to_string_lossy().to_string())); + + let archive = match self.archive { + ArchiveKind::Auto => match ext.as_deref() { + Some("tar") => ArchiveKind::Tar, + Some("tgz") | Some("gz") => ArchiveKind::TarGz, + Some("zip") => ArchiveKind::Zip, + _ if node.is_dir() => ArchiveKind::Tar, + _ => ArchiveKind::Content, + }, + a => a, + }; + + let mut w: Box = if let Some(file) = &self.file { + let mut file = File::create(file)?; + if archive == ArchiveKind::Zip { + // when writing zip to a file, we use the optimized writer + return write_zip_to_file(&repo, &node, &mut file, &ls_opts); + } + Box::new(file) } else { - dump_tar(&repo, &node, &mut stdout, 
&self.ls_opts)?; - } + Box::new(stdout) + }; + + match archive { + ArchiveKind::Content => dump_content(&repo, &node, &mut w, &ls_opts)?, + ArchiveKind::Tar => dump_tar(&repo, &node, &mut w, &ls_opts)?, + ArchiveKind::TarGz => dump_tar_gz(&repo, &node, &mut w, &ls_opts)?, + ArchiveKind::Zip => dump_zip(&repo, &node, &mut w, &ls_opts)?, + _ => {} + }; Ok(()) } } +fn dump_content( + repo: &CliIndexedRepo, + node: &Node, + w: &mut impl Write, + ls_opts: &LsOptions, +) -> Result<()> { + for item in repo.ls(node, ls_opts)? { + let (_, node) = item?; + repo.dump(&node, w)?; + } + Ok(()) +} + +fn dump_tar_gz( + repo: &CliIndexedRepo, + node: &Node, + w: &mut impl Write, + ls_opts: &LsOptions, +) -> Result<()> { + let mut w = GzEncoder::new(w, Compression::default()); + dump_tar(repo, node, &mut w, ls_opts) +} + fn dump_tar( repo: &CliIndexedRepo, node: &Node, @@ -135,6 +231,105 @@ fn dump_tar( Ok(()) } +fn dump_zip( + repo: &CliIndexedRepo, + node: &Node, + w: &mut impl Write, + ls_opts: &LsOptions, +) -> Result<()> { + let w = SeekWriter { + write: w, + cursor: Cursor::new(Vec::new()), + written: 0, + }; + let mut zip = ZipWriter::new(w); + zip.set_flush_on_finish_file(true); + write_zip_contents(repo, node, &mut zip, ls_opts)?; + let mut inner = zip.finish()?; + inner.flush()?; + Ok(()) +} + +fn write_zip_to_file( + repo: &CliIndexedRepo, + node: &Node, + file: &mut (impl Write + Seek), + ls_opts: &LsOptions, +) -> Result<()> { + let mut zip = ZipWriter::new(file); + write_zip_contents(repo, node, &mut zip, ls_opts)?; + let _ = zip.finish()?; + Ok(()) +} + +fn write_zip_contents( + repo: &CliIndexedRepo, + node: &Node, + zip: &mut ZipWriter, + ls_opts: &LsOptions, +) -> Result<()> { + for item in repo.ls(node, ls_opts)? { + let (path, node) = item?; + + let mut options = SimpleFileOptions::default(); + if let Some(mode) = node.meta.mode { + // TODO: this is some go-mapped mode, but lower bits are the standard unix mode bits -> is this ok? 
+ options = options.unix_permissions(mode); + } + if let Some(mtime) = node.meta.mtime { + options = + options.last_modified_time(mtime.naive_local().try_into().unwrap_or_default()); + } + if node.is_file() { + zip.start_file_from_path(path, options)?; + repo.dump(&node, zip)?; + } else { + zip.add_directory_from_path(path, options)?; + } + } + Ok(()) +} + +struct SeekWriter { + write: W, + cursor: Cursor>, + written: u64, +} + +impl Read for SeekWriter { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.cursor.read(buf) + } +} + +impl Write for SeekWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.cursor.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + _ = self.cursor.seek(SeekFrom::Start(0))?; + let n = copy(&mut self.cursor, &mut self.write)?; + _ = self.cursor.seek(SeekFrom::Start(0))?; + self.cursor.get_mut().clear(); + self.cursor.get_mut().shrink_to(1_000_000); + self.written += n; + Ok(()) + } +} + +impl Seek for SeekWriter { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + match pos { + SeekFrom::Start(n) => self.cursor.seek(SeekFrom::Start(n - self.written)), + pos => self.cursor.seek(pos), + } + } + fn stream_position(&mut self) -> std::io::Result { + Ok(self.written + self.cursor.stream_position()?) 
+ } +} + struct OpenFileReader<'a> { repo: &'a CliIndexedRepo, open_file: OpenFile, diff --git a/tests/backup_restore.rs b/tests/backup_restore.rs index 0beaf5b66..b5803833e 100644 --- a/tests/backup_restore.rs +++ b/tests/backup_restore.rs @@ -13,6 +13,9 @@ use tempfile::{tempdir, TempDir}; use assert_cmd::Command; use predicates::prelude::{predicate, PredicateBooleanExt}; +mod repositories; +use repositories::src_snapshot; + use rustic_testing::TestResult; pub fn rustic_runner(temp_dir: &TempDir) -> TestResult { @@ -46,13 +49,13 @@ fn setup() -> TestResult { #[test] fn test_backup_and_check_passes() -> TestResult<()> { let temp_dir = setup()?; - let backup = "src/"; + let backup = src_snapshot()?.into_path().into_path(); { // Run `backup` for the first time rustic_runner(&temp_dir)? .arg("backup") - .arg(backup) + .arg(&backup) .assert() .success() .stdout(predicate::str::contains("successfully saved.")); @@ -104,16 +107,15 @@ fn test_backup_and_check_passes() -> TestResult<()> { fn test_backup_and_restore_passes() -> TestResult<()> { let temp_dir = setup()?; let restore_dir = temp_dir.path().join("restore"); - let backup = "src/"; - - // actual repository root to backup - let backup_files = std::env::current_dir()?.join(backup); + let backup_files = src_snapshot()?.into_path().into_path(); { // Run `backup` for the first time rustic_runner(&temp_dir)? .arg("backup") .arg(&backup_files) + .arg("--as-path") + .arg("/") .assert() .success() .stdout(predicate::str::contains("successfully saved.")); @@ -130,11 +132,23 @@ fn test_backup_and_restore_passes() -> TestResult<()> { } // Compare the backup and the restored directory - let compare_result = - Comparison::default().compare(&backup_files, &restore_dir.join(&backup_files))?; + let compare_result = Comparison::default().compare(&backup_files, &restore_dir)?; // no differences assert!(compare_result.is_empty()); + let dump_tar_file = restore_dir.join("test.tar"); + { + // Run `dump` + rustic_runner(&temp_dir)? 
+ .arg("dump") + .arg("latest") + .arg("--file") + .arg(&dump_tar_file) + .assert() + .success(); + } + // TODO: compare dump output with fixture + Ok(()) } diff --git a/tests/repositories.rs b/tests/repositories.rs index f8eec7883..15ab68b3d 100644 --- a/tests/repositories.rs +++ b/tests/repositories.rs @@ -8,7 +8,7 @@ use tar::Archive; use tempfile::{tempdir, TempDir}; #[derive(Debug)] -struct TestSource(TempDir); +pub struct TestSource(TempDir); impl TestSource { pub fn new(tmp: TempDir) -> Self { @@ -57,7 +57,7 @@ fn rustic_copy_repo() -> Result { } #[fixture] -fn src_snapshot() -> Result { +pub fn src_snapshot() -> Result { let dir = tempdir()?; let path = "tests/repository-fixtures/src-snapshot.tar.gz"; open_and_unpack(path, &dir)?;