From cc4f016dab5d09fb5e2814168e8d70cfaf7c4f27 Mon Sep 17 00:00:00 2001 From: aawsome <37850842+aawsome@users.noreply.github.com> Date: Thu, 3 Oct 2024 00:57:21 +0200 Subject: [PATCH] feat(config): add more filters (#1263) Adds the following filter options: - `filter-paths-exact` - filters exactly the given pathlists (supersets don't match) - `filter-tags-exact` - filters exactly the given taglists (supersets don't match) - `filter-before` - keeps only snapshots taken before the given time (date or date+time can be given) - `filter-after` - keeps only snapshots taken after the given time (date or date+time can be given) - `filter-size` - filters by size of snapshot'ed data (upper and lower limit may be given) - `filter-size-added` - filters by size added to the repository (upper and lower limit may be given) --------- Signed-off-by: simonsan <14062932+simonsan@users.noreply.github.com> Co-authored-by: simonsan <14062932+simonsan@users.noreply.github.com> --- Cargo.lock | 14 +++ Cargo.toml | 2 + config/README.md | 25 ++-- config/full.toml | 16 ++- src/filtering.rs | 172 ++++++++++++++++++++++++++- tests/show-config-fixtures/empty.txt | 4 + 6 files changed, 223 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 756092339..2a6797ac2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,6 +1113,18 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +[[package]] +name = "dateparser" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2ef451feee09ae5ecd8a02e738bd9adee9266b8fa9b44e22d3ce968d8694238" +dependencies = [ + "anyhow", + "chrono", + "lazy_static", + "regex", +] + [[package]] name = "dav-server" version = "0.7.0" @@ -3719,7 +3731,9 @@ dependencies = [ "conflate", "convert_case", "crossterm 0.28.1", + "dateparser", "dav-server", + "derive_more", "dialoguer", "dircmp", "directories", diff --git a/Cargo.toml b/Cargo.toml index 853c01cc7..0078ef52a 100644 
--- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,8 @@ clap = { version = "4", features = ["derive", "env", "wrap_help"] } clap_complete = "4" conflate = "0.2" convert_case = "0.6.0" +dateparser = "0.2.1" +derive_more = { version = "1.0.0", features = ["debug"] } dialoguer = "0.11.0" directories = "5" gethostname = "0.5" diff --git a/config/README.md b/config/README.md index ae6ae9ee4..0063cecf4 100644 --- a/config/README.md +++ b/config/README.md @@ -117,13 +117,21 @@ see Repository Options ### Snapshot-Filter Options `[snapshot-filter]` -| Attribute | Description | Default Value | Example Value | CLI Option | -| ------------- | --------------------------------------- | ------------- | ------------------- | -------------- | -| filter-hosts | Array of hosts to filter snapshots. | Not set | ["myhost", "host2"] | --filter-host | -| filter-labels | Array of labels to filter snapshots. | Not set | ["mylabal"] | --filter-label | -| filter-paths | Array of pathlists to filter snapshots. | Not set | ["/home,/root"] | --filter-paths | -| filter-tags | Array of taglists to filter snapshots. | Not set | ["tag1,tag2"] | --filter-tags | -| filter-fn | Custom filter function for snapshots. | Not set | | --filter-fn | +| Attribute | Description | Default Value | Example Value | CLI Option | +| ------------------ | ---------------------------------------------------------------------- | ------------- | ------------------------ | -------------------- | +| filter-hosts | Array of hosts to filter snapshots. | Not set | ["myhost", "host2"] | --filter-host | +| filter-labels | Array of labels to filter snapshots. | Not set | ["mylabal"] | --filter-label | +| filter-paths | Array of pathlists to filter snapshots. | Not set | ["/home,/root"] | --filter-paths | +| filter-paths-exact | Array or string of paths to filter snapshots. Exact match. | Not set | ["path1,path2", "path3"] | --filter-paths-exact | +| filter-tags | Array of taglists to filter snapshots. 
| Not set | ["tag1,tag2"] | --filter-tags | +| filter-tags-exact | Array or string of tags to filter snapshots. Exact match. | Not set | ["tag1,tag2", "tag3"] | --filter-tags-exact | +| filter-before | Filter snapshots before the given date/time | Not set | "2024-01-01" | --filter-before | +| filter-after | Filter snapshots after the given date/time | Not set | "2023-01-01 11:15:23" | --filter-after | +| filter-size | Filter snapshots for a total size in the size range. | Not set | "1MB..1GB" | --filter-size | +| | If a single value is given, this is taken as lower bound. | | "500 k" | | +| filter-size-added | Filter snapshots for a size added to the repository in the size range. | Not set | "1MB..1GB" | --filter-size-added | +| | If a single value is given, this is taken as lower bound. | | "500 k" | | +| filter-fn | Custom filter function for snapshots. | Not set | | --filter-fn | ### Backup Options `[backup]` @@ -202,6 +210,9 @@ source-specific option and then only apply to this source. | keep-none | Allow to keep no snapshots. | false | true | --keep-none | | prune | If set to true, prune the repository after snapshots have been removed. | false | | --prune | +Additionally extra snapshot filter options can be given for the `forget` command +here, see Snapshot-Filter options. + ### Copy Targets `[copy]` **Note**: Copy-targets must be defined in their own config profile files. 
diff --git a/config/full.toml b/config/full.toml index 0736d6ded..d953b689a 100644 --- a/config/full.toml +++ b/config/full.toml @@ -67,10 +67,16 @@ throttle = "10kB,10MB" # limit and burst per second; only opendal backends; Defa # Snapshot-filter options: These options apply to all commands that use snapshot filters [snapshot-filter] -filter-hosts = ["host2", "host2"] # Default: [] +filter-hosts = ["host1", "host2"] # Default: [] filter-labels = ["label1", "label2"] # Default: [] filter-tags = ["tag1,tag2", "tag3"] # Default: [] +filter-tags-exact = ["tag1,tag2", "tag2"] # Default: [] filter-paths = ["path1", "path2,path3"] # Default: [] +filter-paths-exact = ["path1", "path2,path3"] # Default: [] +filter-after = "2024-01-01" # Default: not set +filter-before = "2024-02-05 12:15" # Default: not set +filter-size = "200MiB" # Default: not set +filter-size-added = "1 MB..10MB" # Default: not set filter-fn = '|sn| {sn.host == "host1" || sn.description.contains("test")}' # Default: no filter function # Backup options: These options are used for all sources when calling the backup command. 
@@ -126,10 +132,16 @@ sources = [ prune = false group-by = "host,label,paths" # Can be any combination of host,label,paths,tags # The following filter options can be also defined here and then overwrite the options for the forget command -filter-hosts = ["host2", "host2"] # Default: [] +filter-hosts = ["host1", "host2"] # Default: [] filter-labels = ["label1", "label2"] # Default: [] filter-tags = ["tag1,tag2", "tag3"] # Default: [] +filter-tags-exact = ["tag1,tag2", "tag2"] # Default: [] filter-paths = ["path1", "path2,path3"] # Default: [] +filter-paths-exact = ["path1", "path2,path3"] # Default: [] +filter-after = "2024-01-01" # Default: not set +filter-before = "2024-02-05 12:15" # Default: not set +filter-size = "200MiB" # Default: not set +filter-size-added = "1 MB..10MB" # Default: not set filter-fn = '|sn| {sn.host == "host1" || sn.description.contains("test")}' # Default: no filter function # The retention options follow. All of these are not set by default. keep-tags = ["tag1", "tag2,tag3"] # Default: not set diff --git a/src/filtering.rs b/src/filtering.rs index c4c3327e8..93327c0ed 100644 --- a/src/filtering.rs +++ b/src/filtering.rs @@ -1,10 +1,17 @@ use crate::error::RhaiErrorKinds; +use bytesize::ByteSize; +use derive_more::derive::Display; use log::warn; use rustic_core::{repofile::SnapshotFile, StringList}; -use std::{error::Error, str::FromStr}; +use std::{ + error::Error, + fmt::{Debug, Display}, + str::FromStr, +}; use cached::proc_macro::cached; +use chrono::{DateTime, Local, NaiveTime}; use conflate::Merge; use rhai::{serde::to_dynamic, Dynamic, Engine, FnPtr, AST}; use serde::{Deserialize, Serialize}; @@ -75,12 +82,48 @@ pub struct SnapshotFilter { #[merge(strategy=conflate::vec::overwrite_empty)] filter_paths: Vec, + /// Path list to filter exactly (no superset) as given (can be specified multiple times) + #[clap(long, global = true, value_name = "PATH[,PATH,..]")] + #[serde_as(as = "Vec")] + #[merge(strategy=conflate::vec::overwrite_empty)] 
+ filter_paths_exact: Vec, + /// Tag list to filter (can be specified multiple times) #[clap(long, global = true, value_name = "TAG[,TAG,..]")] #[serde_as(as = "Vec")] #[merge(strategy=conflate::vec::overwrite_empty)] filter_tags: Vec, + /// Tag list to filter exactly (no superset) as given (can be specified multiple times) + #[clap(long, global = true, value_name = "TAG[,TAG,..]")] + #[serde_as(as = "Vec")] + #[merge(strategy=conflate::vec::overwrite_empty)] + filter_tags_exact: Vec, + + /// Only use snapshots which are taken after the given date/time + #[serde_as(as = "Option")] + #[clap(long, global = true, value_name = "DATE(TIME)")] + #[merge(strategy=conflate::option::overwrite_none)] + filter_after: Option, + + /// Only use snapshots which are taken before the given date/time + #[serde_as(as = "Option")] + #[clap(long, global = true, value_name = "DATE(TIME)")] + #[merge(strategy=conflate::option::overwrite_none)] + filter_before: Option, + + /// Only use snapshots with total size in given range + #[serde_as(as = "Option")] + #[clap(long, global = true, value_name = "SIZE")] + #[merge(strategy=conflate::option::overwrite_none)] + filter_size: Option, + + /// Only use snapshots with size added to the repo in given range + #[serde_as(as = "Option")] + #[clap(long, global = true, value_name = "SIZE")] + #[merge(strategy=conflate::option::overwrite_none)] + filter_size_added: Option, + /// Function to filter snapshots #[clap(long, global = true, value_name = "FUNC")] #[serde_as(as = "Option")] @@ -118,9 +161,136 @@ impl SnapshotFilter { } } + // For the `Option`s we check if the option is set and the condition is not matched. In this case we can early return false. 
+ if matches!(&self.filter_after, Some(after) if !after.matches(snapshot.time)) + || matches!(&self.filter_before, Some(before) if !before.matches(snapshot.time)) + || matches!((&self.filter_size,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.total_bytes_processed)) + || matches!((&self.filter_size_added,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.data_added)) + { + return false; + } + + // For the `Vec`s we have two possibilities: + // - There exists a suitable matches method on the snapshot item + // (this automatically handles empty filter correctly): snapshot.paths.matches(&self.filter_paths) && snapshot.tags.matches(&self.filter_tags) + // - manually check if the snapshot item is contained in the `Vec` + // but only if the `Vec` is not empty. + // If it is empty, no condition is given. + && (self.filter_paths_exact.is_empty() + || self.filter_paths_exact.contains(&snapshot.paths)) + && (self.filter_tags_exact.is_empty() + || self.filter_tags_exact.contains(&snapshot.tags)) && (self.filter_hosts.is_empty() || self.filter_hosts.contains(&snapshot.hostname)) && (self.filter_labels.is_empty() || self.filter_labels.contains(&snapshot.label)) } } + +#[derive(Debug, Clone, Display)] +struct AfterDate(DateTime); + +impl AfterDate { + fn matches(&self, datetime: DateTime) -> bool { + self.0 < datetime + } +} + +impl FromStr for AfterDate { + type Err = anyhow::Error; + fn from_str(s: &str) -> Result { + let before_midnight = NaiveTime::from_hms_nano_opt(23, 59, 59, 999_999_999).unwrap(); + let datetime = dateparser::parse_with(s, &Local, before_midnight)?; + Ok(Self(datetime.into())) + } +} + +#[derive(Debug, Clone, Display)] +struct BeforeDate(DateTime); + +impl BeforeDate { + fn matches(&self, datetime: DateTime) -> bool { + datetime < self.0 + } +} + +impl FromStr for BeforeDate { + type Err = anyhow::Error; + fn from_str(s: &str) -> Result { + let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap(); + let 
datetime = dateparser::parse_with(s, &Local, midnight)?; + Ok(Self(datetime.into())) + } +} + +#[derive(Debug, Clone)] +struct SizeRange { + from: Option, + to: Option, +} + +impl SizeRange { + fn matches(&self, size: u64) -> bool { + // The matches-expression is only true if the `Option` is `Some` and the size is smaller than from. + // Hence, !matches is true either if `self.from` is `None` or if the size >= the values + !matches!(self.from, Some(from) if size < from.0) + // same logic here, but smaller and greater swapped. + && !matches!(self.to, Some(to) if size > to.0) + } +} + +fn parse_size(s: &str) -> Result, String> { + let s = s.trim(); + if s.is_empty() { + return Ok(None); + } + Ok(Some(s.parse()?)) +} + +impl FromStr for SizeRange { + type Err = String; + fn from_str(s: &str) -> Result { + let (from, to) = match s.split_once("..") { + Some((s1, s2)) => (parse_size(s1)?, parse_size(s2)?), + None => (parse_size(s)?, None), + }; + Ok(Self { from, to }) + } +} + +impl Display for SizeRange { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(from) = self.from { + f.write_str(&from.to_string_as(true))?; + } + f.write_str("..")?; + if let Some(to) = self.to { + f.write_str(&to.to_string_as(true))?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case("..", None, None)] + #[case("10", Some(10), None)] + #[case("..10k", None, Some(10_000))] + #[case("1MB..", Some(1_000_000), None)] + #[case("1 MB .. 1 GiB", Some(1_000_000), Some(1_073_741_824))] + #[case("10 .. 
20 ", Some(10), Some(20))] + #[case(" 2G ", Some(2_000_000_000), None)] + fn size_range_from_str( + #[case] input: SizeRange, + #[case] from: Option, + #[case] to: Option, + ) { + assert_eq!(input.from.map(|v| v.0), from); + assert_eq!(input.to.map(|v| v.0), to); + } +} diff --git a/tests/show-config-fixtures/empty.txt b/tests/show-config-fixtures/empty.txt index c4aa38029..7db3df12b 100644 --- a/tests/show-config-fixtures/empty.txt +++ b/tests/show-config-fixtures/empty.txt @@ -20,7 +20,9 @@ warm-up = false filter-hosts = [] filter-labels = [] filter-paths = [] +filter-paths-exact = [] filter-tags = [] +filter-tags-exact = [] [backup] stdin-filename = "" @@ -57,7 +59,9 @@ prune = false filter-hosts = [] filter-labels = [] filter-paths = [] +filter-paths-exact = [] filter-tags = [] +filter-tags-exact = [] [webdav] symlinks = false