Skip to content

Commit

Permalink
feat(config): add more filters (#1263)
Browse files Browse the repository at this point in the history
Adds the following filter options:
- `filter-paths-exact` - filters exactly the given pathlists
(supersets don't match)
- `filter-tags-exact` - filters exactly the given taglists (supersets
don't match)
- `filter-before` - filters by time (date or date+time can be given)
- `filter-after` - filters by time (date or date+time can be given)
- `filter-size` - filters by size of snapshot'ed data (upper and lower
limit may be given)
- `filter-size-added` - filters by size added to the repository (upper and
lower limit may be given)

---------

Signed-off-by: simonsan <[email protected]>
Co-authored-by: simonsan <[email protected]>
  • Loading branch information
aawsome and simonsan authored Oct 2, 2024
1 parent 68cbca3 commit cc4f016
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 10 deletions.
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ clap = { version = "4", features = ["derive", "env", "wrap_help"] }
clap_complete = "4"
conflate = "0.2"
convert_case = "0.6.0"
dateparser = "0.2.1"
derive_more = { version = "1.0.0", features = ["debug"] }
dialoguer = "0.11.0"
directories = "5"
gethostname = "0.5"
Expand Down
25 changes: 18 additions & 7 deletions config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,21 @@ see Repository Options

### Snapshot-Filter Options `[snapshot-filter]`

| Attribute | Description | Default Value | Example Value | CLI Option |
| ------------- | --------------------------------------- | ------------- | ------------------- | -------------- |
| filter-hosts | Array of hosts to filter snapshots. | Not set | ["myhost", "host2"] | --filter-host |
| filter-labels | Array of labels to filter snapshots. | Not set | ["mylabal"] | --filter-label |
| filter-paths | Array of pathlists to filter snapshots. | Not set | ["/home,/root"] | --filter-paths |
| filter-tags | Array of taglists to filter snapshots. | Not set | ["tag1,tag2"] | --filter-tags |
| filter-fn | Custom filter function for snapshots. | Not set | | --filter-fn |
| Attribute | Description | Default Value | Example Value | CLI Option |
| ------------------ | ---------------------------------------------------------------------- | ------------- | ------------------------ | -------------------- |
| filter-hosts | Array of hosts to filter snapshots. | Not set | ["myhost", "host2"] | --filter-host |
| filter-labels | Array of labels to filter snapshots. | Not set | ["mylabel"] | --filter-label |
| filter-paths | Array of pathlists to filter snapshots. | Not set | ["/home,/root"] | --filter-paths |
| filter-paths-exact | Array or string of paths to filter snapshots. Exact match. | Not set | ["path1,path2", "path3"] | --filter-paths-exact |
| filter-tags | Array of taglists to filter snapshots. | Not set | ["tag1,tag2"] | --filter-tags |
| filter-tags-exact | Array or string of tags to filter snapshots. Exact match. | Not set | ["tag1,tag2", "tag3"] | --filter-tags-exact |
| filter-before | Filter snapshots before the given date/time | Not set | "2024-01-01" | --filter-before |
| filter-after | Filter snapshots after the given date/time | Not set | "2023-01-01 11:15:23" | --filter-after |
| filter-size | Filter snapshots for a total size in the size range. | Not set | "1MB..1GB" | --filter-size |
| | If a single value is given, this is taken as lower bound. | | "500 k" | |
| filter-size-added | Filter snapshots for a size added to the repository in the size range. | Not set | "1MB..1GB" | --filter-size-added |
| | If a single value is given, this is taken as lower bound. | | "500 k" | |
| filter-fn | Custom filter function for snapshots. | Not set | | --filter-fn |

### Backup Options `[backup]`

Expand Down Expand Up @@ -202,6 +210,9 @@ source-specific option and then only apply to this source.
| keep-none | Allow to keep no snapshots. | false | true | --keep-none |
| prune | If set to true, prune the repository after snapshots have been removed. | false | | --prune |

Additionally, extra snapshot filter options can be given for the `forget` command
here; see Snapshot-Filter Options.

### Copy Targets `[copy]`

**Note**: Copy-targets must be defined in their own config profile files.
Expand Down
16 changes: 14 additions & 2 deletions config/full.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,16 @@ throttle = "10kB,10MB" # limit and burst per second; only opendal backends; Defa

# Snapshot-filter options: These options apply to all commands that use snapshot filters
[snapshot-filter]
filter-hosts = ["host2", "host2"] # Default: []
filter-hosts = ["host1", "host2"] # Default: []
filter-labels = ["label1", "label2"] # Default: []
filter-tags = ["tag1,tag2", "tag3"] # Default: []
filter-tags-exact = ["tag1,tag2", "tag2"] # Default: []
filter-paths = ["path1", "path2,path3"] # Default: []
filter-paths-exact = ["path1", "path2,path3"] # Default: []
filter-after = "2024-01-01" # Default: not set
filter-before = "2024-02-05 12:15" # Default: not set
filter-size = "200MiB" # Default: not set
filter-size-added = "1 MB..10MB" # Default: not set
filter-fn = '|sn| {sn.host == "host1" || sn.description.contains("test")}' # Default: no filter function

# Backup options: These options are used for all sources when calling the backup command.
Expand Down Expand Up @@ -126,10 +132,16 @@ sources = [
prune = false
group-by = "host,label,paths" # Can be any combination of host,label,paths,tags
# The following filter options can also be defined here and then overwrite the options for the forget command
filter-hosts = ["host2", "host2"] # Default: []
filter-hosts = ["host1", "host2"] # Default: []
filter-labels = ["label1", "label2"] # Default: []
filter-tags = ["tag1,tag2", "tag3"] # Default: []
filter-tags-exact = ["tag1,tag2", "tag2"] # Default: []
filter-paths = ["path1", "path2,path3"] # Default: []
filter-paths-exact = ["path1", "path2,path3"] # Default: []
filter-after = "2024-01-01" # Default: not set
filter-before = "2024-02-05 12:15" # Default: not set
filter-size = "200MiB" # Default: not set
filter-size-added = "1 MB..10MB" # Default: not set
filter-fn = '|sn| {sn.host == "host1" || sn.description.contains("test")}' # Default: no filter function
# The retention options follow. All of these are not set by default.
keep-tags = ["tag1", "tag2,tag3"] # Default: not set
Expand Down
172 changes: 171 additions & 1 deletion src/filtering.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
use crate::error::RhaiErrorKinds;

use bytesize::ByteSize;
use derive_more::derive::Display;
use log::warn;
use rustic_core::{repofile::SnapshotFile, StringList};
use std::{error::Error, str::FromStr};
use std::{
error::Error,
fmt::{Debug, Display},
str::FromStr,
};

use cached::proc_macro::cached;
use chrono::{DateTime, Local, NaiveTime};
use conflate::Merge;
use rhai::{serde::to_dynamic, Dynamic, Engine, FnPtr, AST};
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -75,12 +82,48 @@ pub struct SnapshotFilter {
#[merge(strategy=conflate::vec::overwrite_empty)]
filter_paths: Vec<StringList>,

/// Path list to filter exactly (no superset) as given (can be specified multiple times)
#[clap(long, global = true, value_name = "PATH[,PATH,..]")]
#[serde_as(as = "Vec<DisplayFromStr>")]
#[merge(strategy=conflate::vec::overwrite_empty)]
filter_paths_exact: Vec<StringList>,

/// Tag list to filter (can be specified multiple times)
#[clap(long, global = true, value_name = "TAG[,TAG,..]")]
#[serde_as(as = "Vec<DisplayFromStr>")]
#[merge(strategy=conflate::vec::overwrite_empty)]
filter_tags: Vec<StringList>,

/// Tag list to filter exactly (no superset) as given (can be specified multiple times)
#[clap(long, global = true, value_name = "TAG[,TAG,..]")]
#[serde_as(as = "Vec<DisplayFromStr>")]
#[merge(strategy=conflate::vec::overwrite_empty)]
filter_tags_exact: Vec<StringList>,

/// Only use snapshots which are taken after the given date/time
#[serde_as(as = "Option<DisplayFromStr>")]
#[clap(long, global = true, value_name = "DATE(TIME)")]
#[merge(strategy=conflate::option::overwrite_none)]
filter_after: Option<AfterDate>,

/// Only use snapshots which are taken before the given date/time
#[serde_as(as = "Option<DisplayFromStr>")]
#[clap(long, global = true, value_name = "DATE(TIME)")]
#[merge(strategy=conflate::option::overwrite_none)]
filter_before: Option<BeforeDate>,

/// Only use snapshots with total size in given range
#[serde_as(as = "Option<DisplayFromStr>")]
#[clap(long, global = true, value_name = "SIZE")]
#[merge(strategy=conflate::option::overwrite_none)]
filter_size: Option<SizeRange>,

/// Only use snapshots with size added to the repo in given range
#[serde_as(as = "Option<DisplayFromStr>")]
#[clap(long, global = true, value_name = "SIZE")]
#[merge(strategy=conflate::option::overwrite_none)]
filter_size_added: Option<SizeRange>,

/// Function to filter snapshots
#[clap(long, global = true, value_name = "FUNC")]
#[serde_as(as = "Option<DisplayFromStr>")]
Expand Down Expand Up @@ -118,9 +161,136 @@ impl SnapshotFilter {
}
}

// For the `Option`s we check if the option is set and the condition is not matched. In this case we can early return false.
if matches!(&self.filter_after, Some(after) if !after.matches(snapshot.time))
|| matches!(&self.filter_before, Some(before) if !before.matches(snapshot.time))
|| matches!((&self.filter_size,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.total_bytes_processed))
|| matches!((&self.filter_size_added,&snapshot.summary), (Some(size),Some(summary)) if !size.matches(summary.data_added))
{
return false;
}

// For the `Vec`s we have two possibilities:
// - There exists a suitable matches method on the snapshot item
// (this automatically handles empty filter correctly):
snapshot.paths.matches(&self.filter_paths)
&& snapshot.tags.matches(&self.filter_tags)
// - manually check if the snapshot item is contained in the `Vec`
// but only if the `Vec` is not empty.
// If it is empty, no condition is given.
&& (self.filter_paths_exact.is_empty()
|| self.filter_paths_exact.contains(&snapshot.paths))
&& (self.filter_tags_exact.is_empty()
|| self.filter_tags_exact.contains(&snapshot.tags))
&& (self.filter_hosts.is_empty() || self.filter_hosts.contains(&snapshot.hostname))
&& (self.filter_labels.is_empty() || self.filter_labels.contains(&snapshot.label))
}
}

/// Lower time bound used by the `filter-after` snapshot filter.
///
/// Wraps the local timestamp that a snapshot has to be strictly later than.
#[derive(Debug, Clone, Display)]
struct AfterDate(DateTime<Local>);

impl AfterDate {
    /// Returns `true` when `datetime` lies strictly after the stored bound.
    ///
    /// A timestamp equal to the bound does not match.
    fn matches(&self, datetime: DateTime<Local>) -> bool {
        datetime > self.0
    }
}

impl FromStr for AfterDate {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let before_midnight = NaiveTime::from_hms_nano_opt(23, 59, 59, 999_999_999).unwrap();
let datetime = dateparser::parse_with(s, &Local, before_midnight)?;
Ok(Self(datetime.into()))
}
}

/// Upper time bound used by the `filter-before` snapshot filter.
///
/// Wraps the local timestamp that a snapshot has to be strictly earlier than.
#[derive(Debug, Clone, Display)]
struct BeforeDate(DateTime<Local>);

impl BeforeDate {
    /// Returns `true` when `datetime` lies strictly before the stored bound.
    ///
    /// A timestamp equal to the bound does not match.
    fn matches(&self, datetime: DateTime<Local>) -> bool {
        self.0 > datetime
    }
}

impl FromStr for BeforeDate {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
let datetime = dateparser::parse_with(s, &Local, midnight)?;
Ok(Self(datetime.into()))
}
}

/// Byte-size range used by the `filter-size` and `filter-size-added` filters.
///
/// Either bound may be absent, which leaves that side of the range open.
#[derive(Debug, Clone)]
struct SizeRange {
    /// Smallest accepted size; `None` means there is no lower limit.
    from: Option<ByteSize>,
    /// Largest accepted size; `None` means there is no upper limit.
    to: Option<ByteSize>,
}

impl SizeRange {
    /// Returns `true` if `size` (in bytes) lies within the range.
    ///
    /// Both bounds are inclusive; a missing bound always passes.
    fn matches(&self, size: u64) -> bool {
        let lower_ok = match self.from {
            Some(lower) => size >= lower.0,
            None => true,
        };
        let upper_ok = match self.to {
            Some(upper) => size <= upper.0,
            None => true,
        };
        lower_ok && upper_ok
    }
}

fn parse_size(s: &str) -> Result<Option<ByteSize>, String> {
let s = s.trim();
if s.is_empty() {
return Ok(None);
}
Ok(Some(s.parse()?))
}

impl FromStr for SizeRange {
    type Err = String;

    /// Parses a range of the form `FROM..TO`, where either side may be empty.
    ///
    /// An input without `..` is taken as a lower bound only.
    ///
    /// # Errors
    ///
    /// Returns the error of the first bound that fails to parse.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split at the first ".."; without a separator the whole input is the lower bound.
        let (lower, upper) = if let Some((lo, hi)) = s.split_once("..") {
            (lo, Some(hi))
        } else {
            (s, None)
        };

        let from = parse_size(lower)?;
        let to = match upper {
            Some(hi) => parse_size(hi)?,
            None => None,
        };
        Ok(Self { from, to })
    }
}

impl Display for SizeRange {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(from) = self.from {
f.write_str(&from.to_string_as(true))?;
}
f.write_str("..")?;
if let Some(to) = self.to {
f.write_str(&to.to_string_as(true))?;
}

Ok(())
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Parsing a range string must yield the expected bounds, given in bytes.
    ///
    /// The first case argument is a string literal that rstest converts to a
    /// `SizeRange` through its `FromStr` implementation.
    #[rstest]
    #[case("..", None, None)]
    #[case("10", Some(10), None)]
    #[case("..10k", None, Some(10_000))]
    #[case("1MB..", Some(1_000_000), None)]
    #[case("1 MB .. 1 GiB", Some(1_000_000), Some(1_073_741_824))]
    #[case("10 .. 20 ", Some(10), Some(20))]
    #[case(" 2G ", Some(2_000_000_000), None)]
    fn size_range_from_str(
        #[case] range: SizeRange,
        #[case] expected_from: Option<u64>,
        #[case] expected_to: Option<u64>,
    ) {
        let SizeRange { from, to } = range;
        assert_eq!(from.map(|bound| bound.0), expected_from);
        assert_eq!(to.map(|bound| bound.0), expected_to);
    }
}
4 changes: 4 additions & 0 deletions tests/show-config-fixtures/empty.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ warm-up = false
filter-hosts = []
filter-labels = []
filter-paths = []
filter-paths-exact = []
filter-tags = []
filter-tags-exact = []

[backup]
stdin-filename = ""
Expand Down Expand Up @@ -57,7 +59,9 @@ prune = false
filter-hosts = []
filter-labels = []
filter-paths = []
filter-paths-exact = []
filter-tags = []
filter-tags-exact = []

[webdav]
symlinks = false
Expand Down

0 comments on commit cc4f016

Please sign in to comment.