Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(mbtiles) - Add validation to martin .mbtiles #741 #1689

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
36 changes: 36 additions & 0 deletions martin/src/args/mbtiles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use log::info;
use mbtiles::ValidationLevel;

use crate::mbtiles::{MbtConfig, OnInvalid};

#[derive(clap::Args, Debug, PartialEq, Default)]
#[command(about, version)]
pub struct MbtArgs {
/// Level of validation to apply to .mbtiles
#[arg(long)]
pub validate_mbtiles: Option<ValidationLevel>,
/// How to handle invalid .mbtiles
#[arg(long)]
pub on_invalid_mbtiles: Option<OnInvalid>,
}

impl MbtArgs {
/// Apply CLI parameters from `self` to the configuration loaded from the config file `mbtiles`
pub fn override_config(self, mbt_config: &mut MbtConfig) {
// This ensures that if a new parameter is added to the struct, it will not be forgotten here
let Self {
validate_mbtiles,
on_invalid_mbtiles,
} = self;

if let Some(value) = validate_mbtiles {
info!("Overriding configured default mbtiles.validate to {value}");
mbt_config.validate = validate_mbtiles.unwrap_or_default();
}

if let Some(value) = on_invalid_mbtiles {
info!("Overriding configured default mbtiles.on_invalid to {value}");
mbt_config.on_invalid = on_invalid_mbtiles.unwrap_or_default();
}
}
}
4 changes: 4 additions & 0 deletions martin/src/args/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ pub use environment::{Env, OsEnv};
mod pg;
#[cfg(feature = "postgres")]
pub use pg::{BoundsCalcType, PgArgs, DEFAULT_BOUNDS_TIMEOUT};
#[cfg(feature = "mbtiles")]
mod mbtiles;
#[cfg(feature = "mbtiles")]
pub use mbtiles::MbtArgs;

mod root;
pub use root::{Args, ExtraArgs, MetaArgs};
Expand Down
13 changes: 11 additions & 2 deletions martin/src/args/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ pub struct Args {
#[cfg(feature = "postgres")]
#[command(flatten)]
pub pg: Option<crate::args::pg::PgArgs>,
#[cfg(feature = "mbtiles")]
#[command(flatten)]
pub mbtiles: Option<crate::args::mbtiles::MbtArgs>,
}

// None of these params will be transferred to the config
Expand Down Expand Up @@ -103,8 +106,14 @@ impl Args {
}

#[cfg(feature = "mbtiles")]
if !cli_strings.is_empty() {
config.mbtiles = parse_file_args(&mut cli_strings, &["mbtiles"], false);
{
if !cli_strings.is_empty() {
config.mbtiles = parse_file_args(&mut cli_strings, &["mbtiles"], false);
}
let mbt_args = self.mbtiles.unwrap_or_default();
if let FileConfigEnum::Config(c) = &mut config.mbtiles {
mbt_args.override_config(&mut c.custom);
}
}

#[cfg(feature = "cog")]
Expand Down
5 changes: 5 additions & 0 deletions martin/src/bin/martin-cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ pub struct CopierArgs {
#[cfg(feature = "postgres")]
#[command(flatten)]
pub pg: Option<martin::args::PgArgs>,
#[cfg(feature = "mbtiles")]
#[command(flatten)]
pub mbtiles: Option<martin::args::MbtArgs>,
}

#[serde_with::serde_as]
Expand Down Expand Up @@ -140,6 +143,8 @@ async fn start(copy_args: CopierArgs) -> MartinCpResult<()> {
srv: SrvArgs::default(),
#[cfg(feature = "postgres")]
pg: copy_args.pg,
#[cfg(feature = "mbtiles")]
mbtiles: copy_args.mbtiles,
};

args.merge_into_config(&mut config, &env)?;
Expand Down
30 changes: 26 additions & 4 deletions martin/src/file_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ pub enum FileError {
#[error(r"Unable to acquire connection to file: {0}")]
AcquireConnError(String),

#[error("Source {0} caused an abort due to validation error {1}")]
AbortOnInvalid(PathBuf, String),

#[error("Source {0} was ignored due to validation error {1}")]
IgnoreOnInvalid(PathBuf, String),

#[cfg(feature = "pmtiles")]
#[error(r"PMTiles error {0} processing {1}")]
PmtError(pmtiles::PmtError, String),
Expand Down Expand Up @@ -264,7 +270,11 @@ async fn resolve_int<T: SourceConfigExtras>(
let dup = if dup { "duplicate " } else { "" };
let id = idr.resolve(&id, url.to_string());
configs.insert(id.clone(), source);
results.push(cfg.custom.new_sources_url(id.clone(), url.clone()).await?);
match cfg.custom.new_sources_url(id.clone(), url.clone()).await {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm not a big fan of all this dup code for each call to new_sources - i wonder if we can do this logic elsewhere, and here keep the simple ? operator as before?

Err(FileError::IgnoreOnInvalid(_, _)) => {}
Err(e) => return Err(e),
Ok(src) => results.push(src),
};
info!("Configured {dup}source {id} from {}", sanitize_url(&url));
} else {
let can = source.abs_path()?;
Expand All @@ -278,7 +288,11 @@ async fn resolve_int<T: SourceConfigExtras>(
let id = idr.resolve(&id, can.to_string_lossy().to_string());
info!("Configured {dup}source {id} from {}", can.display());
configs.insert(id.clone(), source.clone());
results.push(cfg.custom.new_sources(id, source.into_path()).await?);
match cfg.custom.new_sources(id, source.into_path()).await {
Err(FileError::IgnoreOnInvalid(_, _)) => {}
Err(e) => return Err(e),
Ok(src) => results.push(src),
};
}
}
}
Expand All @@ -302,7 +316,11 @@ async fn resolve_int<T: SourceConfigExtras>(

let id = idr.resolve(id, url.to_string());
configs.insert(id.clone(), FileConfigSrc::Path(path));
results.push(cfg.custom.new_sources_url(id.clone(), url.clone()).await?);
match cfg.custom.new_sources_url(id.clone(), url.clone()).await {
Err(FileError::IgnoreOnInvalid(_, _)) => {}
Err(e) => return Err(e),
Ok(src) => results.push(src),
};
info!("Configured source {id} from URL {}", sanitize_url(&url));
} else {
let is_dir = path.is_dir();
Expand Down Expand Up @@ -331,7 +349,11 @@ async fn resolve_int<T: SourceConfigExtras>(
info!("Configured source {id} from {}", can.display());
files.insert(can);
configs.insert(id.clone(), FileConfigSrc::Path(path.clone()));
results.push(cfg.custom.new_sources(id, path).await?);
match cfg.custom.new_sources(id, path).await {
Err(FileError::IgnoreOnInvalid(_, _)) => {}
Err(e) => return Err(e),
Ok(src) => results.push(src),
};
}
}
}
Expand Down
72 changes: 65 additions & 7 deletions martin/src/mbtiles/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,47 @@ use std::path::PathBuf;
use std::sync::Arc;

use async_trait::async_trait;
use log::trace;
use clap::ValueEnum;
use enum_display::EnumDisplay;
use log::{trace, warn};
use martin_tile_utils::{TileCoord, TileInfo};
use mbtiles::MbtilesPool;
use mbtiles::{MbtResult, MbtilesPool, ValidationLevel};
use serde::{Deserialize, Serialize};
use tilejson::TileJSON;
use url::Url;

use crate::config::UnrecognizedValues;
use crate::file_config::FileError::{AcquireConnError, InvalidMetadata, IoError};
use crate::file_config::FileError::{self, AcquireConnError, InvalidMetadata, IoError};
use crate::file_config::{ConfigExtras, FileResult, SourceConfigExtras};
use crate::source::{TileData, TileInfoSource, UrlQuery};
use crate::{MartinResult, Source};

#[derive(
PartialEq, Eq, Debug, Clone, Copy, Default, Serialize, Deserialize, ValueEnum, EnumDisplay,
)]
#[serde(rename_all = "lowercase")]
pub enum OnInvalid {
/// Print warning message, and abort if the error is critical
#[default]
Warn,

/// Skip this source
Ignore,

/// Do not start Martin on any warnings
Abort,
}

#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct MbtConfig {
#[serde(flatten)]
pub unrecognized: UnrecognizedValues,

#[serde(default)]
pub validate: ValidationLevel,

#[serde(default)]
pub on_invalid: OnInvalid,
}

impl ConfigExtras for MbtConfig {
Expand All @@ -31,7 +55,31 @@ impl ConfigExtras for MbtConfig {

impl SourceConfigExtras for MbtConfig {
async fn new_sources(&self, id: String, path: PathBuf) -> FileResult<TileInfoSource> {
Ok(Box::new(MbtSource::new(id, path).await?))
let source = MbtSource::new(id, path.clone()).await?;
if let Err(validation_error) = source.validate(self.validate).await {
match self.on_invalid {
OnInvalid::Abort => {
return Err(FileError::AbortOnInvalid(
path,
validation_error.to_string(),
));
}
OnInvalid::Ignore => {
return Err(FileError::IgnoreOnInvalid(
path,
validation_error.to_string(),
));
}
OnInvalid::Warn => {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to do further processing of the validation_error to determine if this is a critical issue or not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I keep wondering if "fast" and "skip" are actually the same thing. We should always do some basic validation - like making sure the tiles table exists with the right columns - or else we can't even use it. The only difference is in what we do afterwards - ignore the source or continue with it... So perhaps:

  • validation level is fast vs thorough (no skip)
  • severity would be good, warn, error
  • and the action could be abort or ignore source for error, and abort or ignore source or warn on warning?
  • we may even introduce warnings-as-errors parameter? (slowly building a compiler here :) )

warn!(
"Source {} failed validation, this may cause performance issues: {}",
path.display(),
validation_error.to_string()
);
}
}
}
Ok(Box::new(source))
}

// TODO: Remove #[allow] after switching to Rust/Clippy v1.78+ in CI
Expand Down Expand Up @@ -80,6 +128,10 @@ impl MbtSource {
tile_info: meta.tile_info,
})
}

async fn validate(&self, validation_level: ValidationLevel) -> MbtResult<()> {
self.mbtiles.validate(validation_level).await
}
}

#[async_trait]
Expand Down Expand Up @@ -131,13 +183,15 @@ mod tests {
use std::path::PathBuf;

use indoc::indoc;
use mbtiles::ValidationLevel;

use crate::file_config::{FileConfigEnum, FileConfigSource, FileConfigSrc};
use crate::mbtiles::MbtConfig;
use crate::mbtiles::{MbtConfig, OnInvalid};

#[test]
fn parse() {
let cfg = serde_yaml::from_str::<FileConfigEnum<MbtConfig>>(indoc! {"
let cfg: FileConfigEnum<MbtConfig> =
serde_yaml::from_str::<FileConfigEnum<MbtConfig>>(indoc! {"
paths:
- /dir-path
- /path/to/file2.ext
Expand All @@ -149,8 +203,10 @@ mod tests {
pm-src3: https://example.org/file3.ext
pm-src4:
path: https://example.org/file4.ext
validate: thorough
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor nit: probably best to put the simple key: value things before the nested ones - i.e. move these two above paths:

on_invalid: abort
"})
.unwrap();
.unwrap();
let res = cfg.finalize("");
assert!(res.is_empty(), "unrecognized config: {res:?}");
let FileConfigEnum::Config(cfg) = cfg else {
Expand Down Expand Up @@ -190,5 +246,7 @@ mod tests {
),
]))
);
assert_eq!(cfg.custom.validate, ValidationLevel::Thorough);
assert_eq!(cfg.custom.on_invalid, OnInvalid::Abort);
}
}
2 changes: 1 addition & 1 deletion mbtiles/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub enum MbtError {
#[error("The destination file {0} is not empty. Some operations like creating a diff file require the destination file to be non-existent or empty.")]
NonEmptyTargetFile(PathBuf),

#[error("The file {0} does not have the required uniqueness constraint")]
#[error("The file {0} does not have the required uniqueness constraint. Try adding a unique index on the `map` or `tiles` table.")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps link to https://maplibre.org/martin/mbtiles-schema.html - and also update that page to include some information about performance? Note that tiles is the only official requirement of an mbtiles, but it could be either a table or a view - and it would be difficult to explain all this in a few lines.

NoUniquenessConstraint(String),

#[error("Could not copy MBTiles file: {reason}")]
Expand Down
2 changes: 1 addition & 1 deletion mbtiles/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod errors;
pub use errors::{MbtError, MbtResult};

mod mbtiles;
pub use mbtiles::{CopyType, MbtTypeCli, Mbtiles};
pub use mbtiles::{CopyType, MbtTypeCli, Mbtiles, ValidationLevel};

mod metadata;
pub use metadata::Metadata;
Expand Down
15 changes: 15 additions & 0 deletions mbtiles/src/mbtiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ pub enum MbtTypeCli {
Normalized,
}

#[derive(PartialEq, Eq, Debug, Clone, Copy, Default, Serialize, Deserialize, EnumDisplay)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
pub enum ValidationLevel {
/// Do not validate
Skip,

/// Quickly check the file
#[default]
Fast,

/// Do a slow check of everything
Thorough,
}

#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, EnumDisplay)]
#[enum_display(case = "Kebab")]
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
Expand Down
19 changes: 18 additions & 1 deletion mbtiles/src/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use std::path::Path;
use sqlx::{Pool, Sqlite, SqlitePool};

use crate::errors::MbtResult;
use crate::{Mbtiles, Metadata};
use crate::mbtiles::ValidationLevel;
use crate::{AggHashType, IntegrityCheckType, Mbtiles, Metadata};

#[derive(Clone, Debug)]
pub struct MbtilesPool {
Expand All @@ -27,4 +28,20 @@ impl MbtilesPool {
let mut conn = self.pool.acquire().await?;
self.mbtiles.get_tile(&mut *conn, z, x, y).await
}

pub async fn validate(&self, validation_level: ValidationLevel) -> MbtResult<()> {
let mut conn = self.pool.acquire().await?;
match validation_level {
ValidationLevel::Thorough => {
self.mbtiles
.validate(&mut *conn, IntegrityCheckType::Full, AggHashType::Verify)
.await?;
}
ValidationLevel::Fast => {
self.mbtiles.detect_type(&mut *conn).await?;
}
ValidationLevel::Skip => {}
}
Ok(())
}
}
Binary file modified tests/fixtures/mbtiles/webp.mbtiles
Binary file not shown.
Loading