diff --git a/martin/src/args/mbtiles.rs b/martin/src/args/mbtiles.rs new file mode 100644 index 000000000..8a564ccfe --- /dev/null +++ b/martin/src/args/mbtiles.rs @@ -0,0 +1,36 @@ +use log::info; +use mbtiles::ValidationLevel; + +use crate::mbtiles::{MbtConfig, OnInvalid}; + +#[derive(clap::Args, Debug, PartialEq, Default)] +#[command(about, version)] +pub struct MbtArgs { + /// Level of validation to apply to .mbtiles + #[arg(long)] + pub validate_mbtiles: Option, + /// How to handle invalid .mbtiles + #[arg(long)] + pub on_invalid_mbtiles: Option, +} + +impl MbtArgs { + /// Apply CLI parameters from `self` to the configuration loaded from the config file `mbtiles` + pub fn override_config(self, mbt_config: &mut MbtConfig) { + // This ensures that if a new parameter is added to the struct, it will not be forgotten here + let Self { + validate_mbtiles, + on_invalid_mbtiles, + } = self; + + if let Some(value) = validate_mbtiles { + info!("Overriding configured default mbtiles.validate to {value}"); + mbt_config.validate = validate_mbtiles.unwrap_or_default(); + } + + if let Some(value) = on_invalid_mbtiles { + info!("Overriding configured default mbtiles.on_invalid to {value}"); + mbt_config.on_invalid = on_invalid_mbtiles.unwrap_or_default(); + } + } +} diff --git a/martin/src/args/mod.rs b/martin/src/args/mod.rs index 4ff104c3d..ecf29d4e8 100644 --- a/martin/src/args/mod.rs +++ b/martin/src/args/mod.rs @@ -8,6 +8,10 @@ pub use environment::{Env, OsEnv}; mod pg; #[cfg(feature = "postgres")] pub use pg::{BoundsCalcType, PgArgs, DEFAULT_BOUNDS_TIMEOUT}; +#[cfg(feature = "mbtiles")] +mod mbtiles; +#[cfg(feature = "mbtiles")] +pub use mbtiles::MbtArgs; mod root; pub use root::{Args, ExtraArgs, MetaArgs}; diff --git a/martin/src/args/root.rs b/martin/src/args/root.rs index 57ba2cb16..9967b9473 100644 --- a/martin/src/args/root.rs +++ b/martin/src/args/root.rs @@ -28,6 +28,9 @@ pub struct Args { #[cfg(feature = "postgres")] #[command(flatten)] pub pg: Option, + #[cfg(feature = "mbtiles")] + #[command(flatten)] + pub mbtiles: Option, } // None of these params will be transferred to the config @@ -103,8 +106,14 @@ impl Args { } #[cfg(feature = "mbtiles")] - if !cli_strings.is_empty() { - config.mbtiles = parse_file_args(&mut cli_strings, &["mbtiles"], false); + { + if !cli_strings.is_empty() { + config.mbtiles = parse_file_args(&mut cli_strings, &["mbtiles"], false); + } + let mbt_args = self.mbtiles.unwrap_or_default(); + if let FileConfigEnum::Config(c) = &mut config.mbtiles { + mbt_args.override_config(&mut c.custom); + } } #[cfg(feature = "cog")] diff --git a/martin/src/bin/martin-cp.rs b/martin/src/bin/martin-cp.rs index 897bc72ad..9eceed937 100644 --- a/martin/src/bin/martin-cp.rs +++ b/martin/src/bin/martin-cp.rs @@ -49,6 +49,9 @@ pub struct CopierArgs { #[cfg(feature = "postgres")] #[command(flatten)] pub pg: Option, + #[cfg(feature = "mbtiles")] + #[command(flatten)] + pub mbtiles: Option, } #[serde_with::serde_as] @@ -140,6 +143,8 @@ async fn start(copy_args: CopierArgs) -> MartinCpResult<()> { srv: SrvArgs::default(), #[cfg(feature = "postgres")] pg: copy_args.pg, + #[cfg(feature = "mbtiles")] + mbtiles: copy_args.mbtiles, }; args.merge_into_config(&mut config, &env)?; diff --git a/martin/src/file_config.rs b/martin/src/file_config.rs index 56a1062af..72ff9b82c 100644 --- a/martin/src/file_config.rs +++ b/martin/src/file_config.rs @@ -42,6 +42,12 @@ pub enum FileError { #[error(r"Unable to acquire connection to file: {0}")] AcquireConnError(String), + #[error("Source {0} caused an abort due to validation error {1}")] + AbortOnInvalid(PathBuf, String), + + #[error("Source {0} was ignored due to validation error {1}")] + IgnoreOnInvalid(PathBuf, String), + #[cfg(feature = "pmtiles")] #[error(r"PMTiles error {0} processing {1}")] PmtError(pmtiles::PmtError, String), @@ -264,7 +270,11 @@ async fn resolve_int( let dup = if dup { "duplicate " } else { "" }; let id = idr.resolve(&id, url.to_string()); configs.insert(id.clone(), source); - results.push(cfg.custom.new_sources_url(id.clone(), url.clone()).await?); + match cfg.custom.new_sources_url(id.clone(), url.clone()).await { + Err(FileError::IgnoreOnInvalid(_, _)) => {} + Err(e) => return Err(e), + Ok(src) => results.push(src), + }; info!("Configured {dup}source {id} from {}", sanitize_url(&url)); } else { let can = source.abs_path()?; @@ -278,7 +288,11 @@ async fn resolve_int( let id = idr.resolve(&id, can.to_string_lossy().to_string()); info!("Configured {dup}source {id} from {}", can.display()); configs.insert(id.clone(), source.clone()); - results.push(cfg.custom.new_sources(id, source.into_path()).await?); + match cfg.custom.new_sources(id, source.into_path()).await { + Err(FileError::IgnoreOnInvalid(_, _)) => {} + Err(e) => return Err(e), + Ok(src) => results.push(src), + }; } } } @@ -302,7 +316,11 @@ async fn resolve_int( let id = idr.resolve(id, url.to_string()); configs.insert(id.clone(), FileConfigSrc::Path(path)); - results.push(cfg.custom.new_sources_url(id.clone(), url.clone()).await?); + match cfg.custom.new_sources_url(id.clone(), url.clone()).await { + Err(FileError::IgnoreOnInvalid(_, _)) => {} + Err(e) => return Err(e), + Ok(src) => results.push(src), + }; info!("Configured source {id} from URL {}", sanitize_url(&url)); } else { let is_dir = path.is_dir(); @@ -331,7 +349,11 @@ async fn resolve_int( info!("Configured source {id} from {}", can.display()); files.insert(can); configs.insert(id.clone(), FileConfigSrc::Path(path.clone())); - results.push(cfg.custom.new_sources(id, path).await?); + match cfg.custom.new_sources(id, path).await { + Err(FileError::IgnoreOnInvalid(_, _)) => {} + Err(e) => return Err(e), + Ok(src) => results.push(src), + }; } } } diff --git a/martin/src/mbtiles/mod.rs b/martin/src/mbtiles/mod.rs index 50d58ae07..41c2338ec 100644 --- a/martin/src/mbtiles/mod.rs +++ b/martin/src/mbtiles/mod.rs @@ -4,23 +4,47 @@ use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; -use log::trace; +use clap::ValueEnum; +use enum_display::EnumDisplay; +use log::{trace, warn}; use martin_tile_utils::{TileCoord, TileInfo}; -use mbtiles::MbtilesPool; +use mbtiles::{MbtResult, MbtilesPool, ValidationLevel}; use serde::{Deserialize, Serialize}; use tilejson::TileJSON; use url::Url; use crate::config::UnrecognizedValues; -use crate::file_config::FileError::{AcquireConnError, InvalidMetadata, IoError}; +use crate::file_config::FileError::{self, AcquireConnError, InvalidMetadata, IoError}; use crate::file_config::{ConfigExtras, FileResult, SourceConfigExtras}; use crate::source::{TileData, TileInfoSource, UrlQuery}; use crate::{MartinResult, Source}; +#[derive( + PartialEq, Eq, Debug, Clone, Copy, Default, Serialize, Deserialize, ValueEnum, EnumDisplay, +)] +#[serde(rename_all = "lowercase")] +pub enum OnInvalid { + /// Print warning message, and abort if the error is critical + #[default] + Warn, + + /// Skip this source + Ignore, + + /// Do not start Martin on any warnings + Abort, +} + #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct MbtConfig { #[serde(flatten)] pub unrecognized: UnrecognizedValues, + + #[serde(default)] + pub validate: ValidationLevel, + + #[serde(default)] + pub on_invalid: OnInvalid, } impl ConfigExtras for MbtConfig { @@ -31,7 +55,31 @@ impl ConfigExtras for MbtConfig { impl SourceConfigExtras for MbtConfig { async fn new_sources(&self, id: String, path: PathBuf) -> FileResult { - Ok(Box::new(MbtSource::new(id, path).await?)) + let source = MbtSource::new(id, path.clone()).await?; + if let Err(validation_error) = source.validate(self.validate).await { + match self.on_invalid { + OnInvalid::Abort => { + return Err(FileError::AbortOnInvalid( + path, + validation_error.to_string(), + )); + } + OnInvalid::Ignore => { + return Err(FileError::IgnoreOnInvalid( + path, + validation_error.to_string(), + )); + } + OnInvalid::Warn => { + warn!( + "Source {} failed validation, this may cause performance issues: {}", + path.display(), + validation_error.to_string() + ); + } + } + } + Ok(Box::new(source)) } // TODO: Remove #[allow] after switching to Rust/Clippy v1.78+ in CI @@ -80,6 +128,10 @@ impl MbtSource { tile_info: meta.tile_info, }) } + + async fn validate(&self, validation_level: ValidationLevel) -> MbtResult<()> { + self.mbtiles.validate(validation_level).await + } } #[async_trait] @@ -131,13 +183,15 @@ mod tests { use std::path::PathBuf; use indoc::indoc; + use mbtiles::ValidationLevel; use crate::file_config::{FileConfigEnum, FileConfigSource, FileConfigSrc}; - use crate::mbtiles::MbtConfig; + use crate::mbtiles::{MbtConfig, OnInvalid}; #[test] fn parse() { - let cfg = serde_yaml::from_str::>(indoc! {" + let cfg: FileConfigEnum = + serde_yaml::from_str::>(indoc! {" paths: - /dir-path - /path/to/file2.ext @@ -149,8 +203,10 @@ mod tests { pm-src3: https://example.org/file3.ext pm-src4: path: https://example.org/file4.ext + validate: thorough + on_invalid: abort "}) - .unwrap(); + .unwrap(); let res = cfg.finalize(""); assert!(res.is_empty(), "unrecognized config: {res:?}"); let FileConfigEnum::Config(cfg) = cfg else { @@ -190,5 +246,7 @@ mod tests { ), ])) ); + assert_eq!(cfg.custom.validate, ValidationLevel::Thorough); + assert_eq!(cfg.custom.on_invalid, OnInvalid::Abort); } } diff --git a/mbtiles/src/errors.rs b/mbtiles/src/errors.rs index 768357f81..c7016430c 100644 --- a/mbtiles/src/errors.rs +++ b/mbtiles/src/errors.rs @@ -57,7 +57,7 @@ pub enum MbtError { #[error("The destination file {0} is not empty. Some operations like creating a diff file require the destination file to be non-existent or empty.")] NonEmptyTargetFile(PathBuf), - #[error("The file {0} does not have the required uniqueness constraint")] + #[error("The file {0} does not have the required uniqueness constraint. Try adding a unique index on the `map` or `tiles` table.")] NoUniquenessConstraint(String), #[error("Could not copy MBTiles file: {reason}")] diff --git a/mbtiles/src/lib.rs b/mbtiles/src/lib.rs index 4de855359..e6774daa3 100644 --- a/mbtiles/src/lib.rs +++ b/mbtiles/src/lib.rs @@ -11,7 +11,7 @@ mod errors; pub use errors::{MbtError, MbtResult}; mod mbtiles; -pub use mbtiles::{CopyType, MbtTypeCli, Mbtiles}; +pub use mbtiles::{CopyType, MbtTypeCli, Mbtiles, ValidationLevel}; mod metadata; pub use metadata::Metadata; diff --git a/mbtiles/src/mbtiles.rs b/mbtiles/src/mbtiles.rs index 40ece9757..e95de8f6e 100644 --- a/mbtiles/src/mbtiles.rs +++ b/mbtiles/src/mbtiles.rs @@ -23,6 +23,21 @@ pub enum MbtTypeCli { Normalized, } +#[derive(PartialEq, Eq, Debug, Clone, Copy, Default, Serialize, Deserialize, EnumDisplay)] +#[serde(rename_all = "lowercase")] +#[cfg_attr(feature = "cli", derive(clap::ValueEnum))] +pub enum ValidationLevel { + /// Do not validate + Skip, + + /// Quickly check the file + #[default] + Fast, + + /// Do a slow check of everything + Thorough, +} + #[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, EnumDisplay)] #[enum_display(case = "Kebab")] #[cfg_attr(feature = "cli", derive(clap::ValueEnum))] diff --git a/mbtiles/src/pool.rs b/mbtiles/src/pool.rs index 02fac693d..2d74f6c4d 100644 --- a/mbtiles/src/pool.rs +++ b/mbtiles/src/pool.rs @@ -3,7 +3,8 @@ use std::path::Path; use sqlx::{Pool, Sqlite, SqlitePool}; use crate::errors::MbtResult; -use crate::{Mbtiles, Metadata}; +use crate::mbtiles::ValidationLevel; +use crate::{AggHashType, IntegrityCheckType, Mbtiles, Metadata}; #[derive(Clone, Debug)] pub struct MbtilesPool { @@ -27,4 +28,20 @@ impl MbtilesPool { let mut conn = self.pool.acquire().await?; self.mbtiles.get_tile(&mut *conn, z, x, y).await } + + pub async fn validate(&self, validation_level: ValidationLevel) -> MbtResult<()> { + let mut conn = self.pool.acquire().await?; + match validation_level { + ValidationLevel::Thorough => { + self.mbtiles + .validate(&mut *conn, IntegrityCheckType::Full, AggHashType::Verify) + .await?; + } + ValidationLevel::Fast => { + self.mbtiles.detect_type(&mut *conn).await?; + } + ValidationLevel::Skip => {} + } + Ok(()) + } } diff --git a/tests/fixtures/mbtiles/webp.mbtiles b/tests/fixtures/mbtiles/webp.mbtiles index 3b53baf2e..8e0a2a5f6 100644 Binary files a/tests/fixtures/mbtiles/webp.mbtiles and b/tests/fixtures/mbtiles/webp.mbtiles differ