diff --git a/README.md b/README.md index 779e9f7d53..d2b6a2d896 100644 --- a/README.md +++ b/README.md @@ -480,6 +480,9 @@ Options: -b, --base Base URL or website root directory to check relative URLs e.g. or `/path/to/public` + --root-dir + Root path to use when checking absolute local links, must be an absolute path + --basic-auth Basic authentication support. E.g. `http://example.com username:password` diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 4a86924c56..57edd5fcdb 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -21,7 +21,7 @@ async fn main() -> Result<()> { }, ]; - let links = Collector::new(None) // base + let links = Collector::default() .skip_missing_inputs(false) // don't skip missing inputs? (default=false) .skip_hidden(false) // skip hidden files? (default=true) .skip_ignored(false) // skip files that are ignored by git? (default=true) diff --git a/fixtures/resolve_paths_from_root_dir/nested/about/index.html b/fixtures/resolve_paths_from_root_dir/nested/about/index.html new file mode 100644 index 0000000000..3141b661a0 --- /dev/null +++ b/fixtures/resolve_paths_from_root_dir/nested/about/index.html @@ -0,0 +1,8 @@ + + + About + + +

About

+ + diff --git a/fixtures/resolve_paths_from_root_dir/nested/another page/index.html b/fixtures/resolve_paths_from_root_dir/nested/another page/index.html new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/resolve_paths_from_root_dir/nested/index.html b/fixtures/resolve_paths_from_root_dir/nested/index.html new file mode 100644 index 0000000000..8f4528f54a --- /dev/null +++ b/fixtures/resolve_paths_from_root_dir/nested/index.html @@ -0,0 +1,34 @@ + + + Index + + +

Index Title

+ +

+

+

+ + \ No newline at end of file diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 3551c4c9c8..5c0614b179 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -192,7 +192,7 @@ async fn progress_bar_task( while let Some(response) = recv_resp.recv().await { show_progress( &mut io::stderr(), - &pb, + pb.as_ref(), &response, formatter.as_ref(), &verbose, @@ -331,7 +331,7 @@ fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet) fn show_progress( output: &mut dyn Write, - progress_bar: &Option, + progress_bar: Option<&ProgressBar>, response: &Response, formatter: &dyn ResponseFormatter, verbose: &Verbosity, @@ -401,7 +401,7 @@ mod tests { let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( &mut buf, - &None, + None, &response, formatter.as_ref(), &Verbosity::default(), @@ -423,7 +423,7 @@ mod tests { let formatter = get_response_formatter(&options::OutputMode::Plain); show_progress( &mut buf, - &None, + None, &response, formatter.as_ref(), &Verbosity::debug(), diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index bb6ac4aac9..521a9b8eef 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -288,7 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option { async fn run(opts: &LycheeOptions) -> Result { let inputs = opts.inputs()?; - let mut collector = Collector::new(opts.config.base.clone()) + let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())? .skip_missing_inputs(opts.config.skip_missing) .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 60c259d2ea..bcb70da09c 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201, #[serde(default)] pub(crate) base: Option, + /// Root path to use when checking absolute local links, + /// must be an absolute path + #[arg(long)] + #[serde(default)] + pub(crate) root_dir: Option, + /// Basic authentication support. E.g. `http://example.com username:password` #[arg(long)] #[serde(default)] diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 25f872c641..5479d37ae8 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -393,6 +393,42 @@ mod cli { .stdout(contains("3 OK")); } + #[test] + fn test_resolve_paths_from_root_dir() { + let mut cmd = main_command(); + let dir = fixtures_path().join("resolve_paths_from_root_dir"); + + cmd.arg("--offline") + .arg("--include-fragments") + .arg("--root-dir") + .arg(&dir) + .arg(dir.join("nested").join("index.html")) + .env_clear() + .assert() + .failure() + .stdout(contains("7 Total")) + .stdout(contains("5 OK")) + .stdout(contains("2 Errors")); + } + + #[test] + fn test_resolve_paths_from_root_dir_and_base_url() { + let mut cmd = main_command(); + let dir = fixtures_path(); + + cmd.arg("--offline") + .arg("--root-dir") + .arg("/resolve_paths") + .arg("--base") + .arg(&dir) + .arg(dir.join("resolve_paths").join("index.html")) + .env_clear() + .assert() + .success() + .stdout(contains("3 Total")) + .stdout(contains("3 OK")); + } + #[test] fn test_youtube_quirk() { let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7"; diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 955bdd24e7..7eb4c8c82b 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,3 +1,4 @@ +use crate::ErrorKind; use crate::InputSource; use crate::{ basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request, @@ -9,6 +10,7 @@ use futures::{ StreamExt, }; use par_stream::ParStreamExt; +use std::path::PathBuf; /// Collector keeps the state of link collection /// It drives the link extraction from inputs @@ -21,22 +23,47 @@ pub struct Collector { skip_hidden: bool, include_verbatim: bool, use_html5ever: bool, + root_dir: Option, base: Option, } +impl Default for Collector { + fn default() -> Self { + Collector { + basic_auth_extractor: None, + skip_missing_inputs: false, + include_verbatim: false, + use_html5ever: false, + skip_hidden: true, + skip_ignored: true, + root_dir: None, + base: None, + } + } +} + impl Collector { /// Create a new collector with an empty cache - #[must_use] - pub const fn new(base: Option) -> Self { - Collector { + /// + /// # Errors + /// + /// Returns an `Err` if the `root_dir` is not an absolute path + pub fn new(root_dir: Option, base: Option) -> Result { + if let Some(root_dir) = &root_dir { + if root_dir.is_relative() { + return Err(ErrorKind::RootDirMustBeAbsolute(root_dir.clone())); + } + } + Ok(Collector { basic_auth_extractor: None, skip_missing_inputs: false, include_verbatim: false, use_html5ever: false, skip_hidden: true, skip_ignored: true, + root_dir, base, - } + }) } /// Skip missing input files (default is to error if they don't exist) @@ -119,12 +146,19 @@ impl Collector { }) .flatten() .par_then_unordered(None, move |(content, base)| { + let root_dir = self.root_dir.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { let content = content?; let extractor = Extractor::new(self.use_html5ever, self.include_verbatim); let uris: Vec = extractor.extract(&content); - let requests = request::create(uris, &content, &base, &basic_auth_extractor); + let requests = request::create( + uris, + &content.source, + root_dir.as_ref(), + base.as_ref(), + basic_auth_extractor.as_ref(), + ); Result::Ok(stream::iter(requests.into_iter().map(Ok))) } }) @@ -148,17 +182,25 @@ mod tests { }; // Helper function to run the collector on the given inputs - async fn collect(inputs: Vec, base: Option) -> HashSet { - let responses = Collector::new(base).collect_links(inputs); - responses.map(|r| r.unwrap().uri).collect().await + async fn collect( + inputs: Vec, + root_dir: Option, + base: Option, + ) -> Result> { + let responses = Collector::new(root_dir, base)?.collect_links(inputs); + Ok(responses.map(|r| r.unwrap().uri).collect().await) } // Helper function for collecting verbatim links - async fn collect_verbatim(inputs: Vec, base: Option) -> HashSet { - let responses = Collector::new(base) + async fn collect_verbatim( + inputs: Vec, + root_dir: Option, + base: Option, + ) -> Result> { + let responses = Collector::new(root_dir, base)? .include_verbatim(true) .collect_links(inputs); - responses.map(|r| r.unwrap().uri).collect().await + Ok(responses.map(|r| r.unwrap().uri).collect().await) } const TEST_STRING: &str = "http://test-string.com"; @@ -246,7 +288,7 @@ mod tests { }, ]; - let links = collect_verbatim(inputs, None).await; + let links = collect_verbatim(inputs, None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website(TEST_STRING), @@ -269,7 +311,7 @@ mod tests { file_type_hint: Some(FileType::Markdown), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://endler.dev"), @@ -295,7 +337,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -324,7 +366,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website("https://example.com/static/image.png"), @@ -351,7 +393,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected = HashSet::from_iter([ website("https://localhost.com/@/internal.md"), @@ -373,7 +415,7 @@ mod tests { file_type_hint: Some(FileType::Html), excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ // the body links wouldn't be present if the file was parsed strictly as XML @@ -406,7 +448,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], None).await; + let links = collect(vec![input], None, None).await.ok().unwrap(); let expected_urls = HashSet::from_iter([ website("https://github.com/lycheeverse/lychee/"), @@ -425,7 +467,7 @@ mod tests { file_type_hint: None, excluded_paths: None, }; - let links = collect(vec![input], None).await; + let links = collect(vec![input], None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([mail("user@example.com")]); @@ -468,7 +510,7 @@ mod tests { }, ]; - let links = collect(inputs, None).await; + let links = collect(inputs, None, None).await.ok().unwrap(); let expected_links = HashSet::from_iter([ website(&format!( @@ -502,7 +544,7 @@ mod tests { excluded_paths: None, }; - let links = collect(vec![input], Some(base)).await; + let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); let expected_links = HashSet::from_iter([ path("/path/to/root/index.html"), diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index 93df6d0db8..023278c034 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -71,6 +71,7 @@ pub mod remap; /// local IPs or e-mail addresses pub mod filter; +/// Test utilities #[cfg(test)] #[macro_use] pub mod test_utils; diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs index b7b76c7e5b..4c68900c18 100644 --- a/lychee-lib/src/types/base.rs +++ b/lychee-lib/src/types/base.rs @@ -30,15 +30,6 @@ impl Base { } } - /// Return the directory if the base is local - #[must_use] - pub(crate) fn dir(&self) -> Option { - match self { - Self::Remote(_) => None, - Self::Local(d) => Some(d.clone()), - } - } - pub(crate) fn from_source(source: &InputSource) -> Option { match &source { InputSource::RemoteUrl(url) => { diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 7246fe7e84..cbcfefe5e9 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -94,6 +94,10 @@ pub enum ErrorKind { #[error("Cannot convert path '{0}' to a URI")] InvalidPathToUri(String), + /// Root dir must be an absolute path + #[error("Root dir must be an absolute path: '{0}'")] + RootDirMustBeAbsolute(PathBuf), + /// The given URI type is not supported #[error("Unsupported URI type: '{0}'")] UnsupportedUriType(String), @@ -310,6 +314,7 @@ impl Hash for ErrorKind { Self::InvalidBase(base, e) => (base, e).hash(state), Self::InvalidBaseJoin(s) => s.hash(state), Self::InvalidPathToUri(s) => s.hash(state), + Self::RootDirMustBeAbsolute(s) => s.hash(state), Self::UnsupportedUriType(s) => s.hash(state), Self::InvalidUrlRemap(remap) => (remap).hash(state), Self::InvalidHeader(e) => e.to_string().hash(state), diff --git a/lychee-lib/src/types/file.rs b/lychee-lib/src/types/file.rs index 7cdeff6c80..d2f8631c6c 100644 --- a/lychee-lib/src/types/file.rs +++ b/lychee-lib/src/types/file.rs @@ -54,7 +54,6 @@ impl> From

for FileType { } /// Helper function to check if a path is likely a URL. - fn is_url(path: &Path) -> bool { path.to_str() .and_then(|s| Url::parse(s).ok()) diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs index bb4847ed9a..daa4f7fde6 100644 --- a/lychee-lib/src/utils/path.rs +++ b/lychee-lib/src/utils/path.rs @@ -1,4 +1,4 @@ -use crate::{Base, ErrorKind, Result}; +use crate::{ErrorKind, Result}; use cached::proc_macro::cached; use once_cell::sync::Lazy; use path_clean::PathClean; @@ -9,11 +9,6 @@ use std::path::{Path, PathBuf}; static CURRENT_DIR: Lazy = Lazy::new(|| env::current_dir().expect("cannot get current dir from environment")); -/// Returns the base if it is a valid `PathBuf` -fn get_base_dir(base: &Option) -> Option { - base.as_ref().and_then(Base::dir) -} - /// Create an absolute path out of a `PathBuf`. /// /// The `clean` method is relatively expensive @@ -29,55 +24,33 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf { .clean() } -/// Get the directory name of a given `Path`. -fn dirname(src: &'_ Path) -> Option<&'_ Path> { - if src.is_file() { - return src.parent(); - } - Some(src) -} - /// Resolve `dst` that was linked to from within `src` /// /// Returns Ok(None) in case of an absolute local link without a `base_url` -pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option) -> Result> { +pub(crate) fn resolve( + src: &Path, + dst: &PathBuf, + ignore_absolute_local_links: bool, +) -> Result> { let resolved = match dst { relative if dst.is_relative() => { // Find `dst` in the parent directory of `src` let Some(parent) = src.parent() else { - return Err(ErrorKind::InvalidFile(relative.to_path_buf())); + return Err(ErrorKind::InvalidFile(relative.clone())); }; parent.join(relative) } absolute if dst.is_absolute() => { - // Absolute local links (leading slash) require the `base_url` to - // define the document root. Silently ignore the link in case the - // `base_url` is not defined. - let Some(base) = get_base_dir(base) else { + if ignore_absolute_local_links { return Ok(None); - }; - let Some(dir) = dirname(&base) else { - return Err(ErrorKind::InvalidBase( - base.display().to_string(), - "The given directory cannot be a base".to_string(), - )); - }; - join(dir.to_path_buf(), absolute) + } + PathBuf::from(absolute) } - _ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())), + _ => return Err(ErrorKind::InvalidFile(dst.clone())), }; Ok(Some(absolute_path(resolved))) } -/// A cumbersome way to concatenate paths without checking their -/// existence on disk. See -fn join(base: PathBuf, dst: &Path) -> PathBuf { - let mut abs = base.into_os_string(); - let target_str = dst.as_os_str(); - abs.push(target_str); - PathBuf::from(abs) -} - /// Check if `child` is a subdirectory/file inside `parent` /// /// Note that `contains(parent, parent)` will return `true` @@ -110,7 +83,7 @@ mod test_path { let dummy = PathBuf::from("index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -123,7 +96,7 @@ mod test_path { let dummy = PathBuf::from("./index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&dummy, &abs_path, &None)?, + resolve(&dummy, &abs_path, true)?, Some(env::current_dir().unwrap().join("foo.html")) ); Ok(()) @@ -136,43 +109,12 @@ mod test_path { let abs_index = PathBuf::from("/path/to/index.html"); let abs_path = PathBuf::from("./foo.html"); assert_eq!( - resolve(&abs_index, &abs_path, &None)?, + resolve(&abs_index, &abs_path, true)?, Some(PathBuf::from("/path/to/foo.html")) ); Ok(()) } - // dummy - // foo.html - // valid base dir - #[test] - fn test_resolve_absolute_from_base_dir() -> Result<()> { - let dummy = PathBuf::new(); - let abs_path = PathBuf::from("/foo.html"); - let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); - assert_eq!( - resolve(&dummy, &abs_path, &base)?, - Some(PathBuf::from("/some/absolute/base/dir/foo.html")) - ); - Ok(()) - } - - // /path/to/index.html - // /other/path/to/foo.html - #[test] - fn test_resolve_absolute_from_absolute() -> Result<()> { - let abs_index = PathBuf::from("/path/to/index.html"); - let abs_path = PathBuf::from("/other/path/to/foo.html"); - let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir"))); - assert_eq!( - resolve(&abs_index, &abs_path, &base)?, - Some(PathBuf::from( - "/some/absolute/base/dir/other/path/to/foo.html" - )) - ); - Ok(()) - } - #[test] fn test_contains() { let parent_dir = tempfile::tempdir().unwrap(); diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 7867e50f09..ca66f7d857 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -8,14 +8,14 @@ use std::{ use crate::{ basic_auth::BasicAuthExtractor, - types::{uri::raw::RawUri, InputContent, InputSource}, + types::{uri::raw::RawUri, InputSource}, utils::{path, url}, Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri, }; /// Extract basic auth credentials for a given URL. fn extract_credentials( - extractor: &Option, + extractor: Option<&BasicAuthExtractor>, uri: &Uri, ) -> Option { extractor.as_ref().and_then(|ext| ext.matches(uri)) @@ -25,10 +25,11 @@ fn extract_credentials( fn create_request( raw_uri: &RawUri, source: &InputSource, - base: &Option, - extractor: &Option, + root_dir: Option<&PathBuf>, + base: Option<&Base>, + extractor: Option<&BasicAuthExtractor>, ) -> Result { - let uri = try_parse_into_uri(raw_uri, source, base)?; + let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?; let source = truncate_source(source); let element = raw_uri.element.clone(); let attribute = raw_uri.attribute.clone(); @@ -48,8 +49,13 @@ fn create_request( /// to create a valid URI. /// - If a URI cannot be created from the file path. /// - If the source is not a file path (i.e. the URI type is not supported). -fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option) -> Result { - let text = raw_uri.text.clone(); +fn try_parse_into_uri( + raw_uri: &RawUri, + source: &InputSource, + root_dir: Option<&PathBuf>, + base: Option<&Base>, +) -> Result { + let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir); let uri = match Uri::try_from(raw_uri.clone()) { Ok(uri) => uri, Err(_) => match base { @@ -58,7 +64,9 @@ fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option return Err(ErrorKind::InvalidBaseJoin(text.clone())), }, None => match source { - InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?, + InputSource::FsPath(root) => { + create_uri_from_file_path(root, &text, root_dir.is_none())? + } _ => return Err(ErrorKind::UnsupportedUriType(text)), }, }, @@ -81,7 +89,7 @@ pub(crate) fn is_anchor(text: &str) -> bool { fn create_uri_from_file_path( file_path: &Path, link_text: &str, - base: &Option, + ignore_absolute_local_links: bool, ) -> Result { let target_path = if is_anchor(link_text) { // For anchors, we need to append the anchor to the file name. @@ -94,7 +102,9 @@ fn create_uri_from_file_path( } else { link_text.to_string() }; - let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, base) else { + let Ok(constructed_url) = + resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links) + else { return Err(ErrorKind::InvalidPathToUri(target_path)); }; Ok(Uri { @@ -125,17 +135,16 @@ fn truncate_source(source: &InputSource) -> InputSource { /// it will not be added to the `HashSet`. pub(crate) fn create( uris: Vec, - input_content: &InputContent, - base: &Option, - extractor: &Option, + source: &InputSource, + root_dir: Option<&PathBuf>, + base: Option<&Base>, + extractor: Option<&BasicAuthExtractor>, ) -> HashSet { - let base = base - .clone() - .or_else(|| Base::from_source(&input_content.source)); + let base = base.cloned().or_else(|| Base::from_source(source)); uris.into_iter() .filter_map(|raw_uri| { - match create_request(&raw_uri, &input_content.source, &base, extractor) { + match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) { Ok(request) => Some(request), Err(e) => { warn!("Error creating request: {:?}", e); @@ -160,7 +169,7 @@ pub(crate) fn create( fn resolve_and_create_url( src_path: &Path, dest_path: &str, - base_uri: &Option, + ignore_absolute_local_links: bool, ) -> Result { let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path); @@ -168,8 +177,11 @@ fn resolve_and_create_url( // This addresses the issue mentioned in the original comment about double-encoding let decoded_dest = percent_decode_str(dest_path).decode_utf8()?; - let Ok(Some(resolved_path)) = path::resolve(src_path, &PathBuf::from(&*decoded_dest), base_uri) - else { + let Ok(Some(resolved_path)) = path::resolve( + src_path, + &PathBuf::from(&*decoded_dest), + ignore_absolute_local_links, + ) else { return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string())); }; @@ -181,10 +193,20 @@ fn resolve_and_create_url( Ok(url) } +fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String { + if text.starts_with('/') { + if let Some(path) = root_dir { + if let Some(path_str) = path.to_str() { + return format!("{path_str}{text}"); + } + } + } + text.to_string() +} + #[cfg(test)] mod tests { use super::*; - use crate::types::FileType; #[test] fn test_is_anchor() { @@ -195,28 +217,17 @@ mod tests { #[test] fn test_create_uri_from_path() { let result = - resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap(); + resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap(); assert_eq!(result.as_str(), "file:///test+encoding"); } - fn create_input(content: &str, file_type: FileType) -> InputContent { - InputContent { - content: content.to_string(), - file_type, - source: InputSource::String(content.to_string()), - } - } - #[test] fn test_relative_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Relative Link"#, - FileType::Html, - ); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("relative.html")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -226,14 +237,11 @@ mod tests { #[test] fn test_absolute_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://another.com/page")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -243,14 +251,11 @@ mod tests { #[test] fn test_root_relative_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Root Relative Link"#, - FileType::Html, - ); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("/root-relative")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -260,14 +265,11 @@ mod tests { #[test] fn test_parent_directory_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input( - r#"Parent Directory Link"#, - FileType::Html, - ); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("../parent")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -277,11 +279,156 @@ mod tests { #[test] fn test_fragment_url_resolution() { - let base = Some(Base::try_from("https://example.com/path/page.html").unwrap()); - let input = create_input(r##"Fragment Link"##, FileType::Html); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("#fragment")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, Some(&base), None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment")); + } + + #[test] + fn test_relative_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("relative.html")]; + let requests = create(uris, &source, Some(&root_dir), None, None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///some/relative.html")); + } + + #[test] + fn test_absolute_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("https://another.com/page")]; + let requests = create(uris, &source, Some(&root_dir), None, None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://another.com/page")); + } + + #[test] + fn test_root_relative_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("/root-relative")]; + let requests = create(uris, &source, Some(&root_dir), None, None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///tmp/lychee/root-relative")); + } + + #[test] + fn test_parent_directory_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("../parent")]; + let requests = create(uris, &source, Some(&root_dir), None, None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///parent")); + } + + #[test] + fn test_fragment_url_resolution_from_root_dir() { + let root_dir = PathBuf::from("/tmp/lychee"); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("#fragment")]; + let requests = create(uris, &source, Some(&root_dir), None, None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "file:///some/page.html#fragment")); + } + + #[test] + fn test_relative_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("relative.html")]; + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html")); + } + + #[test] + fn test_absolute_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("https://another.com/page")]; + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://another.com/page")); + } + + #[test] + fn test_root_relative_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("/root-relative")]; + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/tmp/lychee/root-relative")); + } + + #[test] + fn test_parent_directory_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("../parent")]; + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); + + assert_eq!(requests.len(), 1); + assert!(requests + .iter() + .any(|r| r.uri.url.as_str() == "https://example.com/parent")); + } + + #[test] + fn test_fragment_url_resolution_from_root_dir_and_base_url() { + let root_dir = PathBuf::from("/tmp/lychee"); + let base = Base::try_from("https://example.com/path/page.html").unwrap(); + let source = InputSource::FsPath(PathBuf::from("/some/page.html")); + + let uris = vec![RawUri::from("#fragment")]; + let requests = create(uris, &source, Some(&root_dir), Some(&base), None); assert_eq!(requests.len(), 1); assert!(requests @@ -291,14 +438,10 @@ mod tests { #[test] fn test_no_base_url_resolution() { - let base = None; - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let source = InputSource::String(String::new()); let uris = vec![RawUri::from("https://example.com/page")]; - let requests = create(uris, &input, &base, &None); + let requests = create(uris, &source, None, None, None); assert_eq!(requests.len(), 1); assert!(requests @@ -308,11 +451,17 @@ mod tests { #[test] fn test_create_request_from_relative_file_path() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let input_source = InputSource::FsPath(PathBuf::from("page.html")); - let actual = - create_request(&RawUri::from("file.html"), &input_source, &base, &None).unwrap(); + let actual = create_request( + &RawUri::from("file.html"), + &input_source, + None, + Some(&base), + None, + ) + .unwrap(); assert_eq!( actual, @@ -330,15 +479,16 @@ mod tests { #[test] fn test_create_request_from_absolute_file_path() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); + let base = Base::Local(PathBuf::from("/tmp/lychee")); let input_source = InputSource::FsPath(PathBuf::from("/tmp/lychee/page.html")); // Use an absolute path that's outside the base directory let actual = create_request( &RawUri::from("/usr/local/share/doc/example.html"), &input_source, - &base, - &None, + None, + Some(&base), + None, ) .unwrap(); @@ -358,29 +508,53 @@ mod tests { #[test] fn test_parse_relative_path_into_uri() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); - let input = create_input( - r#"Relative Link"#, - FileType::Html, - ); + let base = Base::Local(PathBuf::from("/tmp/lychee")); + let source = InputSource::String(String::new()); let raw_uri = RawUri::from("relative.html"); - let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html"); } #[test] fn test_parse_absolute_path_into_uri() { - let base = Some(Base::Local(PathBuf::from("/tmp/lychee"))); - let input = create_input( - r#"Absolute Link"#, - FileType::Html, - ); + let base = Base::Local(PathBuf::from("/tmp/lychee")); + let source = InputSource::String(String::new()); let raw_uri = RawUri::from("absolute.html"); - let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap(); + let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap(); assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html"); } + + #[test] + fn test_prepend_with_absolute_local_link_and_root_dir() { + let text = "/absolute/path"; + let root_dir = PathBuf::from("/root"); + let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir)); + assert_eq!(result, "/root/absolute/path"); + } + + #[test] + fn test_prepend_with_absolute_local_link_and_no_root_dir() { + let text = "/absolute/path"; + let result = prepend_root_dir_if_absolute_local_link(text, None); + assert_eq!(result, "/absolute/path"); + } + + #[test] + fn test_prepend_with_relative_link_and_root_dir() { + let text = "relative/path"; + let root_dir = PathBuf::from("/root"); + let result = prepend_root_dir_if_absolute_local_link(text, Some(&root_dir)); + assert_eq!(result, "relative/path"); + } + + #[test] + fn test_prepend_with_relative_link_and_no_root_dir() { + let text = "relative/path"; + let result = prepend_root_dir_if_absolute_local_link(text, None); + assert_eq!(result, "relative/path"); + } }