+
+
\ No newline at end of file
diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs
index 3551c4c9c8..5c0614b179 100644
--- a/lychee-bin/src/commands/check.rs
+++ b/lychee-bin/src/commands/check.rs
@@ -192,7 +192,7 @@ async fn progress_bar_task(
while let Some(response) = recv_resp.recv().await {
show_progress(
&mut io::stderr(),
- &pb,
+ pb.as_ref(),
&response,
formatter.as_ref(),
&verbose,
@@ -331,7 +331,7 @@ fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet)
fn show_progress(
output: &mut dyn Write,
- progress_bar: &Option,
+ progress_bar: Option<&ProgressBar>,
response: &Response,
formatter: &dyn ResponseFormatter,
verbose: &Verbosity,
@@ -401,7 +401,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
- &None,
+ None,
&response,
formatter.as_ref(),
&Verbosity::default(),
@@ -423,7 +423,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
- &None,
+ None,
&response,
formatter.as_ref(),
&Verbosity::debug(),
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index bb6ac4aac9..521a9b8eef 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -288,7 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option {
async fn run(opts: &LycheeOptions) -> Result {
let inputs = opts.inputs()?;
- let mut collector = Collector::new(opts.config.base.clone())
+ let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 60c259d2ea..bcb70da09c 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option,
+ /// Root path to use when checking absolute local links,
+ /// must be an absolute path
+ #[arg(long)]
+ #[serde(default)]
+ pub(crate) root_dir: Option,
+
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index 25f872c641..5479d37ae8 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -393,6 +393,42 @@ mod cli {
.stdout(contains("3 OK"));
}
+    #[test]
+    fn test_resolve_paths_from_root_dir() {
+        let mut cmd = main_command();
+        let root_dir = fixtures_path().join("resolve_paths_from_root_dir"); // passed to `--root-dir`
+        let input = root_dir.join("nested").join("index.html"); // the page whose links get checked
+        cmd.arg("--offline")
+            .arg("--include-fragments")
+            .arg("--root-dir")
+            .arg(&root_dir)
+            .arg(&input)
+            .env_clear()
+            .assert()
+            .failure() // two of the seven links are expected to fail, so the run must not succeed
+            .stdout(contains("7 Total"))
+            .stdout(contains("5 OK"))
+            .stdout(contains("2 Errors"));
+    }
+
+    #[test]
+    fn test_resolve_paths_from_root_dir_and_base_url() {
+        let mut cmd = main_command();
+        let base = fixtures_path(); // passed to `--base`
+        let input = base.join("resolve_paths").join("index.html"); // the page whose links get checked
+        cmd.arg("--offline")
+            .arg("--root-dir")
+            .arg("/resolve_paths") // NOTE(review): absolute on Unix only; `Path::is_relative` is true for it on Windows — confirm target platforms
+            .arg("--base")
+            .arg(&base)
+            .arg(&input)
+            .env_clear()
+            .assert()
+            .success() // all three links are expected to resolve
+            .stdout(contains("3 Total"))
+            .stdout(contains("3 OK"));
+    }
+
#[test]
fn test_youtube_quirk() {
let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7";
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index 955bdd24e7..7eb4c8c82b 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -1,3 +1,4 @@
+use crate::ErrorKind;
use crate::InputSource;
use crate::{
basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request,
@@ -9,6 +10,7 @@ use futures::{
StreamExt,
};
use par_stream::ParStreamExt;
+use std::path::PathBuf;
/// Collector keeps the state of link collection
/// It drives the link extraction from inputs
@@ -21,22 +23,47 @@ pub struct Collector {
skip_hidden: bool,
include_verbatim: bool,
use_html5ever: bool,
+ root_dir: Option,
base: Option,
}
+impl Default for Collector {
+    fn default() -> Self {
+        Self {
+            skip_hidden: true, // together with `skip_ignored`, the only flags that start out enabled
+            skip_ignored: true,
+            skip_missing_inputs: false,
+            include_verbatim: false,
+            use_html5ever: false,
+            basic_auth_extractor: None,
+            root_dir: None,
+            base: None,
+        }
+    }
+}
+
impl Collector {
/// Create a new collector with an empty cache
- #[must_use]
- pub const fn new(base: Option) -> Self {
- Collector {
+ ///
+ /// # Errors
+ ///
+ /// Returns [`ErrorKind::RootDirMustBeAbsolute`] if `root_dir` is `Some` but holds a relative path
+ pub fn new(root_dir: Option, base: Option) -> Result {
+ if let Some(root_dir) = &root_dir { // `None` is accepted without any validation
+ if root_dir.is_relative() { // on Windows this is also true for `/docs` (root but no drive prefix)
+ return Err(ErrorKind::RootDirMustBeAbsolute(root_dir.clone())); // the error carries the rejected path
+ }
+ }
+ Ok(Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
include_verbatim: false,
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
+ root_dir,
base,
- }
+ })
}
/// Skip missing input files (default is to error if they don't exist)
@@ -119,12 +146,19 @@ impl Collector {
})
.flatten()
.par_then_unordered(None, move |(content, base)| {
+ let root_dir = self.root_dir.clone();
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec = extractor.extract(&content);
- let requests = request::create(uris, &content, &base, &basic_auth_extractor);
+ let requests = request::create(
+ uris,
+ &content.source,
+ root_dir.as_ref(),
+ base.as_ref(),
+ basic_auth_extractor.as_ref(),
+ );
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
@@ -148,17 +182,25 @@ mod tests {
};
// Helper function to run the collector on the given inputs
- async fn collect(inputs: Vec, base: Option) -> HashSet {
- let responses = Collector::new(base).collect_links(inputs);
- responses.map(|r| r.unwrap().uri).collect().await
+ async fn collect(
+ inputs: Vec,
+ root_dir: Option,
+ base: Option,
+ ) -> Result> {
+ let responses = Collector::new(root_dir, base)?.collect_links(inputs); // `?` returns early if `Collector::new` rejects `root_dir`
+ Ok(responses.map(|r| r.unwrap().uri).collect().await) // an `Err` item in the stream panics via `unwrap` rather than being returned
}
// Helper function for collecting verbatim links
- async fn collect_verbatim(inputs: Vec, base: Option) -> HashSet {
- let responses = Collector::new(base)
+ async fn collect_verbatim(
+ inputs: Vec,
+ root_dir: Option,
+ base: Option,
+ ) -> Result> {
+ let responses = Collector::new(root_dir, base)? // `?` returns early if `Collector::new` rejects `root_dir`
.include_verbatim(true)
.collect_links(inputs);
- responses.map(|r| r.unwrap().uri).collect().await
+ Ok(responses.map(|r| r.unwrap().uri).collect().await) // an `Err` item in the stream panics via `unwrap` rather than being returned
}
const TEST_STRING: &str = "http://test-string.com";
@@ -246,7 +288,7 @@ mod tests {
},
];
- let links = collect_verbatim(inputs, None).await;
+ let links = collect_verbatim(inputs, None, None).await.unwrap();
let expected_links = HashSet::from_iter([
website(TEST_STRING),
@@ -269,7 +311,7 @@ mod tests {
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected_links = HashSet::from_iter([
website("https://endler.dev"),
@@ -295,7 +337,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected_links = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -324,7 +366,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected_links = HashSet::from_iter([
website("https://example.com/static/image.png"),
@@ -351,7 +393,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected = HashSet::from_iter([
website("https://localhost.com/@/internal.md"),
@@ -373,7 +415,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected_links = HashSet::from_iter([
// the body links wouldn't be present if the file was parsed strictly as XML
@@ -406,7 +448,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], None).await;
+ let links = collect(vec![input], None, None).await.unwrap();
let expected_urls = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@@ -425,7 +467,7 @@ mod tests {
file_type_hint: None,
excluded_paths: None,
};
- let links = collect(vec![input], None).await;
+ let links = collect(vec![input], None, None).await.unwrap();
let expected_links = HashSet::from_iter([mail("user@example.com")]);
@@ -468,7 +510,7 @@ mod tests {
},
];
- let links = collect(inputs, None).await;
+ let links = collect(inputs, None, None).await.unwrap();
let expected_links = HashSet::from_iter([
website(&format!(
@@ -502,7 +544,7 @@ mod tests {
excluded_paths: None,
};
- let links = collect(vec![input], Some(base)).await;
+ let links = collect(vec![input], None, Some(base)).await.unwrap();
let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),
diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs
index 93df6d0db8..023278c034 100644
--- a/lychee-lib/src/lib.rs
+++ b/lychee-lib/src/lib.rs
@@ -71,6 +71,7 @@ pub mod remap;
/// local IPs or e-mail addresses
pub mod filter;
+/// Test utilities
#[cfg(test)]
#[macro_use]
pub mod test_utils;
diff --git a/lychee-lib/src/types/base.rs b/lychee-lib/src/types/base.rs
index b7b76c7e5b..4c68900c18 100644
--- a/lychee-lib/src/types/base.rs
+++ b/lychee-lib/src/types/base.rs
@@ -30,15 +30,6 @@ impl Base {
}
}
- /// Return the directory if the base is local
- #[must_use]
- pub(crate) fn dir(&self) -> Option {
- match self {
- Self::Remote(_) => None,
- Self::Local(d) => Some(d.clone()),
- }
- }
-
pub(crate) fn from_source(source: &InputSource) -> Option {
match &source {
InputSource::RemoteUrl(url) => {
diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs
index 7246fe7e84..cbcfefe5e9 100644
--- a/lychee-lib/src/types/error.rs
+++ b/lychee-lib/src/types/error.rs
@@ -94,6 +94,10 @@ pub enum ErrorKind {
#[error("Cannot convert path '{0}' to a URI")]
InvalidPathToUri(String),
+ /// Root dir must be an absolute path
+ #[error("Root dir must be an absolute path: '{0}'")]
+ RootDirMustBeAbsolute(PathBuf),
+
/// The given URI type is not supported
#[error("Unsupported URI type: '{0}'")]
UnsupportedUriType(String),
@@ -310,6 +314,7 @@ impl Hash for ErrorKind {
Self::InvalidBase(base, e) => (base, e).hash(state),
Self::InvalidBaseJoin(s) => s.hash(state),
Self::InvalidPathToUri(s) => s.hash(state),
+ Self::RootDirMustBeAbsolute(s) => s.hash(state),
Self::UnsupportedUriType(s) => s.hash(state),
Self::InvalidUrlRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),
diff --git a/lychee-lib/src/types/file.rs b/lychee-lib/src/types/file.rs
index 7cdeff6c80..d2f8631c6c 100644
--- a/lychee-lib/src/types/file.rs
+++ b/lychee-lib/src/types/file.rs
@@ -54,7 +54,6 @@ impl> From
for FileType {
}
/// Helper function to check if a path is likely a URL.
-
fn is_url(path: &Path) -> bool {
path.to_str()
.and_then(|s| Url::parse(s).ok())
diff --git a/lychee-lib/src/utils/path.rs b/lychee-lib/src/utils/path.rs
index bb4847ed9a..daa4f7fde6 100644
--- a/lychee-lib/src/utils/path.rs
+++ b/lychee-lib/src/utils/path.rs
@@ -1,4 +1,4 @@
-use crate::{Base, ErrorKind, Result};
+use crate::{ErrorKind, Result};
use cached::proc_macro::cached;
use once_cell::sync::Lazy;
use path_clean::PathClean;
@@ -9,11 +9,6 @@ use std::path::{Path, PathBuf};
static CURRENT_DIR: Lazy =
Lazy::new(|| env::current_dir().expect("cannot get current dir from environment"));
-/// Returns the base if it is a valid `PathBuf`
-fn get_base_dir(base: &Option) -> Option {
- base.as_ref().and_then(Base::dir)
-}
-
/// Create an absolute path out of a `PathBuf`.
///
/// The `clean` method is relatively expensive
@@ -29,55 +24,33 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf {
.clean()
}
-/// Get the directory name of a given `Path`.
-fn dirname(src: &'_ Path) -> Option<&'_ Path> {
- if src.is_file() {
- return src.parent();
- }
- Some(src)
-}
-
/// Resolve `dst` that was linked to from within `src`
///
/// Returns Ok(None) in case of an absolute local link without a `base_url`
-pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option) -> Result