Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor to simplify code using Mime struct and mime_guess crate #27

Merged
merged 5 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ serde = "1.0.136"
rust-embed = {version = "6.4.2", features = ["include-exclude"]}
serde_yaml = "0.9.14"
epub-builder = { git = "https://github.com/ultrasaurus/epub-builder", branch="ultra-main" }
mime_guess = "2.0.4"
mime = "0.3.17"
8 changes: 0 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,6 @@ until there are multiple maintainers or additional users.
there's a bit more clarity on options needed. Or if someone wants it,
they can propose a format with config options, file an issue and link it here.

### TODO - if requested
- file extensions: particular file extensions are hard-coded; however,
there are common variants not currently supported
- code writes HTML files with `.html` extension. Alternate `.htm` could
be future config option
- code identifies markdown files with `.md` extension. Would be easy to
also look for `.markdown`

### TODO - tech debt
- need to write some more automated tests
- stylesheet.css is auto-generated for TOC, consider moving book contents
Expand Down
150 changes: 125 additions & 25 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use anyhow::bail;
use pulldown_cmark::{Event, Parser as MarkdownParser, Tag};
use serde_json;
use serde_yaml;
use std::borrow::Cow;
use std::fs;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -212,31 +211,44 @@ impl Document {
while let Some(event) = parser.next() {
let next_event = match event {
Event::Start(Tag::Link(link_type, url, title)) => {
let md_suffix = ".md";
if url.ends_with(md_suffix) {
let new_url = format!("{}.html", url.trim_end_matches(md_suffix));
Event::Start(Tag::Link(link_type, new_url.into(), title))
} else if is_audio_file(&url) {
let link_text = if let Some(next_event) = parser.next() {
if let Event::Text(text) = next_event {
parser.next(); // skip Event::End
text
} else {
// no text event, just Event::End
Cow::Borrowed("#").into()
let url_string = url.to_string();
match mime_guess::from_path(&url_string).first() {
None => {
// no extension or no matching mime for extension
// just return the link unmodified
Event::Start(Tag::Link(link_type, url, title))
}
Some(mimetype) => {
match (mimetype.type_(), mimetype.subtype()) {
(mime::TEXT, subtype) if subtype == "markdown" => {
// already know we have valid URL with extension
// so no need for additional error checking
let ext = Path::new(&url_string).get_ext_str().unwrap();
let new_url: String =
format!("{}html", url.trim_end_matches(ext));
Event::Start(Tag::Link(link_type, new_url.into(), title))
}
(mime::AUDIO, _) => {
let link_text = if let Some(next_event) = parser.next() {
if let Event::Text(text) = next_event {
parser.next(); // skip past Event::End
text
} else {
// no text event, just Event::End
"#".into()
}
} else {
"".into()
};
let link_tag= format!("<a href=\"{}\" title=\"{}\" class=\"audio\"><span class=\"fa-solid fa-play\">{}</span></a>",
&url, &title, &link_text);
let audio_tag= format!("<audio controls><source src=\"{}\" type=\"{}\">Your browser does not support the audio element. {}</audio>",
url, mimetype, &link_tag);
Event::Html(audio_tag.into())
}
(_, _) => Event::Start(Tag::Link(link_type, url, title)),
}
} else {
Cow::Borrowed("").into()
};
let my_link_text= format!("<a href=\"{}\" title=\"{}\" class=\"audio\"><span class=\"fa-solid fa-play\">{}</span></a>",
&url, &title, &link_text);
let my_ext = get_ext(url.clone());
let my_mimetype = get_mimetype(&my_ext);
let my_html= format!("<audio controls><source src=\"{}\" type=\"{}\">Your browser does not support the audio element. {}</audio>",
url, my_mimetype, &my_link_text);
Event::Html(my_html.into())
} else {
Event::Start(Tag::Link(link_type, url, title))
}
}
}
_ => event,
Expand All @@ -248,3 +260,91 @@ impl Document {
Ok(())
}
}

#[cfg(test)]
mod tests {
    use super::*;

    const HELLO_MD: &str = "hello world...";
    const HELLO_HTML: &str = "<p>hello world...</p>\n";

    const EMPTY_BUF: &[u8] = b"";

    /// Runs `Document::write_html` on `md` and returns the bytes it wrote.
    fn render(md: &str) -> Vec<u8> {
        let markdown: String = md.to_string();
        let mut output = Vec::new();
        Document::write_html(&mut output, &markdown).unwrap();
        output
    }

    /// Empty markdown must produce no output at all.
    #[test]
    fn test_write_html_empty() {
        let output = render("");
        assert_eq!(&output, EMPTY_BUF);
    }

    /// A bare line of text is wrapped in a single paragraph.
    #[test]
    fn test_write_html_simple_string() {
        let output = render(HELLO_MD);
        let output_str = std::str::from_utf8(&output).unwrap();
        assert_eq!(output_str, HELLO_HTML);
    }

    /// One markdown input paired with the exact HTML expected for it.
    struct TestData<'a> {
        md: &'a str,
        html: &'a str,
    }

    /// Renders each case in `test_data` and asserts the output equals the
    /// expected HTML; panics on the first mismatch.
    fn verify_write_html_with_test_data(test_data: Vec<TestData>) {
        for case in &test_data {
            let output = render(case.md);
            let output_str = std::str::from_utf8(&output).unwrap();
            assert_eq!(output_str, case.html);
        }
    }

    /// Standard CommonMark formatting passes through unchanged.
    #[test]
    fn test_write_html_cmark_basics() {
        let test_data = vec![
            // plain text
            TestData {
                md: "hello",
                html: "<p>hello</p>\n",
            },
            // unordered list
            TestData {
                md: "* one\n* two",
                html: "<ul>\n<li>one</li>\n<li>two</li>\n</ul>\n",
            },
            // simple link to a URL without a file extension
            TestData {
                md: "link: [thing](https://example.com/thing)",
                html: "<p>link: <a href=\"https://example.com/thing\">thing</a></p>\n",
            },
            // link whose text contains an unmatched double quote
            TestData {
                md: r#"link: ["thing](https://example.com/thing)"#,
                html: "<p>link: <a href=\"https://example.com/thing\">&quot;thing</a></p>\n",
            },
        ];
        verify_write_html_with_test_data(test_data);
    }

    /// Links to markdown files are rewritten to point at `.html`.
    #[test]
    fn test_write_html_link_to_markdown() {
        let test_data = vec![
            // `.md` target becomes `.html`
            TestData {
                md: "link: [thing](https://example.com/thing.md)",
                html: "<p>link: <a href=\"https://example.com/thing.html\">thing</a></p>\n",
            },
            // `.markdown` target becomes `.html`
            TestData {
                md: "link: [thing](https://example.com/thing.markdown)",
                html: "<p>link: <a href=\"https://example.com/thing.html\">thing</a></p>\n",
            },
        ];
        verify_write_html_with_test_data(test_data);
    }
}
43 changes: 2 additions & 41 deletions src/util/mod.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,5 @@
mod dir_entry;
pub use self::dir_entry::DirEntryExt;
mod path;
pub use self::path::get_mimetype;
pub use self::path::PathExt;

use pulldown_cmark::CowStr;
use std::{borrow::Cow, path::Path};

/// Returns the extension of `url` as a lowercase string,
/// or an empty string when `url` has no extension.
pub fn get_ext<T: AsRef<str>>(url: T) -> Cow<'static, str> {
    // fallback used when the path carries no extension
    let no_extension = Cow::Borrowed("");
    Path::new(url.as_ref()).get_ext().unwrap_or(no_extension)
}

/// Reports whether `url` ends in one of the recognized audio file extensions.
///
/// The extension is lowercased before comparison, so the check is
/// case-insensitive; a URL without an extension is never treated as audio.
pub fn is_audio_file(url: &CowStr) -> bool {
    const AUDIO_EXTENSIONS: [&str; 5] = ["mp3", "mp4", "m4a", "wav", "ogg"];
    Path::new(url.as_ref())
        .extension()
        .map(|ext| ext.to_string_lossy().to_lowercase())
        .map_or(false, |ext| AUDIO_EXTENSIONS.contains(&ext.as_str()))
}

#[cfg(test)]
mod tests {
    // bring the functions under test into scope
    use super::*;

    /// A file name with an extension yields that extension.
    #[test]
    fn test_get_ext_png() {
        assert_eq!(get_ext("foo.png"), "png".to_string());
    }

    /// An empty input yields an empty extension.
    #[test]
    fn test_get_ext_empty() {
        assert_eq!(get_ext(""), "".to_string());
    }
}
pub mod path;
pub use self::path::PathExt;
62 changes: 20 additions & 42 deletions src/util/path.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,16 @@
//-- Path utility functions
use std::{borrow::Cow, path::Path};
//-- Path utility functions -----------------------------------------------
// extensions to Path struct and related helper functions

/// Returns the mimetype string for the file extension `ext`.
///
/// `ext` is matched exactly as given (no leading dot, no case folding);
/// any extension not listed maps to `application/octet-stream`.
pub fn get_mimetype(ext: &str) -> Cow<'static, str> {
    info!("get_mimetype for: {}", ext);
    let mimetype: &'static str = match ext {
        // audio / video
        "mp3" => "audio/mpeg",
        "mp4" => "video/mp4",
        "m4a" => "audio/mp4",
        "wav" => "audio/wav",
        "ogg" => "audio/ogg",
        // images
        "jpg" | "jpeg" => "image/jpeg",
        "png" => "image/png",
        "gif" => "image/gif",
        "svg" => "image/svg+xml",
        "webp" => "image/webp",
        // documents and archives
        "pdf" => "application/pdf",
        "zip" => "application/zip",
        "gz" => "application/gzip",
        "tar" => "application/x-tar",
        // text and markup
        "txt" => "text/plain",
        "md" => "text/markdown",
        "html" => "text/html",
        "css" => "text/css",
        "js" => "text/javascript",
        // structured data
        "json" => "application/json",
        "xml" => "application/xml",
        "yaml" | "yml" => "text/yaml",
        // anything else
        _ => "application/octet-stream",
    };
    Cow::Borrowed(mimetype)
}
use mime::Mime;
use std::{borrow::Cow, path::Path};

pub trait PathExt {
// given a path, ensure that all parent directories of that path exist
// and create any that don't exist
fn create_all_parent_dir(&self) -> std::io::Result<()>;
fn get_ext(&self) -> Option<Cow<'static, str>>;
fn mimetype(&self) -> Cow<'static, str>;
fn get_ext_str(&self) -> Option<&str>;
fn mimetype(&self) -> Option<Mime>;
fn is_markdown(&self) -> bool;
}

Expand All @@ -51,16 +23,22 @@ impl PathExt for Path {
Ok(())
}

// Returns the path's extension as a borrowed `&str`, exactly as written
// (no case folding); `None` when there is no extension or it is not
// valid UTF-8.
fn get_ext_str(&self) -> Option<&str> {
    self.extension().and_then(|ext| ext.to_str())
}
// Returns the path's extension as an owned, lowercased string
// (lossily converted if it is not valid UTF-8); `None` when the path
// has no extension.
fn get_ext(&self) -> Option<Cow<'static, str>> {
    self.extension()
        .map(|ext| Cow::Owned(ext.to_string_lossy().to_lowercase()))
}
fn mimetype(&self) -> Cow<'static, str> {
let ext = self.get_ext().unwrap_or(Cow::Borrowed(""));
get_mimetype(&ext)
fn mimetype(&self) -> Option<Mime> {
mime_guess::from_path(self).first()
}
fn is_markdown(&self) -> bool {
if let Some(ext) = self.extension() {
Expand Down Expand Up @@ -88,13 +66,13 @@ mod tests {
assert_eq!(result, None);
}
#[test]
fn test_get_mimetype_png() {
let result = get_mimetype("png");
assert_eq!(result, "image/png".to_string());
fn test_imetype_png() {
let result = Path::new("foo.png").mimetype();
assert_eq!(result, Some(mime::IMAGE_PNG));
}
#[test]
fn test_get_mimetype_empty() {
let result = get_mimetype("");
assert_eq!(result, "application/octet-stream".to_string());
let result = Path::new("foo").mimetype();
assert_eq!(result, None);
}
}
9 changes: 6 additions & 3 deletions src/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,13 @@ impl Web<'_> {
.strip_prefix(&self.template_dir_path)
.expect("strip prefix match");

let mimetype = rel_path.mimetype();
let mimetype = rel_path.mimetype().unwrap_or(mime::TEXT_PLAIN_UTF_8);
info!(" rel_path: {}, mimetype: {}", rel_path.display(), mimetype);
let result =
epub.add_resource(rel_path, fs::File::open(dir_entry.path())?, mimetype);
let result = epub.add_resource(
rel_path,
fs::File::open(dir_entry.path())?,
mimetype.to_string(),
);
// TODO: figure out why "?" doesn't work at end of statement above
if result.is_err() {
anyhow::bail!(
Expand Down