From e735b8e50fe35689787a3c98dc7016a0d7edd2ed Mon Sep 17 00:00:00 2001 From: Danny Yoo Date: Tue, 29 Aug 2023 13:59:04 -0700 Subject: [PATCH] Implements a comment directive to skip the next translation group. This adds support for adding a comment of the form: `<!-- mdbook-xgettext: skip -->`. This will cause the system to skip the next message group that would otherwise be translated. It adds a dependency on the regex crate to match the comment skip pattern. --- Cargo.lock | 13 ++--- Cargo.toml | 1 + src/lib.rs | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 154 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfef4c97..f606ef73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,6 +442,7 @@ dependencies = [ "pretty_assertions", "pulldown-cmark", "pulldown-cmark-to-cmark", + "regex", "semver", "serde_json", "tempfile", @@ -600,9 +601,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.3" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" dependencies = [ "aho-corasick", "memchr", @@ -612,9 +613,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" dependencies = [ "aho-corasick", "memchr", @@ -623,9 +624,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "rustix" diff --git a/Cargo.toml b/Cargo.toml 
index 4f1ff209..898c91d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ mdbook = { version = "0.4.25", default-features = false } polib = "0.2.0" pulldown-cmark = { version = "0.9.2", default-features = false } pulldown-cmark-to-cmark = "10.0.4" +regex = "1.9.4" semver = "1.0.16" serde_json = "1.0.91" diff --git a/src/lib.rs b/src/lib.rs index a3263d51..e68632a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,8 @@ use polib::catalog::Catalog; use pulldown_cmark::{Event, LinkType, Tag}; use pulldown_cmark_to_cmark::{cmark_resume_with_options, Options, State}; +use regex::Regex; +use std::sync::OnceLock; /// Like `mdbook::utils::new_cmark_parser`, but also passes a /// `BrokenLinkCallback`. @@ -190,15 +192,28 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { } impl State { - fn into_group<'a>(self, idx: usize, events: &'a [(usize, Event<'a>)]) -> Group<'a> { + fn into_group<'a>( + self, + idx: usize, + events: &'a [(usize, Event<'a>)], + skip_next_group: &mut bool, + ) -> Group<'a> { match self { - State::Translate(start) => Group::Translate(&events[start..idx]), + State::Translate(start) => { + if *skip_next_group { + *skip_next_group = false; + Group::Skip(&events[start..idx]) + } else { + Group::Translate(&events[start..idx]) + } + } State::Skip(start) => Group::Skip(&events[start..idx]), } } } let mut state = State::Skip(0); + let mut skip_next_group = false; for (idx, (_, event)) in events.iter().enumerate() { match event { @@ -207,13 +222,14 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { // make the group self-contained. Event::Start(Tag::Paragraph | Tag::CodeBlock(..)) => { // A translatable group starts here. - groups.push(state.into_group(idx, events)); + groups.push(state.into_group(idx, events, &mut skip_next_group)); + state = State::Translate(idx); } Event::End(Tag::Paragraph | Tag::CodeBlock(..)) => { // A translatable group ends after `idx`. 
let idx = idx + 1; - groups.push(state.into_group(idx, events)); + groups.push(state.into_group(idx, events, &mut skip_next_group)); state = State::Skip(idx); } @@ -231,12 +247,24 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { | Event::HardBreak => { // If we're currently skipping, then a new // translatable group starts here. - if let State::Skip(start) = state { - groups.push(Group::Skip(&events[start..idx])); + if let State::Skip(_) = state { + groups.push(state.into_group(idx, events, &mut skip_next_group)); + state = State::Translate(idx); } } + // An HTML comment directive to skip the next translation group. + Event::Html(s) if is_comment_skip_directive(s) => { + // If in the middle of translation, finish it. + if let State::Translate(_) = state { + groups.push(state.into_group(idx, events, &mut skip_next_group)); + state = State::Skip(idx); + } + + skip_next_group = true; + } + // All other block-level events start or continue a // skipping group. _ => { @@ -256,6 +284,15 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { groups } +/// Check whether the HTML is a directive to skip the next translation group. +fn is_comment_skip_directive(html: &str) -> bool { + static RE: OnceLock = OnceLock::new(); + + let re = + RE.get_or_init(|| Regex::new(r"").unwrap()); + re.is_match(html.trim()) +} + /// Render a slice of Markdown events back to Markdown. 
/// /// # Examples @@ -578,6 +615,19 @@ mod tests { ); } + #[test] + fn extract_events_comments() { + assert_eq!( + extract_events("\nHello", None), + vec![ + (1, Html("\n".into())), + (2, Start(Paragraph)), + (2, Text("Hello".into())), + (2, End(Paragraph)), + ] + ); + } + #[test] fn extract_messages_empty() { assert_extract_messages("", vec![]); @@ -951,4 +1001,94 @@ BOB ], ); } + + #[test] + fn test_is_comment_skip_directive_simple() { + assert_eq!( + is_comment_skip_directive(""), + true + ); + } + + #[test] + fn test_is_comment_skip_directive_tolerates_spaces() { + assert_eq!( + is_comment_skip_directive(""), + true + ); + } + + #[test] + fn test_is_comment_skip_directive_tolerates_dashes() { + assert_eq!( + is_comment_skip_directive(""), + true + ); + } + + #[test] + fn test_is_comment_skip_directive_needs_skip() { + assert_eq!( + is_comment_skip_directive(""), + false + ); + } + #[test] + fn test_is_comment_skip_directive_needs_to_be_a_comment() { + assert_eq!( + is_comment_skip_directive("
mdbook-xgettext: skip
"), + false + ); + } + + #[test] + fn extract_messages_skip_simple() { + assert_extract_messages( + r#" + +This is a paragraph."#, + vec![], + ); + } + + #[test] + fn extract_messages_skip_next_paragraph_ok() { + assert_extract_messages( + r#" +This is a paragraph. + +This should be translated. +"#, + vec![(4, "This should be translated.")], + ); + } + + #[test] + fn extract_messages_skip_next_codeblock() { + assert_extract_messages( + r#" +``` +def f(x): return x * x +``` +This should be translated. +"#, + vec![(5, "This should be translated.")], + ); + } + + #[test] + fn extract_messages_skip_back_to_back() { + assert_extract_messages( + r#" +``` +def f(x): return x * x +``` + +This should not translated. + +But *this* should! +"#, + vec![(8, "But _this_ should!")], + ); + } }