diff --git a/src/de.rs b/src/de.rs index 3e1c5b9f..8dfd1625 100644 --- a/src/de.rs +++ b/src/de.rs @@ -12,6 +12,7 @@ use serde::de::{ self, value::StrDeserializer, Deserialize, DeserializeOwned, DeserializeSeed, Expected, IgnoredAny, Unexpected, Visitor, }; +use std::collections::BTreeMap; use std::fmt::Debug; use std::fmt::Formatter; use std::fmt::Result as FmtResult; @@ -122,6 +123,51 @@ impl Debug for Progress<'_> { } } +/// A structure that describes anchors and aliases in a YAML document. +/// The anchor name, prefixed in the YAML document with "&", is represented +/// without the prefix in `anchor_name`. +/// The `anchor_path` is a string that denotes the path to the anchor in the YAML +/// document. The keys that form the path are separated from one another by "/". +/// The `aliases` vector contains the path to each reference to the anchor in +/// the YAML document. +/// +/// # Examples +/// +/// The following YAML document: +/// ```yaml +/// a: +/// enum: &io +/// INPUT: 0 +/// OUTPUT: 1 +/// b: +/// enum: *io +/// c: +/// enum: *io +/// ``` +/// +/// Is represented by the following `DocumentAnchor`: +/// ```text +/// DocumentAnchor { +/// anchor_name: "io", +/// anchor_path: "/a/enum", +/// aliases: ["/b/enum", "/c/enum"], +/// } +/// ``` +#[derive(Debug, PartialEq)] +pub struct DocumentAnchor { + /// The name of the anchor, without the "&" prefix. + pub anchor_name: String, + + /// The path to the anchor in the YAML document, with keys separated by "/". + /// A "/" at the beginning of the path denotes the root of the YAML document. + pub anchor_path: String, + + /// The path to each alias that references the anchor in the YAML document. + /// The keys that form the alias path are separated by "/". + /// A "/" at the beginning of the path denotes the root of the YAML document. + pub aliases: Vec<String>, +} + impl<'de> Deserializer<'de> { /// Deserializes an instance of type `T` from a string of YAML text. 
/// @@ -233,6 +279,69 @@ impl<'de> Deserializer<'de> { Deserializer { progress } } + /// Returns every anchor in the YAML document along with its path and the paths of its aliases. + pub fn anchors(&self) -> Option<Vec<DocumentAnchor>> { + let document = match &self.progress { + Progress::Document(doc) => doc, + _ => return None, + }; + + let mut aliases = BTreeMap::<usize, Vec<usize>>::new(); + for (i, event) in document.events.iter().enumerate() { + if let (Event::Alias(id), _) = event { + aliases.entry(*id).or_default().push(i); + } + } + + let mut anchors = Vec::new(); + for (alias_id, document_index) in &document.anchor_event_map { + let anchor_name = document.anchor_names.get(alias_id).unwrap(); + let anchor_path = self.event_path(*document_index); + let mut anchors_aliases = Vec::new(); + for alias_index in aliases.get(alias_id).unwrap_or(&Vec::new()) { + anchors_aliases.push(self.event_path(*alias_index)); + } + + anchors.push(DocumentAnchor { + anchor_name: anchor_name.clone(), + anchor_path: anchor_path.clone(), + aliases: anchors_aliases, + }); + } + + Some(anchors) + } + + fn event_path(&self, event_index: usize) -> String { + let mut mapping_end = 0u32; + let mut process_scalar = true; + let mut path = Vec::new(); + + if let Progress::Document(document) = &self.progress { + for i in (0..=event_index).rev() { + let event = &document.events[i]; + match &event.0 { + Event::MappingEnd => mapping_end += 1, + Event::MappingStart(_) => { + if mapping_end > 0 { + mapping_end -= 1; + } else { + process_scalar = true; + } + } + Event::Scalar(scalar) => { + if process_scalar { + path.insert(0, String::from_utf8_lossy(&scalar.value).to_string()); + process_scalar = false; + } + } + _ => {} + } + } + } + format!("/{}", path.join("/")) + } + fn de( self, f: impl for<'document> FnOnce( diff --git a/src/lib.rs b/src/lib.rs index 247cfa79..82498f55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,7 +108,9 @@ #![crate_type = "lib"] // Re-export commonly used items from other modules -pub use crate::de::{from_reader, 
from_slice, from_str, Deserializer}; // Deserialization functions +pub use crate::de::{ + from_reader, from_slice, from_str, Deserializer, DocumentAnchor +}; // Deserialization functions pub use crate::modules::error::{Error, Location, Result}; // Error handling types pub use crate::ser::{to_string, to_writer, Serializer, State}; // Serialization functions #[doc(inline)] diff --git a/src/loader.rs b/src/loader.rs index fb2cf172..1e5cddf1 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -2,7 +2,7 @@ use crate::{ de::{Event, Progress}, libyml::{ error::Mark, - parser::{Event as YamlEvent, Parser}, + parser::{Anchor, Event as YamlEvent, Parser}, }, modules::error::{self, Error, ErrorImpl, Result}, }; @@ -57,6 +57,11 @@ pub struct Document<'input> { /// encountered during parsing, its id is used to look up the index of the corresponding /// event in the `events` vector. pub anchor_event_map: BTreeMap<usize, usize>, + + /// Map from alias id to anchor name. + /// + /// This field is a `BTreeMap` that maps alias ids to the names of the anchors they refer to. 
+ pub anchor_names: BTreeMap<usize, String>, } impl<'input> Loader<'input> { @@ -140,6 +145,14 @@ impl<'input> Loader<'input> { events: Vec::new(), error: None, anchor_event_map: BTreeMap::new(), + anchor_names: BTreeMap::new(), + }; + + let anchor_name = |anchor: &Anchor| { + format!("{:?}", anchor) + .trim_start_matches("\"") + .trim_end_matches("\"") + .to_owned() }; loop { @@ -165,6 +178,7 @@ impl<'input> Loader<'input> { } YamlEvent::DocumentStart => continue, YamlEvent::DocumentEnd => return Some(document), + YamlEvent::Alias(alias) => match anchors.get(&alias) { Some(id) => Event::Alias(*id), None => { @@ -178,20 +192,22 @@ impl<'input> Loader<'input> { YamlEvent::Scalar(mut scalar) => { if let Some(anchor) = scalar.anchor.take() { let id = anchors.len(); - anchors.insert(anchor, id); + document.anchor_names.insert(id, anchor_name(&anchor)); document .anchor_event_map .insert(id, document.events.len()); + anchors.insert(anchor, id); } Event::Scalar(scalar) } YamlEvent::SequenceStart(mut sequence_start) => { if let Some(anchor) = sequence_start.anchor.take() { let id = anchors.len(); - anchors.insert(anchor, id); + document.anchor_names.insert(id, anchor_name(&anchor)); document .anchor_event_map .insert(id, document.events.len()); + anchors.insert(anchor, id); } Event::SequenceStart(sequence_start) } @@ -199,10 +215,11 @@ impl<'input> Loader<'input> { YamlEvent::MappingStart(mut mapping_start) => { if let Some(anchor) = mapping_start.anchor.take() { let id = anchors.len(); - anchors.insert(anchor, id); + document.anchor_names.insert(id, anchor_name(&anchor)); document .anchor_event_map .insert(id, document.events.len()); + anchors.insert(anchor, id); } Event::MappingStart(mapping_start) } diff --git a/tests/test_de.rs b/tests/test_de.rs index 6033f812..7f633b1b 100644 --- a/tests/test_de.rs +++ b/tests/test_de.rs @@ -18,6 +18,7 @@ mod tests { loader::Loader, modules::error::ErrorImpl, Deserializer, Number, + DocumentAnchor, Value::{self, String as SerdeString}, }; use 
std::{ @@ -96,6 +97,31 @@ mod tests { test_de(yaml, &expected); } + #[test] + fn test_anchor_api() { + let yaml = indoc! {" + --- + a: + enum: &io + INPUT: 0 + OUTPUT: 1 + b: + enum: *io + c: + enum: *io + "}; + let mut deserializer = Deserializer::from_str(yaml); + let document = deserializer.next().unwrap(); + let anchors = document.anchors().unwrap_or_default(); + let expected = DocumentAnchor { + anchor_name: "io".into(), + anchor_path: "/a/enum".into(), + aliases: ["/b/enum".into(), "/c/enum".into()].to_vec(), + }; + assert_eq!(anchors.len(), 1); + assert_eq!(anchors[0], expected); + } + #[test] /// Test borrowed strings with different YAML representations. fn test_borrowed() {