Skip to content

Commit

Permalink
Merge pull request #35 from lucasvr/lucas/anchors
Browse files Browse the repository at this point in the history
New API to get anchors and aliases from a document
  • Loading branch information
sebastienrousseau authored Jan 1, 2025
2 parents 140d00b + 59ee15f commit c7ba7ac
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 5 deletions.
109 changes: 109 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use serde::de::{
self, value::StrDeserializer, Deserialize, DeserializeOwned,
DeserializeSeed, Expected, IgnoredAny, Unexpected, Visitor,
};
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::fmt::Result as FmtResult;
Expand Down Expand Up @@ -122,6 +123,51 @@ impl Debug for Progress<'_> {
}
}

/// Describes one anchor of a YAML document together with every alias that references it.
///
/// The anchor name, written in the YAML document with a leading "&", is stored
/// without that prefix in `anchor_name`.
/// The `anchor_path` is a string that denotes the path to the anchor in the YAML
/// document. The keys that form the path are separated from one another by "/".
/// The `aliases` vector contains the path to each reference to the anchor in
/// the YAML document.
///
/// # Examples
///
/// The following YAML document:
/// ```yaml
/// a:
///   enum: &io
///     INPUT: 0
///     OUTPUT: 1
/// b:
///   enum: *io
/// c:
///   enum: *io
/// ```
///
/// Is represented by the following `DocumentAnchor`:
/// ```text
/// DocumentAnchor {
///     anchor_name: "io",
///     anchor_path: "/a/enum",
///     aliases: ["/b/enum", "/c/enum"],
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DocumentAnchor {
    /// The name of the anchor, without the "&" prefix.
    pub anchor_name: String,

    /// The path to the anchor in the YAML document, with keys separated by "/".
    /// A "/" at the beginning of the path denotes the root of the YAML document.
    pub anchor_path: String,

    /// The path to each alias that references the anchor in the YAML document.
    /// The keys that form each alias path are separated by "/".
    /// A "/" at the beginning of the path denotes the root of the YAML document.
    pub aliases: Vec<String>,
}

impl<'de> Deserializer<'de> {
/// Deserializes an instance of type `T` from a string of YAML text.
///
Expand Down Expand Up @@ -233,6 +279,69 @@ impl<'de> Deserializer<'de> {
Deserializer { progress }
}

/// Gets a vector of anchors, aliases, and where they occur in the YAML document.
///
/// Returns `None` when `self.progress` is not `Progress::Document`. Otherwise
/// returns one [`DocumentAnchor`] per entry of the document's
/// `anchor_event_map`, in the iteration order of that map. An anchor that is
/// never referenced gets an empty `aliases` vector.
///
/// # Panics
///
/// Panics if an id present in `anchor_event_map` has no entry in
/// `anchor_names`.
pub fn anchors(&self) -> Option<Vec<DocumentAnchor>> {
    let document = match &self.progress {
        Progress::Document(doc) => doc,
        _ => return None,
    };

    // Group the event index of every alias under the id it refers to.
    let mut alias_events = BTreeMap::<usize, Vec<usize>>::new();
    for (index, (event, _mark)) in document.events.iter().enumerate() {
        if let Event::Alias(id) = event {
            alias_events.entry(*id).or_default().push(index);
        }
    }

    let anchors = document
        .anchor_event_map
        .iter()
        .map(|(anchor_id, &event_index)| {
            let anchor_name = document
                .anchor_names
                .get(anchor_id)
                .expect("every id in `anchor_event_map` must have a name in `anchor_names`")
                .clone();
            let aliases: Vec<String> = alias_events
                .get(anchor_id)
                .map(|indices| {
                    indices.iter().map(|&index| self.event_path(index)).collect()
                })
                .unwrap_or_default();

            DocumentAnchor {
                anchor_name,
                // `event_path` already returns an owned `String`; no clone needed.
                anchor_path: self.event_path(event_index),
                aliases,
            }
        })
        .collect();

    Some(anchors)
}

/// Builds the "/"-separated path of scalar keys leading to the event at `event_index`.
///
/// The document's events are scanned backwards, starting at `event_index`
/// (inclusive). The first scalar met becomes the innermost path component.
/// After that, one more scalar is taken each time the scan passes a
/// `MappingStart` that is not matched by a `MappingEnd` seen earlier in the
/// scan, i.e. each time it leaves an enclosing mapping. Mappings that were
/// both opened and closed before the event are skipped by counting their
/// `MappingEnd` events.
///
/// Returns "/" when no component is found, which includes the case where
/// `self.progress` is not `Progress::Document`.
///
/// NOTE(review): sequence events are not tracked, and because the scan
/// includes the event at `event_index` itself, a scalar at that index becomes
/// the first component. Confirm this is the intended behavior for anchored
/// scalars and for anchors nested in sequences.
fn event_path(&self, event_index: usize) -> String {
    // Components are gathered innermost-first and reversed once at the end,
    // which avoids shifting the whole vector on every front insertion.
    let mut components: Vec<String> = Vec::new();
    let mut closed_mappings = 0u32;
    let mut take_next_scalar = true;

    if let Progress::Document(document) = &self.progress {
        // `take` bounds the scan to the available events instead of indexing.
        let scanned = document
            .events
            .iter()
            .take(event_index.saturating_add(1))
            .rev();
        for (event, _mark) in scanned {
            match event {
                Event::MappingEnd => closed_mappings += 1,
                Event::MappingStart(_) => {
                    if closed_mappings > 0 {
                        // Start of a mapping that was fully closed before the event.
                        closed_mappings -= 1;
                    } else {
                        // Start of an enclosing mapping: the scalar before it is its key.
                        take_next_scalar = true;
                    }
                }
                Event::Scalar(scalar) if take_next_scalar => {
                    components.push(String::from_utf8_lossy(&scalar.value).into_owned());
                    take_next_scalar = false;
                }
                _ => {}
            }
        }
    }

    components.reverse();
    format!("/{}", components.join("/"))
}

fn de<T>(
self,
f: impl for<'document> FnOnce(
Expand Down
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@
#![crate_type = "lib"]

// Re-export commonly used items from other modules
pub use crate::de::{from_reader, from_slice, from_str, Deserializer}; // Deserialization functions
pub use crate::de::{
from_reader, from_slice, from_str, Deserializer, DocumentAnchor
}; // Deserialization functions
pub use crate::modules::error::{Error, Location, Result}; // Error handling types
pub use crate::ser::{to_string, to_writer, Serializer, State}; // Serialization functions
#[doc(inline)]
Expand Down
25 changes: 21 additions & 4 deletions src/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
de::{Event, Progress},
libyml::{
error::Mark,
parser::{Event as YamlEvent, Parser},
parser::{Anchor, Event as YamlEvent, Parser},
},
modules::error::{self, Error, ErrorImpl, Result},
};
Expand Down Expand Up @@ -57,6 +57,11 @@ pub struct Document<'input> {
/// encountered during parsing, its id is used to look up the index of the corresponding
/// event in the `events` vector.
pub anchor_event_map: BTreeMap<usize, usize>,

/// Map from alias id to name.
///
/// This field is a `BTreeMap` that maps alias ids to their corresponding names.
pub anchor_names: BTreeMap<usize, String>,
}

impl<'input> Loader<'input> {
Expand Down Expand Up @@ -140,6 +145,14 @@ impl<'input> Loader<'input> {
events: Vec::new(),
error: None,
anchor_event_map: BTreeMap::new(),
anchor_names: BTreeMap::new(),
};

// Derives an anchor's textual name from its `Debug` rendering by trimming the
// double-quote characters found at both ends of that rendering.
// NOTE(review): presumably `Anchor`'s `Debug` output wraps the name in double
// quotes — confirm against the `Anchor` implementation.
let anchor_name = |anchor: &Anchor| -> String {
    let debug_repr = format!("{:?}", anchor);
    debug_repr.trim_matches('"').to_string()
};

loop {
Expand All @@ -165,6 +178,7 @@ impl<'input> Loader<'input> {
}
YamlEvent::DocumentStart => continue,
YamlEvent::DocumentEnd => return Some(document),

YamlEvent::Alias(alias) => match anchors.get(&alias) {
Some(id) => Event::Alias(*id),
None => {
Expand All @@ -178,31 +192,34 @@ impl<'input> Loader<'input> {
YamlEvent::Scalar(mut scalar) => {
if let Some(anchor) = scalar.anchor.take() {
let id = anchors.len();
anchors.insert(anchor, id);
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::Scalar(scalar)
}
YamlEvent::SequenceStart(mut sequence_start) => {
if let Some(anchor) = sequence_start.anchor.take() {
let id = anchors.len();
anchors.insert(anchor, id);
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::SequenceStart(sequence_start)
}
YamlEvent::SequenceEnd => Event::SequenceEnd,
YamlEvent::MappingStart(mut mapping_start) => {
if let Some(anchor) = mapping_start.anchor.take() {
let id = anchors.len();
anchors.insert(anchor, id);
document.anchor_names.insert(id, anchor_name(&anchor));
document
.anchor_event_map
.insert(id, document.events.len());
anchors.insert(anchor, id);
}
Event::MappingStart(mapping_start)
}
Expand Down
26 changes: 26 additions & 0 deletions tests/test_de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod tests {
loader::Loader,
modules::error::ErrorImpl,
Deserializer, Number,
DocumentAnchor,
Value::{self, String as SerdeString},
};
use std::{
Expand Down Expand Up @@ -96,6 +97,31 @@ mod tests {
test_de(yaml, &expected);
}

#[test]
/// Test that `anchors()` reports an anchor's name, the path to the anchor, and
/// the path of each alias that references it.
fn test_anchor_api() {
let yaml = indoc! {"
---
a:
enum: &io
INPUT: 0
OUTPUT: 1
b:
enum: *io
c:
enum: *io
"};
let mut deserializer = Deserializer::from_str(yaml);
let document = deserializer.next().unwrap();
// A `None` result collapses to an empty vector here, so it is still caught
// by the length assertion below.
let anchors = document.anchors().unwrap_or_default();
// The single `&io` anchor is referenced twice, by `*io` under `b` and under `c`.
let expected = DocumentAnchor {
anchor_name: "io".into(),
anchor_path: "/a/enum".into(),
aliases: ["/b/enum".into(), "/c/enum".into()].to_vec(),
};
assert_eq!(anchors.len(), 1);
assert_eq!(anchors[0], expected);
}

#[test]
/// Test borrowed strings with different YAML representations.
fn test_borrowed() {
Expand Down

0 comments on commit c7ba7ac

Please sign in to comment.