Skip to content

Commit

Permalink
Switch Notes to SubComponents to include figures and cross references
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnml1135 committed Jan 17, 2025
1 parent 4e1eec1 commit 5639d66
Show file tree
Hide file tree
Showing 13 changed files with 124 additions and 88 deletions.
16 changes: 8 additions & 8 deletions src/SIL.Machine/Corpora/IUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,24 @@ IReadOnlyList<UsfmAttribute> attributes
void EndChar(UsfmParserState state, string marker, IReadOnlyList<UsfmAttribute> attributes, bool closed);

/// <summary>
/// Start of a note
/// Start of a sub component - a note, figure or cross reference
/// </summary>
void StartNote(UsfmParserState state, string marker, string caller, string category);
void StartSubComponent(UsfmParserState state, string marker, string caller, string category);

/// <summary>
/// End of a note
/// End of a sub component
/// </summary>
void EndNote(UsfmParserState state, string marker, bool closed);
void EndSubComponent(UsfmParserState state, string marker, bool closed);

/// <summary>
/// Start of a note text
/// Start of a sub component text
/// </summary>
void StartNoteText(UsfmParserState state);
void StartSubComponentText(UsfmParserState state);

/// <summary>
/// End of a note text
/// End of a sub component text
/// </summary>
void EndNoteText(UsfmParserState state);
void EndSubComponentText(UsfmParserState state);

/// <summary>
/// Start of a table
Expand Down
4 changes: 2 additions & 2 deletions src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public string UpdateUsfm(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows,
string fullName = null,
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
UpdateUsfmIntraVerseMarkerBehavior noteBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior subComponentBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior formattingBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
)
{
Expand All @@ -42,7 +42,7 @@ public string UpdateUsfm(
rows,
fullName is null ? null : $"- {fullName}",
textBehavior,
noteBehavior,
subComponentBehavior,
formattingBehavior
);
try
Expand Down
20 changes: 10 additions & 10 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ public enum ScriptureTextType
None,
NonVerse,
Verse,
Note,
NoteText
SubComponent,
SubComponentText
}

public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
Expand Down Expand Up @@ -152,7 +152,7 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
EndParentElement();
}

public override void StartNote(UsfmParserState state, string marker, string caller, string category)
public override void StartSubComponent(UsfmParserState state, string marker, string caller, string category)
{
if (CurrentTextType != ScriptureTextType.None && !_duplicateVerse)
{
Expand All @@ -162,15 +162,15 @@ public override void StartNote(UsfmParserState state, string marker, string call
}
}

public override void StartNoteText(UsfmParserState state)
public override void StartSubComponentText(UsfmParserState state)
{
_curTextType.Push(ScriptureTextType.NoteText);
StartNoteText(state, CreateNonVerseRef());
_curTextType.Push(ScriptureTextType.SubComponentText);
StartSubComponentText(state, CreateNonVerseRef());
}

public override void EndNoteText(UsfmParserState state)
public override void EndSubComponentText(UsfmParserState state)
{
EndNoteText(state, CreateNonVerseRef());
EndSubComponentText(state, CreateNonVerseRef());
_curTextType.Pop();
}

Expand Down Expand Up @@ -206,9 +206,9 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr

protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { }

protected virtual void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef) { }
protected virtual void StartSubComponentText(UsfmParserState state, ScriptureRef scriptureRef) { }

protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) { }
protected virtual void EndSubComponentText(UsfmParserState state, ScriptureRef scriptureRef) { }

private void StartVerseText(UsfmParserState state)
{
Expand Down
40 changes: 22 additions & 18 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
private readonly List<UsfmToken> _newTokens;
private readonly string _idText;
private readonly UpdateUsfmTextBehavior _textBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _noteBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _subComponentBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _formattingBehavior;
private readonly Stack<bool> _replace;
private int _rowIndex;
Expand All @@ -38,7 +38,7 @@ public UpdateUsfmParserHandler(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
string idText = null,
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
UpdateUsfmIntraVerseMarkerBehavior noteBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior subComponentBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior formattingBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
)
{
Expand All @@ -48,7 +48,7 @@ public UpdateUsfmParserHandler(
_idText = idText;
_replace = new Stack<bool>();
_textBehavior = textBehavior;
_noteBehavior = noteBehavior;
_subComponentBehavior = subComponentBehavior;
_formattingBehavior = formattingBehavior;
}

Expand Down Expand Up @@ -196,26 +196,26 @@ bool closed
base.EndChar(state, marker, attributes, closed);
}

public override void StartNote(UsfmParserState state, string marker, string caller, string category)
public override void StartSubComponent(UsfmParserState state, string marker, string caller, string category)
{
// strip out notes in verses that are being replaced
if (ReplaceWithNewTokens(state))
SkipTokens(state);
else
CollectTokens(state);

base.StartNote(state, marker, caller, category);
base.StartSubComponent(state, marker, caller, category);
}

public override void EndNote(UsfmParserState state, string marker, bool closed)
public override void EndSubComponent(UsfmParserState state, string marker, bool closed)
{
// strip out notes in verses that are being replaced
if (ReplaceWithNewTokens(state, closed: closed, endNote: true))
if (ReplaceWithNewTokens(state, closed: closed, endSubComponent: true))
SkipTokens(state);
else
CollectTokens(state);

base.EndNote(state, marker, closed);
base.EndSubComponent(state, marker, closed);
}

public override void Ref(UsfmParserState state, string marker, string display, string target)
Expand Down Expand Up @@ -284,13 +284,13 @@ protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scri
PopNewTokens();
}

protected override void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef)
protected override void StartSubComponentText(UsfmParserState state, ScriptureRef scriptureRef)
{
IReadOnlyList<string> rowTexts = AdvanceRows(new[] { scriptureRef });
PushNewTokens(rowTexts.Select(t => new UsfmToken(t + " ")));
}

protected override void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef)
protected override void EndSubComponentText(UsfmParserState state, ScriptureRef scriptureRef)
{
PopNewTokens();
}
Expand Down Expand Up @@ -360,13 +360,13 @@ private void SkipTokens(UsfmParserState state)
_tokenIndex = state.Index + 1 + state.SpecialTokenCount;
}

private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, bool endNote = false)
private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, bool endSubComponent = false)
{
bool stripExistingText = _textBehavior == UpdateUsfmTextBehavior.StripExisting;
bool newText = _replace.Count > 0 && _replace.Peek();
bool inNote = state.NoteTag != null || endNote;
bool inNoteText = CurrentTextType == ScriptureTextType.NoteText;
bool isNoteTag =
bool inSubComponent = state.SubComponentTag != null || endSubComponent;
bool inSubComponentText = CurrentTextType == ScriptureTextType.SubComponentText;
bool isSubComponentTag =
state.Token.Marker != null && UsfmStylesheet.IsNoteOrCrossReferencePart(state.Token.Marker);
bool isFormattingTag =
state.Token.Marker != null && !UsfmStylesheet.IsNoteOrCrossReferencePart(state.Token.Marker);
Expand All @@ -379,20 +379,24 @@ private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, boo
bool useNewTokens =
stripExistingText
|| (newText && !existingText)
|| (newText && _textBehavior == UpdateUsfmTextBehavior.PreferNew && (!inNote || inNoteText));
|| (
newText
&& _textBehavior == UpdateUsfmTextBehavior.PreferNew
&& (!inSubComponent || inSubComponentText)
);

if (useNewTokens && _newTokens.Count > 0)
_tokens.AddRange(_newTokens);
_newTokens.Clear();

// figure out when to skip the existing text
bool withinNewText = _replace.Any(r => r);
if (withinNewText && inNote)
if (withinNewText && inSubComponent)
{
if (_noteBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip)
if (_subComponentBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip)
return true;

if (!inNoteText || isNoteTag)
if (!inSubComponentText || isSubComponentTag)
return false;
}

Expand Down
44 changes: 23 additions & 21 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -245,16 +245,18 @@ public bool ProcessToken()
if (paraTag != null && paraTag.TextType != UsfmTextType.VerseText && paraTag.TextType != 0)
CloseAll();
else
CloseNote();
CloseSubComponent();
break;
case UsfmTokenType.Note:
CloseNote();
case UsfmTokenType.SubComponent:
CloseSubComponent();
break;
case UsfmTokenType.End:
// If end marker for an active note
if (State.Stack.Any(e => e.Type == UsfmElementType.Note && (e.Marker + "*" == token.Marker)))
if (
State.Stack.Any(e => e.Type == UsfmElementType.SubComponent && (e.Marker + "*" == token.Marker))
)
{
CloseNote(closed: true);
CloseSubComponent(closed: true);
break;
}

Expand Down Expand Up @@ -493,8 +495,8 @@ public bool ProcessToken()
if (IsNoteText(token))
{
// Note text should be handled as a full segment
State.Push(new UsfmParserElement(UsfmElementType.NoteText, token.Marker));
Handler?.StartNoteText(State);
State.Push(new UsfmParserElement(UsfmElementType.SubComponentText, token.Marker));
Handler?.StartSubComponentText(State);
}

string actualMarker;
Expand All @@ -519,7 +521,7 @@ public bool ProcessToken()
token.Attributes
);
break;
case UsfmTokenType.Note:
case UsfmTokenType.SubComponent:
// Look for category
string noteCategory = null;
if (
Expand All @@ -534,9 +536,9 @@ public bool ProcessToken()
State.SpecialTokenCount += 3;
}

State.Push(new UsfmParserElement(UsfmElementType.Note, token.Marker));
State.Push(new UsfmParserElement(UsfmElementType.SubComponent, token.Marker));

Handler?.StartNote(State, token.Marker, token.Data, noteCategory);
Handler?.StartSubComponent(State, token.Marker, token.Data, noteCategory);
break;
case UsfmTokenType.Text:
string text = token.Text;
Expand Down Expand Up @@ -606,15 +608,15 @@ public void CloseAll()
private UsfmTokenType DetermineUnknownTokenType()
{
// Unknown inside notes are character
if (State.Stack.Any(e => e.Type == UsfmElementType.Note))
if (State.Stack.Any(e => e.Type == UsfmElementType.SubComponent))
return UsfmTokenType.Character;

return UsfmTokenType.Paragraph;
}

private void CloseNote(bool closed = false)
private void CloseSubComponent(bool closed = false)
{
if (State.Stack.Any(elem => elem.Type == UsfmElementType.Note))
if (State.Stack.Any(elem => elem.Type == UsfmElementType.SubComponent))
{
UsfmParserElement elem;
do
Expand All @@ -623,14 +625,14 @@ private void CloseNote(bool closed = false)
break;

elem = State.Peek();
CloseElement(closed && elem.Type == UsfmElementType.Note);
} while (elem.Type != UsfmElementType.Note);
CloseElement(closed && elem.Type == UsfmElementType.SubComponent);
} while (elem.Type != UsfmElementType.SubComponent);
}
}

private void CloseNoteText()
{
while (State.Stack.Count > 0 && State.Peek().Type == UsfmElementType.NoteText)
while (State.Stack.Count > 0 && State.Peek().Type == UsfmElementType.SubComponentText)
CloseElement();
}

Expand All @@ -654,11 +656,11 @@ private void CloseElement(bool closed = false)
case UsfmElementType.Char:
Handler?.EndChar(State, element.Marker, element.Attributes, closed);
break;
case UsfmElementType.Note:
Handler?.EndNote(State, element.Marker, closed);
case UsfmElementType.SubComponent:
Handler?.EndSubComponent(State, element.Marker, closed);
break;
case UsfmElementType.NoteText:
Handler?.EndNoteText(State);
case UsfmElementType.SubComponentText:
Handler?.EndSubComponentText(State);
break;
case UsfmElementType.Table:
Handler?.EndTable(State);
Expand Down Expand Up @@ -694,7 +696,7 @@ private bool IsRef(UsfmToken token)

private bool IsNoteText(UsfmToken token)
{
return token.Marker == "ft" && State.Stack.Any(elem => elem.Type == UsfmElementType.Note);
return token.Marker == "ft" && State.Stack.Any(elem => elem.Type == UsfmElementType.SubComponent);
}
}
}
8 changes: 4 additions & 4 deletions src/SIL.Machine/Corpora/UsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ public virtual void EndChar(
bool closed
) { }

public virtual void StartNote(UsfmParserState state, string marker, string caller, string category) { }
public virtual void StartSubComponent(UsfmParserState state, string marker, string caller, string category) { }

public virtual void EndNote(UsfmParserState state, string marker, bool closed) { }
public virtual void EndSubComponent(UsfmParserState state, string marker, bool closed) { }

public virtual void StartNoteText(UsfmParserState state) { }
public virtual void StartSubComponentText(UsfmParserState state) { }

public virtual void EndNoteText(UsfmParserState state) { }
public virtual void EndSubComponentText(UsfmParserState state) { }

public virtual void StartTable(UsfmParserState state) { }

Expand Down
12 changes: 6 additions & 6 deletions src/SIL.Machine/Corpora/UsfmParserState.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ public UsfmTag CharTag
}

/// <summary>
/// Current note tag or null for none
/// Current sub component tag (note, figure, cross reference) or null for none
/// </summary>
public UsfmTag NoteTag
public UsfmTag SubComponentTag
{
get
{
UsfmParserElement elem = Stack.LastOrDefault(e => e.Type == UsfmElementType.Note);
UsfmParserElement elem = Stack.LastOrDefault(e => e.Type == UsfmElementType.SubComponent);
return elem != null ? Stylesheet.GetTag(elem.Marker) : null;
}
}
Expand Down Expand Up @@ -153,7 +153,7 @@ public bool IsVerseText
get
{
// Sidebars and notes are not verse text
if (_stack.Any(e => e.Type == UsfmElementType.Sidebar || e.Type == UsfmElementType.Note))
if (_stack.Any(e => e.Type == UsfmElementType.Sidebar || e.Type == UsfmElementType.SubComponent))
return false;

if (!IsVersePara)
Expand Down Expand Up @@ -205,8 +205,8 @@ public enum UsfmElementType
Table,
Row,
Cell,
Note,
NoteText,
SubComponent,
SubComponentText,
Sidebar
};

Expand Down
Loading

0 comments on commit 5639d66

Please sign in to comment.