From ee925dd5c64032f06fb09df50b1fd6475f586982 Mon Sep 17 00:00:00 2001 From: Anna Headley <845363+hackartisan@users.noreply.github.com> Date: Wed, 17 Jul 2024 13:58:41 -0400 Subject: [PATCH] Start a mermaid diagram for a full indexing workflow Co-authored-by: Amin Zare Co-authored-by: Trey Pendragon Co-authored-by: Eliot Jordan Co-authored-by: Shaun Ellis --- architecture-decisions/0002-indexing.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/architecture-decisions/0002-indexing.md b/architecture-decisions/0002-indexing.md index 01b02186..4c465d30 100644 --- a/architecture-decisions/0002-indexing.md +++ b/architecture-decisions/0002-indexing.md @@ -84,3 +84,27 @@ We expect reindexing to need to happen often - either because of changing weight We need to find a way to validate that we're indexing 100% of the documents that we pull from Figgy. Keeping track of three different tables may be complicated. However, we expect to be able to scale this architecture out to allow for multiple harvest sources and transformation steps in the future. + + +## WIP full diagram + +```mermaid +--- +title: A full Indexing Pipeline workflow +--- + +sequenceDiagram +Participant LogLocationTable +Participant FiggyDB +Participant HydratorV1 +Participant HydrationLog +Participant TransformerV1 +Participant TransformationLog +Participant IndexerV1 +Participant SolrIndex + +HydratorV1->>LogLocationTable: Set(type: hydrator, log_location: nil, log_version: 1) +HydratorV1->>LogLocationTable: Get last log_location +HydratorV1->>FiggyDB: Get X (e.g. 500) records starting at log_location +HydratorV1->>LogLocationTable: Set(type: hydrator, log_location: nil, log_version: 1) +```