From 06f1220066748df3dfd707b13caa2c41ddf6590c Mon Sep 17 00:00:00 2001
From: Chris Mungall <cjm@berkeleybop.org>
Date: Thu, 28 Mar 2024 12:02:30 -0700
Subject: [PATCH] synonymizer refactor2 (#728)

* synonymizer refactoring.

Moving synonymizer from mapping rules to its own data model.
Moving logic from CLI to utils

* Command line

* Add missing
---
 Makefile                                      |   5 +
 src/oaklib/cli.py                             | 148 ++--
 .../datamodels/mapping_rules_datamodel.py     |  82 +--
 .../datamodels/mapping_rules_datamodel.yaml   |   6 +-
 .../datamodels/synonymizer_datamodel.py       | 697 ++++++++++++++++++
 .../datamodels/synonymizer_datamodel.yaml     |  94 +++
 src/oaklib/datamodels/vocabulary.py           |   7 +
 src/oaklib/utilities/kgcl_utilities.py        |   2 +
 .../utilities/lexical/lexical_indexer.py      |  34 +-
 src/oaklib/utilities/lexical/synonymizer.py   | 145 ++++
 tests/input/cli-synonymizer-rules.yaml        |  13 +
 tests/input/matcher_rules.yaml                |   4 +-
 tests/test_cli.py                             |   5 +-
 tests/test_utilities/test_lexical_index.py    |  10 +-
 tests/test_utilities/test_synonymizer.py      |  99 +++
 15 files changed, 1189 insertions(+), 162 deletions(-)
 create mode 100644 src/oaklib/datamodels/synonymizer_datamodel.py
 create mode 100644 src/oaklib/datamodels/synonymizer_datamodel.yaml
 create mode 100644 src/oaklib/utilities/lexical/synonymizer.py
 create mode 100644 tests/input/cli-synonymizer-rules.yaml
 create mode 100644 tests/test_utilities/test_synonymizer.py

diff --git a/Makefile b/Makefile
index 7e3b6ad85..0cc53b3fb 100644
--- a/Makefile
+++ b/Makefile
@@ -22,6 +22,11 @@ src/oaklib/datamodels/%.py: src/oaklib/datamodels/%.yaml
 #	$(RUN) gen-pydantic $< > $@.tmp && mv $@.tmp $@
 	$(RUN) gen-python $< > $@.tmp && mv $@.tmp $@
 	$(RUN) tox -e lint
+
+src/oaklib/datamodels/synonymizer.py: src/oaklib/datamodels/synonymizer.yaml
+	$(RUN) gen-pydantic $< > $@.tmp && mv $@.tmp $@
+
+
 src/oaklib/datamodels/%.schema.json: src/oaklib/datamodels/%.yaml
 	$(RUN) gen-json-schema $< > $@.tmp && mv $@.tmp $@
 src/oaklib/datamodels/%.owl.ttl: src/oaklib/datamodels/%.yaml
diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py
index 0517fa76b..411b06614 100644
--- a/src/oaklib/cli.py
+++ b/src/oaklib/cli.py
@@ -52,9 +52,9 @@
 import oaklib.datamodels.taxon_constraints as tcdm
 from oaklib import datamodels
 from oaklib.converters.logical_definition_flattener import LogicalDefinitionFlattener
+from oaklib.datamodels import synonymizer_datamodel
 from oaklib.datamodels.association import RollupGroup
 from oaklib.datamodels.cross_ontology_diff import DiffCategory
-from oaklib.datamodels.lexical_index import LexicalTransformation, TransformationType
 from oaklib.datamodels.obograph import (
     BasicPropertyValue,
     Edge,
@@ -167,11 +167,9 @@
     parse_kgcl_files,
     write_kgcl,
 )
-from oaklib.utilities.lexical import patternizer
+from oaklib.utilities.lexical import patternizer, synonymizer
 from oaklib.utilities.lexical.lexical_indexer import (
-    DEFAULT_QUALIFIER,
     add_labels_from_uris,
-    apply_transformation,
     create_lexical_index,
     lexical_index_to_sssom,
     load_lexical_index,
@@ -6496,46 +6494,114 @@ def generate_synonyms(terms, rules_file, apply_patch, patch, patch_format, outpu
     else:
         writer = _get_writer(output_type, impl, StreamingKGCLWriter, kgcl)
         writer.output = output
-    # TODO: Eventually get this from settings as above
+    ruleset = synonymizer_datamodel.RuleSet(**yaml.safe_load(open(rules_file)))
+    change_list = []
+    curie_iter = query_terms_iterator(terms, impl)
+    for change in synonymizer.apply_synonymizer_to_terms(impl, curie_iter, ruleset):
+        change_list.append(change)
+        writer.emit(change)
+
+    writer.finish()
+    if apply_patch and len(change_list) > 0:
+        if output:
+            impl.resource.slug = output
+        _apply_changes(impl, change_list)
+
+
+@main.command()
+@click.argument("terms", nargs=-1)
+@click.option(
+    "--rules-file",
+    "-R",
+    help="path to rules file. Conforms to rules_datamodel.\
+        e.g. https://github.com/INCATools/ontology-access-kit/blob/main/tests/input/matcher_rules.yaml",
+)
+@click.option(
+    "--rules-expression",
+    "-Y",
+    multiple=True,
+    help="YAML encoding of a rules expression",
+)
+@click.option(
+    "--apply-patch/--no-apply-patch",
+    default=False,
+    show_default=True,
+    help="Apply KGCL syntax generated based on the synonymizer rules file.",
+)
+@click.option(
+    "--patch",
+    type=click.File(mode="w"),
+    default=sys.stdout,
+    help="Path to where patch file will be written.",
+)
+@click.option(
+    "--patch-format",
+    help="Output syntax for patches.",
+)
+@output_option
+@output_type_option
+def generate_lexical_replacements(
+    terms, rules_file, rules_expression, apply_patch, patch, patch_format, output, output_type
+):
+    """
+    Generate lexical replacements based on a set of synonymizer rules.
+
+
+    If the `--apply-patch` flag is set, the output will be an ontology file with the changes
+    applied. Pass the `--patch` argument to also get the patch file in KGCL format.
+
+    Example:
+    -------
+
+        runoak -i foo.obo generate-lexical-replacements -R foo_rules.yaml\
+           --patch patch.kgcl --apply-patch -o foo_syn.obo
+
+    If the `--apply-patch` flag is NOT set then the main output will be KGCL commands
+
+    Example:
+    -------
+
+        runoak -i foo.obo generate-lexical-replacements -R foo_rules.yaml -o changes.kgcl
+
+
+    You can also pass the expressions directly as YAML
+
+    Example:
+    -------
+
+        runoak -i foo.obo generate-lexical-replacements \
+          -Y '{match: "nuclear (\\w+)", replacement: "\\1 nucleus"}' .all
+
+    see https://github.com/INCATools/kgcl.
+
+    Note: this command is very similar to generate-synonyms, but the main use case here
+    is replacing terms, and applying rules to other elements such as definitions
+
+    """
+    impl = settings.impl
+    if apply_patch:
+        writer = _get_writer(patch_format, impl, StreamingKGCLWriter, kgcl)
+        writer.output = patch
+    else:
+        writer = _get_writer(output_type, impl, StreamingKGCLWriter, kgcl)
+        writer.output = output
     if rules_file:
-        ruleset = load_mapping_rules(rules_file)
+        ruleset = synonymizer_datamodel.RuleSet(**yaml.safe_load(open(rules_file)))
+    elif rules_expression:
+        ruleset = synonymizer_datamodel.RuleSet()
+        for rule_expression in rules_expression:
+            rule = synonymizer_datamodel.Synonymizer(**yaml.safe_load(rule_expression))
+            ruleset.rules.append(rule)
     else:
-        ruleset = None
-    if not isinstance(impl, OboGraphInterface):
-        raise NotImplementedError
-    syn_rules = [x.synonymizer for x in ruleset.rules if x.synonymizer]
-    terms_to_synonymize = {}
+        raise ValueError("Must specify either --rules-file or --rules-expression")
     change_list = []
-    for curie in query_terms_iterator(terms, impl):
-        # for rule in syn_rules:
-        for _, aliases in impl.entity_alias_map(curie).items():
-            matches = []
-            if aliases is not None:
-                # matches.extend([x for x in aliases if re.search(eval(rule.match), x) is not None])
-                for alias in aliases:
-                    if alias:
-                        synonymized, new_alias, qualifier = apply_transformation(
-                            alias,
-                            LexicalTransformation(
-                                TransformationType.Synonymization, params=syn_rules
-                            ),
-                        )
-                        if synonymized:
-                            matches.append(new_alias)
-
-            if len(matches) > 0:
-                if qualifier is None or qualifier == "":
-                    qualifier = DEFAULT_QUALIFIER
-                terms_to_synonymize[curie] = matches
-                change = kgcl.NewSynonym(
-                    id="kgcl_change_id_" + str(len(terms_to_synonymize)),
-                    about_node=curie,
-                    old_value=alias,
-                    new_value=new_alias,
-                    qualifier=qualifier,
-                )
-                change_list.append(change)
-                writer.emit(change)
+    curie_iter = query_terms_iterator(terms, impl)
+    for change in synonymizer.apply_synonymizer_to_terms(
+        impl, curie_iter, ruleset, include_all=True
+    ):
+        change_list.append(change)
+        writer.emit(change)
+
     writer.finish()
     if apply_patch and len(change_list) > 0:
         if output:
diff --git a/src/oaklib/datamodels/mapping_rules_datamodel.py b/src/oaklib/datamodels/mapping_rules_datamodel.py
index 20b33d294..9dc5fe8de 100644
--- a/src/oaklib/datamodels/mapping_rules_datamodel.py
+++ b/src/oaklib/datamodels/mapping_rules_datamodel.py
@@ -7,39 +7,33 @@
 # license: https://creativecommons.org/publicdomain/zero/1.0/
 
 import dataclasses
-import re
-import sys
 from dataclasses import dataclass
 from typing import Any, ClassVar, Dict, List, Optional, Union
 
-from jsonasobj2 import JsonObj, as_dict
+from jsonasobj2 import as_dict
 from linkml_runtime.linkml_model.meta import (
     EnumDefinition,
     PermissibleValue,
-    PvFormulaOptions,
 )
-from linkml_runtime.linkml_model.types import Boolean, Float, String, Uriorcurie
 from linkml_runtime.utils.curienamespace import CurieNamespace
 from linkml_runtime.utils.dataclass_extensions_376 import (
     dataclasses_init_fn_with_kwargs,
 )
 from linkml_runtime.utils.enumerations import EnumDefinitionImpl
-from linkml_runtime.utils.formatutils import camelcase, sfx, underscore
 from linkml_runtime.utils.metamodelcore import (
     Bool,
     URIorCURIE,
-    bnode,
     empty_dict,
     empty_list,
 )
 from linkml_runtime.utils.slot import Slot
 from linkml_runtime.utils.yamlutils import (
     YAMLRoot,
-    extended_float,
-    extended_int,
     extended_str,
 )
-from rdflib import Namespace, URIRef
+from rdflib import URIRef
+
+from oaklib.datamodels.synonymizer_datamodel import Synonymizer, Test
 
 metamodel_version = "1.7.0"
 version = None
@@ -253,74 +247,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
         super().__post_init__(**kwargs)
 
 
-@dataclass
-class Synonymizer(YAMLRoot):
-    _inherited_slots: ClassVar[List[str]] = []
-
-    class_class_uri: ClassVar[URIRef] = MAPPINGRULES.Synonymizer
-    class_class_curie: ClassVar[str] = "mappingrules:Synonymizer"
-    class_name: ClassVar[str] = "Synonymizer"
-    class_model_uri: ClassVar[URIRef] = MAPPINGRULES.Synonymizer
-
-    the_rule: Optional[str] = None
-    match: Optional[str] = None
-    match_scope: Optional[str] = None
-    replacement: Optional[str] = None
-    qualifier: Optional[str] = None
-    prefix: Optional[str] = None
-    tests: Optional[Union[dict, "Test"]] = None
-
-    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
-        if self.the_rule is not None and not isinstance(self.the_rule, str):
-            self.the_rule = str(self.the_rule)
-
-        if self.match is not None and not isinstance(self.match, str):
-            self.match = str(self.match)
-
-        if self.match_scope is not None and not isinstance(self.match_scope, str):
-            self.match_scope = str(self.match_scope)
-
-        if self.replacement is not None and not isinstance(self.replacement, str):
-            self.replacement = str(self.replacement)
-
-        if self.qualifier is not None and not isinstance(self.qualifier, str):
-            self.qualifier = str(self.qualifier)
-
-        if self.prefix is not None and not isinstance(self.prefix, str):
-            self.prefix = str(self.prefix)
-
-        if self.tests is not None and not isinstance(self.tests, Test):
-            self.tests = Test(**as_dict(self.tests))
-
-        super().__post_init__(**kwargs)
-
-
-@dataclass
-class Test(YAMLRoot):
-    _inherited_slots: ClassVar[List[str]] = []
-
-    class_class_uri: ClassVar[URIRef] = MAPPINGRULES.Test
-    class_class_curie: ClassVar[str] = "mappingrules:Test"
-    class_name: ClassVar[str] = "Test"
-    class_model_uri: ClassVar[URIRef] = MAPPINGRULES.Test
-
-    input: Optional[str] = None
-    output: Optional[str] = None
-    prefix: Optional[str] = None
-
-    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
-        if self.input is not None and not isinstance(self.input, str):
-            self.input = str(self.input)
-
-        if self.output is not None and not isinstance(self.output, str):
-            self.output = str(self.output)
-
-        if self.prefix is not None and not isinstance(self.prefix, str):
-            self.prefix = str(self.prefix)
-
-        super().__post_init__(**kwargs)
-
-
 @dataclass
 class LexicalIndex(YAMLRoot):
     """
diff --git a/src/oaklib/datamodels/mapping_rules_datamodel.yaml b/src/oaklib/datamodels/mapping_rules_datamodel.yaml
index 39d683388..5dbd7e68b 100644
--- a/src/oaklib/datamodels/mapping_rules_datamodel.yaml
+++ b/src/oaklib/datamodels/mapping_rules_datamodel.yaml
@@ -2,7 +2,7 @@ id: https://w3id.org/oak/mapping-rules-datamodel
 title: Mapping Rules Datamodel
 name: mapping-rules-datamodel
 description: >-
-  A datamodel for specifying lexical mapping rules.
+  A datamodel for specifying lexical mapping rules
 license: https://creativecommons.org/publicdomain/zero/1.0/
 
 prefixes:
@@ -30,6 +30,7 @@ emit_prefixes:
 imports:
   - linkml:types
   - lexical_index
+  - synonymizer_datamodel
 
 
 
@@ -86,7 +87,6 @@ classes:
       predicate_id_one_of:
         multivalued: true
 
-
   Postcondition:
     attributes:
       predicate_id:
@@ -101,7 +101,7 @@ classes:
 
   Synonymizer:
    attributes:
-    the_rule:
+    description:
       description: Description of the rule.
       range: string
     match:
diff --git a/src/oaklib/datamodels/synonymizer_datamodel.py b/src/oaklib/datamodels/synonymizer_datamodel.py
new file mode 100644
index 000000000..5149caad7
--- /dev/null
+++ b/src/oaklib/datamodels/synonymizer_datamodel.py
@@ -0,0 +1,697 @@
+# Auto generated from synonymizer_datamodel.yaml by pythongen.py version: 0.0.1
+# Generation date: 2024-03-27T14:21:27
+# Schema: synonymizer_datamodel
+#
+# id: https://w3id.org/oak/synonymizer-datamodel
+# description: A datamodel for specifying synonymization rules
+# license: https://creativecommons.org/publicdomain/zero/1.0/
+
+import dataclasses
+import re
+from jsonasobj2 import JsonObj, as_dict
+from typing import Optional, List, Union, Dict, ClassVar, Any
+from dataclasses import dataclass
+from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions
+
+from linkml_runtime.utils.slot import Slot
+from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode
+from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int
+from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs
+from linkml_runtime.utils.formatutils import camelcase, underscore, sfx
+from linkml_runtime.utils.enumerations import EnumDefinitionImpl
+from rdflib import Namespace, URIRef
+from linkml_runtime.utils.curienamespace import CurieNamespace
+from linkml_runtime.linkml_model.types import Boolean, String, Uriorcurie
+from linkml_runtime.utils.metamodelcore import Bool, URIorCURIE
+
+metamodel_version = "1.7.0"
+version = None
+
+# Overwrite dataclasses _init_fn to add **kwargs in __init__
+dataclasses._init_fn = dataclasses_init_fn_with_kwargs
+
+# Namespaces
+LINKML = CurieNamespace("linkml", "https://w3id.org/linkml/")
+ONTOLEXINDEX = CurieNamespace("ontolexindex", "https://w3id.org/oak/lexical-index/")
+OWL = CurieNamespace("owl", "http://www.w3.org/2002/07/owl#")
+PAV = CurieNamespace("pav", "http://purl.org/pav/")
+PROV = CurieNamespace("prov", "http://www.w3.org/ns/prov#")
+RDF = CurieNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+RDFS = CurieNamespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
+SCHEMA = CurieNamespace("schema", "http://schema.org/")
+SH = CurieNamespace("sh", "https://w3id.org/shacl/")
+SKOS = CurieNamespace("skos", "http://www.w3.org/2004/02/skos/core#")
+SYNONYMIZER = CurieNamespace("synonymizer", "https://w3id.org/oak/synonymizer-datamodel/")
+XSD = CurieNamespace("xsd", "http://www.w3.org/2001/XMLSchema#")
+DEFAULT_ = SYNONYMIZER
+
+
+# Types
+class RegularExpressionString(String):
+    type_class_uri = XSD["string"]
+    type_class_curie = "xsd:string"
+    type_name = "RegularExpressionString"
+    type_model_uri = SYNONYMIZER.RegularExpressionString
+
+
+# Class references
+class LexicalGroupingTerm(extended_str):
+    pass
+
+
+class LexicalTransformationPipelineName(extended_str):
+    pass
+
+
+@dataclass
+class RuleSet(YAMLRoot):
+    """
+    A set of rules for generating synonyms or alternate lexical elements.
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = SYNONYMIZER["RuleSet"]
+    class_class_curie: ClassVar[str] = "synonymizer:RuleSet"
+    class_name: ClassVar[str] = "RuleSet"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.RuleSet
+
+    rules: Optional[Union[Union[dict, "Synonymizer"], List[Union[dict, "Synonymizer"]]]] = (
+        empty_list()
+    )
+    prefix: Optional[str] = None
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if not isinstance(self.rules, list):
+            self.rules = [self.rules] if self.rules is not None else []
+        self.rules = [
+            v if isinstance(v, Synonymizer) else Synonymizer(**as_dict(v)) for v in self.rules
+        ]
+
+        if self.prefix is not None and not isinstance(self.prefix, str):
+            self.prefix = str(self.prefix)
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class Synonymizer(YAMLRoot):
+    """
+    Specification of a rule for generating a synonym or alternate lexical element.
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = SYNONYMIZER["Synonymizer"]
+    class_class_curie: ClassVar[str] = "synonymizer:Synonymizer"
+    class_name: ClassVar[str] = "Synonymizer"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.Synonymizer
+
+    description: Optional[str] = None
+    match: Optional[Union[str, RegularExpressionString]] = None
+    match_scope: Optional[str] = None
+    replacement: Optional[Union[str, RegularExpressionString]] = None
+    qualifier: Optional[str] = None
+    prefix: Optional[str] = None
+    in_place: Optional[Union[bool, Bool]] = None
+    tests: Optional[Union[Union[dict, "Test"], List[Union[dict, "Test"]]]] = empty_list()
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self.description is not None and not isinstance(self.description, str):
+            self.description = str(self.description)
+
+        if self.match is not None and not isinstance(self.match, RegularExpressionString):
+            self.match = RegularExpressionString(self.match)
+
+        if self.match_scope is not None and not isinstance(self.match_scope, str):
+            self.match_scope = str(self.match_scope)
+
+        if self.replacement is not None and not isinstance(
+            self.replacement, RegularExpressionString
+        ):
+            self.replacement = RegularExpressionString(self.replacement)
+
+        if self.qualifier is not None and not isinstance(self.qualifier, str):
+            self.qualifier = str(self.qualifier)
+
+        if self.prefix is not None and not isinstance(self.prefix, str):
+            self.prefix = str(self.prefix)
+
+        if self.in_place is not None and not isinstance(self.in_place, Bool):
+            self.in_place = Bool(self.in_place)
+
+        if not isinstance(self.tests, list):
+            self.tests = [self.tests] if self.tests is not None else []
+        self.tests = [v if isinstance(v, Test) else Test(**as_dict(v)) for v in self.tests]
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class Test(YAMLRoot):
+    """
+    A unit test for a rule, specifies an intended output for an input
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = SYNONYMIZER["Test"]
+    class_class_curie: ClassVar[str] = "synonymizer:Test"
+    class_name: ClassVar[str] = "Test"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.Test
+
+    input: Optional[str] = None
+    output: Optional[str] = None
+    prefix: Optional[str] = None
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self.input is not None and not isinstance(self.input, str):
+            self.input = str(self.input)
+
+        if self.output is not None and not isinstance(self.output, str):
+            self.output = str(self.output)
+
+        if self.prefix is not None and not isinstance(self.prefix, str):
+            self.prefix = str(self.prefix)
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class LexicalIndex(YAMLRoot):
+    """
+    An index over an ontology keyed by lexical unit
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = ONTOLEXINDEX["LexicalIndex"]
+    class_class_curie: ClassVar[str] = "ontolexindex:LexicalIndex"
+    class_name: ClassVar[str] = "LexicalIndex"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.LexicalIndex
+
+    groupings: Optional[
+        Union[
+            Dict[Union[str, LexicalGroupingTerm], Union[dict, "LexicalGrouping"]],
+            List[Union[dict, "LexicalGrouping"]],
+        ]
+    ] = empty_dict()
+    pipelines: Optional[
+        Union[
+            Dict[
+                Union[str, LexicalTransformationPipelineName],
+                Union[dict, "LexicalTransformationPipeline"],
+            ],
+            List[Union[dict, "LexicalTransformationPipeline"]],
+        ]
+    ] = empty_dict()
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        self._normalize_inlined_as_dict(
+            slot_name="groupings", slot_type=LexicalGrouping, key_name="term", keyed=True
+        )
+
+        self._normalize_inlined_as_dict(
+            slot_name="pipelines",
+            slot_type=LexicalTransformationPipeline,
+            key_name="name",
+            keyed=True,
+        )
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class LexicalGrouping(YAMLRoot):
+    """
+    A grouping of ontology elements by a shared lexical term
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = ONTOLEXINDEX["LexicalGrouping"]
+    class_class_curie: ClassVar[str] = "ontolexindex:LexicalGrouping"
+    class_name: ClassVar[str] = "LexicalGrouping"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.LexicalGrouping
+
+    term: Union[str, LexicalGroupingTerm] = None
+    relationships: Optional[
+        Union[Union[dict, "RelationshipToTerm"], List[Union[dict, "RelationshipToTerm"]]]
+    ] = empty_list()
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self._is_empty(self.term):
+            self.MissingRequiredField("term")
+        if not isinstance(self.term, LexicalGroupingTerm):
+            self.term = LexicalGroupingTerm(self.term)
+
+        if not isinstance(self.relationships, list):
+            self.relationships = [self.relationships] if self.relationships is not None else []
+        self.relationships = [
+            v if isinstance(v, RelationshipToTerm) else RelationshipToTerm(**as_dict(v))
+            for v in self.relationships
+        ]
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class RelationshipToTerm(YAMLRoot):
+    """
+    A relationship of an ontology element to a lexical term
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = ONTOLEXINDEX["RelationshipToTerm"]
+    class_class_curie: ClassVar[str] = "ontolexindex:RelationshipToTerm"
+    class_name: ClassVar[str] = "RelationshipToTerm"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.RelationshipToTerm
+
+    predicate: Optional[Union[str, URIorCURIE]] = None
+    element: Optional[Union[str, URIorCURIE]] = None
+    element_term: Optional[str] = None
+    source: Optional[Union[str, URIorCURIE]] = None
+    pipeline: Optional[
+        Union[
+            Union[str, LexicalTransformationPipelineName],
+            List[Union[str, LexicalTransformationPipelineName]],
+        ]
+    ] = empty_list()
+    synonymized: Optional[Union[bool, Bool]] = None
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self.predicate is not None and not isinstance(self.predicate, URIorCURIE):
+            self.predicate = URIorCURIE(self.predicate)
+
+        if self.element is not None and not isinstance(self.element, URIorCURIE):
+            self.element = URIorCURIE(self.element)
+
+        if self.element_term is not None and not isinstance(self.element_term, str):
+            self.element_term = str(self.element_term)
+
+        if self.source is not None and not isinstance(self.source, URIorCURIE):
+            self.source = URIorCURIE(self.source)
+
+        if not isinstance(self.pipeline, list):
+            self.pipeline = [self.pipeline] if self.pipeline is not None else []
+        self.pipeline = [
+            (
+                v
+                if isinstance(v, LexicalTransformationPipelineName)
+                else LexicalTransformationPipelineName(v)
+            )
+            for v in self.pipeline
+        ]
+
+        if self.synonymized is not None and not isinstance(self.synonymized, Bool):
+            self.synonymized = Bool(self.synonymized)
+
+        super().__post_init__(**kwargs)
+
+
+class Activity(YAMLRoot):
+    """
+    Generic grouping for any lexical operation
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = PROV["Activity"]
+    class_class_curie: ClassVar[str] = "prov:Activity"
+    class_name: ClassVar[str] = "Activity"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.Activity
+
+
+@dataclass
+class LexicalTransformationPipeline(Activity):
+    """
+    A collection of atomic lexical transformations that are applied in serial fashion
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = ONTOLEXINDEX["LexicalTransformationPipeline"]
+    class_class_curie: ClassVar[str] = "ontolexindex:LexicalTransformationPipeline"
+    class_name: ClassVar[str] = "LexicalTransformationPipeline"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.LexicalTransformationPipeline
+
+    name: Union[str, LexicalTransformationPipelineName] = None
+    transformations: Optional[
+        Union[Union[dict, "LexicalTransformation"], List[Union[dict, "LexicalTransformation"]]]
+    ] = empty_list()
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self._is_empty(self.name):
+            self.MissingRequiredField("name")
+        if not isinstance(self.name, LexicalTransformationPipelineName):
+            self.name = LexicalTransformationPipelineName(self.name)
+
+        if not isinstance(self.transformations, list):
+            self.transformations = (
+                [self.transformations] if self.transformations is not None else []
+            )
+        self.transformations = [
+            v if isinstance(v, LexicalTransformation) else LexicalTransformation(**as_dict(v))
+            for v in self.transformations
+        ]
+
+        super().__post_init__(**kwargs)
+
+
+@dataclass
+class LexicalTransformation(Activity):
+    """
+    An atomic lexical transformation applied on a term (string) yielding a transformed string
+    """
+
+    _inherited_slots: ClassVar[List[str]] = []
+
+    class_class_uri: ClassVar[URIRef] = ONTOLEXINDEX["LexicalTransformation"]
+    class_class_curie: ClassVar[str] = "ontolexindex:LexicalTransformation"
+    class_name: ClassVar[str] = "LexicalTransformation"
+    class_model_uri: ClassVar[URIRef] = SYNONYMIZER.LexicalTransformation
+
+    type: Optional[Union[str, "TransformationType"]] = None
+    params: Optional[Union[Union[dict, "Any"], List[Union[dict, "Any"]]]] = empty_list()
+
+    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+        if self.type is not None and not isinstance(self.type, TransformationType):
+            self.type = TransformationType(self.type)
+
+        super().__post_init__(**kwargs)
+
+
+Any = Any
+
+
+# Enumerations
+class TransformationType(EnumDefinitionImpl):
+    """
+    A controlled datamodels of the types of transformation that can be applied to
+    """
+
+    Stemming = PermissibleValue(
+        text="Stemming",
+        description="Removal of the last few characters of a word to yield a stem term for each word in the term",
+    )
+    Lemmatization = PermissibleValue(
+        text="Lemmatization",
+        description="Contextual reduction of a word to its base form for each word in the term",
+    )
+    WordOrderNormalization = PermissibleValue(
+        text="WordOrderNormalization",
+        description="reorder words in the term to a standard order such that comparisons are order-independent",
+    )
+    Depluralization = PermissibleValue(
+        text="Depluralization",
+        description="Transform plural form to singular form for each word in a term",
+    )
+    CaseNormalization = PermissibleValue(
+        text="CaseNormalization",
+        description="Transform term to a standard case, typically lowercase",
+    )
+    WhitespaceNormalization = PermissibleValue(
+        text="WhitespaceNormalization",
+        description="Trim whitespace, condense whitespace runs, and transform all non-space whitespace to spaces",
+    )
+    TermExpanson = PermissibleValue(
+        text="TermExpanson", description="Expand terms using a dictionary"
+    )
+    Synonymization = PermissibleValue(
+        text="Synonymization", description="Applying synonymizer rules from matcher_rules.yaml"
+    )
+
+    _defn = EnumDefinition(
+        name="TransformationType",
+        description="A controlled datamodels of the types of transformation that can be applied to",
+    )
+
+
+# Slots
+class slots:
+    pass
+
+
+slots.ruleSet__rules = Slot(
+    uri=SYNONYMIZER.rules,
+    name="ruleSet__rules",
+    curie=SYNONYMIZER.curie("rules"),
+    model_uri=SYNONYMIZER.ruleSet__rules,
+    domain=None,
+    range=Optional[Union[Union[dict, Synonymizer], List[Union[dict, Synonymizer]]]],
+)
+
+slots.ruleSet__prefix = Slot(
+    uri=SYNONYMIZER.prefix,
+    name="ruleSet__prefix",
+    curie=SYNONYMIZER.curie("prefix"),
+    model_uri=SYNONYMIZER.ruleSet__prefix,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.synonymizer__description = Slot(
+    uri=SYNONYMIZER.description,
+    name="synonymizer__description",
+    curie=SYNONYMIZER.curie("description"),
+    model_uri=SYNONYMIZER.synonymizer__description,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.synonymizer__match = Slot(
+    uri=SYNONYMIZER.match,
+    name="synonymizer__match",
+    curie=SYNONYMIZER.curie("match"),
+    model_uri=SYNONYMIZER.synonymizer__match,
+    domain=None,
+    range=Optional[Union[str, RegularExpressionString]],
+)
+
+slots.synonymizer__match_scope = Slot(
+    uri=SYNONYMIZER.match_scope,
+    name="synonymizer__match_scope",
+    curie=SYNONYMIZER.curie("match_scope"),
+    model_uri=SYNONYMIZER.synonymizer__match_scope,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.synonymizer__replacement = Slot(
+    uri=SYNONYMIZER.replacement,
+    name="synonymizer__replacement",
+    curie=SYNONYMIZER.curie("replacement"),
+    model_uri=SYNONYMIZER.synonymizer__replacement,
+    domain=None,
+    range=Optional[Union[str, RegularExpressionString]],
+)
+
+slots.synonymizer__qualifier = Slot(
+    uri=SYNONYMIZER.qualifier,
+    name="synonymizer__qualifier",
+    curie=SYNONYMIZER.curie("qualifier"),
+    model_uri=SYNONYMIZER.synonymizer__qualifier,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.synonymizer__prefix = Slot(
+    uri=SYNONYMIZER.prefix,
+    name="synonymizer__prefix",
+    curie=SYNONYMIZER.curie("prefix"),
+    model_uri=SYNONYMIZER.synonymizer__prefix,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.synonymizer__in_place = Slot(
+    uri=SYNONYMIZER.in_place,
+    name="synonymizer__in_place",
+    curie=SYNONYMIZER.curie("in_place"),
+    model_uri=SYNONYMIZER.synonymizer__in_place,
+    domain=None,
+    range=Optional[Union[bool, Bool]],
+)
+
+slots.synonymizer__tests = Slot(
+    uri=SYNONYMIZER.tests,
+    name="synonymizer__tests",
+    curie=SYNONYMIZER.curie("tests"),
+    model_uri=SYNONYMIZER.synonymizer__tests,
+    domain=None,
+    range=Optional[Union[Union[dict, Test], List[Union[dict, Test]]]],
+)
+
+slots.test__input = Slot(
+    uri=SYNONYMIZER.input,
+    name="test__input",
+    curie=SYNONYMIZER.curie("input"),
+    model_uri=SYNONYMIZER.test__input,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.test__output = Slot(
+    uri=SYNONYMIZER.output,
+    name="test__output",
+    curie=SYNONYMIZER.curie("output"),
+    model_uri=SYNONYMIZER.test__output,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.test__prefix = Slot(
+    uri=SYNONYMIZER.prefix,
+    name="test__prefix",
+    curie=SYNONYMIZER.curie("prefix"),
+    model_uri=SYNONYMIZER.test__prefix,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.lexicalIndex__groupings = Slot(
+    uri=ONTOLEXINDEX.groupings,
+    name="lexicalIndex__groupings",
+    curie=ONTOLEXINDEX.curie("groupings"),
+    model_uri=SYNONYMIZER.lexicalIndex__groupings,
+    domain=None,
+    range=Optional[
+        Union[
+            Dict[Union[str, LexicalGroupingTerm], Union[dict, LexicalGrouping]],
+            List[Union[dict, LexicalGrouping]],
+        ]
+    ],
+)
+
+slots.lexicalIndex__pipelines = Slot(
+    uri=ONTOLEXINDEX.pipelines,
+    name="lexicalIndex__pipelines",
+    curie=ONTOLEXINDEX.curie("pipelines"),
+    model_uri=SYNONYMIZER.lexicalIndex__pipelines,
+    domain=None,
+    range=Optional[
+        Union[
+            Dict[
+                Union[str, LexicalTransformationPipelineName],
+                Union[dict, LexicalTransformationPipeline],
+            ],
+            List[Union[dict, LexicalTransformationPipeline]],
+        ]
+    ],
+)
+
+slots.lexicalGrouping__term = Slot(
+    uri=ONTOLEXINDEX.term,
+    name="lexicalGrouping__term",
+    curie=ONTOLEXINDEX.curie("term"),
+    model_uri=SYNONYMIZER.lexicalGrouping__term,
+    domain=None,
+    range=URIRef,
+)
+
+slots.lexicalGrouping__relationships = Slot(
+    uri=ONTOLEXINDEX.relationships,
+    name="lexicalGrouping__relationships",
+    curie=ONTOLEXINDEX.curie("relationships"),
+    model_uri=SYNONYMIZER.lexicalGrouping__relationships,
+    domain=None,
+    range=Optional[Union[Union[dict, RelationshipToTerm], List[Union[dict, RelationshipToTerm]]]],
+)
+
+slots.relationshipToTerm__predicate = Slot(
+    uri=ONTOLEXINDEX.predicate,
+    name="relationshipToTerm__predicate",
+    curie=ONTOLEXINDEX.curie("predicate"),
+    model_uri=SYNONYMIZER.relationshipToTerm__predicate,
+    domain=None,
+    range=Optional[Union[str, URIorCURIE]],
+)
+
+slots.relationshipToTerm__element = Slot(
+    uri=ONTOLEXINDEX.element,
+    name="relationshipToTerm__element",
+    curie=ONTOLEXINDEX.curie("element"),
+    model_uri=SYNONYMIZER.relationshipToTerm__element,
+    domain=None,
+    range=Optional[Union[str, URIorCURIE]],
+)
+
+slots.relationshipToTerm__element_term = Slot(
+    uri=ONTOLEXINDEX.element_term,
+    name="relationshipToTerm__element_term",
+    curie=ONTOLEXINDEX.curie("element_term"),
+    model_uri=SYNONYMIZER.relationshipToTerm__element_term,
+    domain=None,
+    range=Optional[str],
+)
+
+slots.relationshipToTerm__source = Slot(
+    uri=ONTOLEXINDEX.source,
+    name="relationshipToTerm__source",
+    curie=ONTOLEXINDEX.curie("source"),
+    model_uri=SYNONYMIZER.relationshipToTerm__source,
+    domain=None,
+    range=Optional[Union[str, URIorCURIE]],
+)
+
+slots.relationshipToTerm__pipeline = Slot(
+    uri=ONTOLEXINDEX.pipeline,
+    name="relationshipToTerm__pipeline",
+    curie=ONTOLEXINDEX.curie("pipeline"),
+    model_uri=SYNONYMIZER.relationshipToTerm__pipeline,
+    domain=None,
+    range=Optional[
+        Union[
+            Union[str, LexicalTransformationPipelineName],
+            List[Union[str, LexicalTransformationPipelineName]],
+        ]
+    ],
+)
+
+slots.relationshipToTerm__synonymized = Slot(
+    uri=ONTOLEXINDEX.synonymized,
+    name="relationshipToTerm__synonymized",
+    curie=ONTOLEXINDEX.curie("synonymized"),
+    model_uri=SYNONYMIZER.relationshipToTerm__synonymized,
+    domain=None,
+    range=Optional[Union[bool, Bool]],
+)
+
+slots.lexicalTransformationPipeline__name = Slot(
+    uri=ONTOLEXINDEX.name,
+    name="lexicalTransformationPipeline__name",
+    curie=ONTOLEXINDEX.curie("name"),
+    model_uri=SYNONYMIZER.lexicalTransformationPipeline__name,
+    domain=None,
+    range=URIRef,
+)
+
+slots.lexicalTransformationPipeline__transformations = Slot(
+    uri=ONTOLEXINDEX.transformations,
+    name="lexicalTransformationPipeline__transformations",
+    curie=ONTOLEXINDEX.curie("transformations"),
+    model_uri=SYNONYMIZER.lexicalTransformationPipeline__transformations,
+    domain=None,
+    range=Optional[
+        Union[Union[dict, LexicalTransformation], List[Union[dict, LexicalTransformation]]]
+    ],
+)
+
+slots.lexicalTransformation__type = Slot(
+    uri=ONTOLEXINDEX.type,
+    name="lexicalTransformation__type",
+    curie=ONTOLEXINDEX.curie("type"),
+    model_uri=SYNONYMIZER.lexicalTransformation__type,
+    domain=None,
+    range=Optional[Union[str, "TransformationType"]],
+)
+
+slots.lexicalTransformation__params = Slot(
+    uri=ONTOLEXINDEX.params,
+    name="lexicalTransformation__params",
+    curie=ONTOLEXINDEX.curie("params"),
+    model_uri=SYNONYMIZER.lexicalTransformation__params,
+    domain=None,
+    range=Optional[Union[Union[dict, Any], List[Union[dict, Any]]]],
+)
diff --git a/src/oaklib/datamodels/synonymizer_datamodel.yaml b/src/oaklib/datamodels/synonymizer_datamodel.yaml
new file mode 100644
index 000000000..4a53f2a64
--- /dev/null
+++ b/src/oaklib/datamodels/synonymizer_datamodel.yaml
@@ -0,0 +1,94 @@
+id: https://w3id.org/oak/synonymizer-datamodel
+title: Synonymizer Datamodel
+name: synonymizer_datamodel
+description: >-
+  A datamodel for specifying synonymization rules
+license: https://creativecommons.org/publicdomain/zero/1.0/
+
+prefixes:
+  linkml: https://w3id.org/linkml/
+  synonymizer: https://w3id.org/oak/synonymizer-datamodel/
+  skos: http://www.w3.org/2004/02/skos/core#
+  pav: http://purl.org/pav/
+  schema: http://schema.org/
+  sh: https://w3id.org/shacl/
+  prov: http://www.w3.org/ns/prov#
+
+default_prefix: synonymizer
+default_range: string
+
+default_curi_maps:
+  - semweb_context
+
+emit_prefixes:
+  - linkml
+  - rdf
+  - rdfs
+  - xsd
+  - owl
+
+imports:
+  - linkml:types
+  - lexical_index
+
+
+types:
+  RegularExpressionString:
+    typeof: string
+
+
+#==================================
+# Classes                         #
+#==================================
+classes:
+
+  RuleSet:
+    description: A set of rules for generating synonyms or alternate lexical elements.
+    attributes:
+      rules:
+        description: A list of rules for generating synonyms or alternate lexical elements.
+        range: Synonymizer
+        multivalued: true
+      prefix:
+        description: The prefix that qualifies for the rule.
+        range: string
+
+  Synonymizer:
+   description: Specification of a rule for generating a synonym or alternate lexical element.
+   attributes:
+    description:
+      description: Description of the rule.
+      range: string
+    match:
+      description: Reg-ex rule to match substrings in labels.
+      range: RegularExpressionString
+    match_scope:
+      description: Synonym scope of the reg-ex rule, e.g. exact, narrow
+      range: string
+    replacement:
+      description: Reg-ex rule to replace substrings in labels
+      range: RegularExpressionString
+    qualifier:
+      description: Type of match for the new synonym generated.
+      range: string
+    prefix:
+      description: The rule applies to nodes of a specific prefix.
+      range: string
+    in_place:
+      description: Whether the rule is applied in place or not.
+      range: boolean
+    tests:
+      description: Unit tests for the rule.
+      range: Test
+      multivalued: true
+  
+  Test:
+   description: A unit test for a rule, specifies an intended output for an input
+   attributes:
+    input:
+     description: Input string for the rule.
+    output:
+     description: Output based on the rule.
+    prefix:
+      description: The prefix that qualifies for the rule.
+     
\ No newline at end of file
diff --git a/src/oaklib/datamodels/vocabulary.py b/src/oaklib/datamodels/vocabulary.py
index 72ffbc5c0..8dec4aa3f 100644
--- a/src/oaklib/datamodels/vocabulary.py
+++ b/src/oaklib/datamodels/vocabulary.py
@@ -41,6 +41,7 @@
 }
 SYNONYM_PRED_TO_SCOPE_MAP = {v: k for k, v in SCOPE_TO_SYNONYM_PRED_MAP.items()}
 
+
 DEPRECATED_PREDICATE = omd.slots.deprecated.curie
 TERM_REPLACED_BY = omd.slots.term_replaced_by.curie
 CONSIDER_REPLACEMENT = omd.slots.consider.curie
@@ -176,6 +177,12 @@
 NODE_DELETION = "NodeDeletion"
 NODE_TEXT_DEFINITION_CHANGE = "NodeTextDefinitionChange"
 
+EXTENDED_SCOPE_TO_SYNONYM_PRED_MAP = {
+    "LABEL": LABEL_PREDICATE,
+    "DEFINITION": HAS_DEFINITION_CURIE,
+    **SCOPE_TO_SYNONYM_PRED_MAP,  # scope name -> predicate, matching the keys above
+}
+
 
 class SEMAPV(Enum):
     """SEMAPV Enum containing different mapping_justification."""
diff --git a/src/oaklib/utilities/kgcl_utilities.py b/src/oaklib/utilities/kgcl_utilities.py
index 9ac74ad89..bbb4f916a 100644
--- a/src/oaklib/utilities/kgcl_utilities.py
+++ b/src/oaklib/utilities/kgcl_utilities.py
@@ -103,6 +103,8 @@ def tidy_change_object(change: kgcl.Change):
     Sometimes the main kgcl parser will leave quotes in place, URIs quoted, etc.
     As these are fixed in the main KCGL repo we can remove these here.
 
+    See `<https://github.com/INCATools/kgcl/issues/66>`_ for more information.
+
     :param change:
     :return:
     """
diff --git a/src/oaklib/utilities/lexical/lexical_indexer.py b/src/oaklib/utilities/lexical/lexical_indexer.py
index c535a94af..96ce7cbaf 100644
--- a/src/oaklib/utilities/lexical/lexical_indexer.py
+++ b/src/oaklib/utilities/lexical/lexical_indexer.py
@@ -34,8 +34,8 @@
 from oaklib.datamodels.mapping_rules_datamodel import (
     MappingRuleCollection,
     Precondition,
-    Synonymizer,
 )
+from oaklib.datamodels.synonymizer_datamodel import Synonymizer
 from oaklib.datamodels.vocabulary import (
     IDENTIFIER_PREDICATE,
     SEMAPV,
@@ -47,6 +47,7 @@
 from oaklib.interfaces import BasicOntologyInterface
 from oaklib.types import CURIE, PRED_CURIE
 from oaklib.utilities.basic_utils import pairs_as_dict
+from oaklib.utilities.lexical.synonymizer import apply_synonymizer
 
 LEXICAL_INDEX_FORMATS = ["yaml", "json"]
 DEFAULT_QUALIFIER = "exact"
@@ -470,7 +471,7 @@ def precondition_holds(precondition: Precondition, mapping: Mapping) -> bool:
 
 def apply_transformation(
     term: str, transformation: LexicalTransformation
-) -> Union[str, List[Tuple[bool, str, str]]]:
+) -> Union[str, Tuple[bool, str, str]]:
     """
     Apply an individual transformation on a term
 
@@ -497,35 +498,6 @@ def apply_transformation(
         )
 
 
-def apply_synonymizer(term: str, rules: List[Synonymizer]) -> Tuple[bool, str, str]:
-    """
-    Apply synonymizer rules declared in the given match-rules.yaml file.
-
-    The basic concept is looking for regex in labels and replacing the ones that match
-    with the string passed in 'match.replacement'. Also set qualifier ('match.qualifier')
-    as to whether the replacement is an 'exact', 'broad', 'narrow', or 'related' synonym.
-
-    Note: This function "yields" all intermediate results (for each rule applied)
-    as opposed to a final result. The reason being we only want to return a "True"
-    synonymized result. If the term is not synonymized, then the result will be just
-    the term and a default qualifier. In the case of multiple synonyms, the actual result
-    will be the latest synonymized result.In other words, all the rules have been
-    implemented on the term to finally produce the result.
-
-    :param term: Original label.
-    :param rules: Synonymizer rules from match-rules.yaml file.
-    :yield: A Tuple stating [if the label changed, new label, qualifier]
-    """
-    for rule in rules:
-        tmp_term_2 = term
-        term = re.sub(rule.match, rule.replacement, term)
-
-        if tmp_term_2 != term:
-            yield True, term.strip(), rule.qualifier
-        else:
-            yield False, term.strip(), rule.qualifier
-
-
 def save_mapping_rules(mapping_rules: MappingRuleCollection, path: str):
     """
     Saves a YAML using standard mapping of datanodel to YAML
diff --git a/src/oaklib/utilities/lexical/synonymizer.py b/src/oaklib/utilities/lexical/synonymizer.py
new file mode 100644
index 000000000..72e3553e6
--- /dev/null
+++ b/src/oaklib/utilities/lexical/synonymizer.py
@@ -0,0 +1,145 @@
+import re
+from typing import Iterable, Iterator, List, Optional, Tuple
+
+from kgcl_schema.datamodel import kgcl
+
+from oaklib.datamodels.synonymizer_datamodel import RuleSet, Synonymizer
+from oaklib.datamodels.vocabulary import (
+    EXTENDED_SCOPE_TO_SYNONYM_PRED_MAP,
+)
+from oaklib.interfaces import BasicOntologyInterface
+from oaklib.types import CURIE
+
+
+def apply_synonymizer(
+    term: str, rules: List[Synonymizer], scope_predicate: Optional[CURIE] = None
+) -> Iterator[Tuple[bool, str, Optional[str]]]:
+    """
+    Apply synonymizer rules to a term, one rule after another.
+
+    Each rule whose scope matches ``scope_predicate`` is applied with ``re.sub``: text
+    matching ``rule.match`` is replaced by ``rule.replacement``. The rule's ``qualifier``
+    (e.g. 'exact', 'broad', 'narrow', 'related'; None if unset) is passed through as-is.
+
+    Note: rules are chained. The output of one rule is the input of the next, and one
+    tuple is yielded per applied rule (intermediate results), not a single final result.
+    The first element of each tuple tells whether that particular rule changed the term.
+    The yielded term has surrounding whitespace stripped, but the unstripped term is
+    what is carried forward to the next rule. Rules skipped by scope yield nothing.
+
+    :param term: Original label.
+    :param rules: Synonymizer rules, applied in list order.
+    :param scope_predicate: Predicate of the text being processed; None matches any rule.
+    :yield: A Tuple stating [if the label changed, new label, qualifier]
+    """
+    for rule in rules:
+        if not scope_matches(rule, scope_predicate):
+            continue
+        tmp_term_2 = term
+        term = re.sub(rule.match, rule.replacement, term)
+
+        if tmp_term_2 != term:
+            yield True, term.strip(), rule.qualifier
+        else:
+            yield False, term.strip(), rule.qualifier
+
+
+def apply_synonymizer_to_terms(
+    adapter: BasicOntologyInterface,
+    terms: Iterable[CURIE],
+    ruleset: RuleSet,
+    include_all: bool = False,
+) -> Iterator[kgcl.Change]:
+    """
+    Apply synonymizer rules to a list of terms, yielding one KGCL change per replacement.
+
+    :param adapter: Ontology adapter used to look up aliases (and definitions) of each term.
+    :param terms: CURIEs of the terms to process.
+    :param ruleset: Rule set whose rules are applied, each independently, to every alias.
+    :param include_all: If true, also apply the rules to the text definition of each term.
+    :return: Iterator over changes; the change class depends on the rule qualifier/in_place.
+    """
+    n = 0
+    for curie in terms:
+        tvs = list(adapter.entity_alias_map(curie).items())
+        if include_all:
+            defn = adapter.definition(curie)
+            if defn:
+                tvs.append(("definition", [defn]))
+        for scope_pred, aliases in tvs:
+            if aliases is not None:
+                for alias in aliases:
+                    if alias:
+                        for rule in ruleset.rules:
+                            for replaced, new_alias, qualifier in apply_synonymizer(
+                                alias, [rule], scope_pred
+                            ):
+                                if replaced:
+                                    if qualifier is None or qualifier == "":
+                                        qualifier = "exact"
+                                    n += 1
+                                    change_id = f"kgcl_change_id_{n}"
+                                    if qualifier == "label":
+                                        change = kgcl.NodeRename(
+                                            id=change_id,
+                                            about_node=curie,
+                                            old_value=alias,
+                                            new_value=new_alias,
+                                        )
+                                    elif qualifier == "definition":
+                                        change = kgcl.NodeTextDefinitionChange(
+                                            id=change_id,
+                                            about_node=curie,
+                                            old_value=alias,
+                                            new_value=new_alias,
+                                        )
+                                    else:
+                                        if rule.in_place:
+                                            change = kgcl.SynonymReplacement(
+                                                id=change_id,
+                                                about_node=curie,
+                                                old_value=alias,
+                                                new_value=new_alias,
+                                            )
+                                        else:
+                                            change = kgcl.NewSynonym(
+                                                id=change_id,
+                                                about_node=curie,
+                                                old_value=alias,
+                                                new_value=new_alias,
+                                                qualifier=qualifier,
+                                            )
+                                    yield change
+
+
+def scope_matches(rule: Synonymizer, scope_predicate: Optional[CURIE]) -> bool:
+    """
+    Decide whether a rule is applicable to text attached via the given predicate.
+
+    >>> scope_matches(Synonymizer(match_scope="EXACT"), "oio:hasExactSynonym")
+    True
+    >>> scope_matches(Synonymizer(match_scope="EXACT"), "oio:hasRelatedSynonym")
+    False
+    >>> scope_matches(Synonymizer(match_scope="*"), "oio:hasRelatedSynonym")
+    True
+    >>> scope_matches(Synonymizer(), "oio:hasExactSynonym")
+    True
+
+    :param rule: Synonymizer rule; a match_scope of None, "" or "*" places no restriction.
+    :param scope_predicate: Predicate of the text being tested; None matches every rule.
+    :return: True if the rule scope matches the scope_predicate.
+    """
+    # No predicate to test against, or no restriction on the rule: always applicable.
+    if scope_predicate is None or rule.match_scope is None:
+        return True
+    if rule.match_scope in ("*", ""):
+        return True
+    # Scope names are compared case-insensitively ("exact" is treated like "EXACT").
+    wanted_scope = rule.match_scope.upper()
+    # The rule may name the predicate (or scope key) directly rather than a scope name.
+    if wanted_scope == scope_predicate.upper():
+        return True
+    # Otherwise translate the scope name to its predicate; an unknown scope name
+    # yields None from the lookup, which never equals the (non-None) predicate.
+    expected_predicate = EXTENDED_SCOPE_TO_SYNONYM_PRED_MAP.get(wanted_scope)
+    return expected_predicate == scope_predicate
diff --git a/tests/input/cli-synonymizer-rules.yaml b/tests/input/cli-synonymizer-rules.yaml
new file mode 100644
index 000000000..f5c4093f6
--- /dev/null
+++ b/tests/input/cli-synonymizer-rules.yaml
@@ -0,0 +1,13 @@
+rules:
+  - description: Remove parentheses bound info from the label.
+    match: "\\([^)]*\\)"
+    match_scope: "*"
+    replacement: ""
+
+  - description: Remove box brackets bound info from the label.
+    match: "\\[[^)]*\\]"
+    match_scope: "*"
+    replacement: ""
+
+      
+  
diff --git a/tests/input/matcher_rules.yaml b/tests/input/matcher_rules.yaml
index 9ff4cb030..c47ce14bc 100644
--- a/tests/input/matcher_rules.yaml
+++ b/tests/input/matcher_rules.yaml
@@ -63,13 +63,13 @@ rules:
       weight: 2.0
 
   - synonymizer:
-      the_rule: Remove parentheses bound info from the label.
+      description: Remove parentheses bound info from the label.
       match: "\\([^)]*\\)"
       match_scope: "*"
       replacement: ""
 
   - synonymizer:
-      the_rule: Remove box brackets bound info from the label.
+      description: Remove box brackets bound info from the label.
       match: "\\[[^)]*\\]"
       match_scope: "*"
       replacement: ""
diff --git a/tests/test_cli.py b/tests/test_cli.py
index dd31013e9..8a7eb1b81 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -62,6 +62,7 @@
 TEST_SSSOM_MAPPING = INPUT_DIR / "unreciprocated-mapping-test.sssom.tsv"
 TEST_SYNONYMIZER_OBO = "simpleobo:" + str(INPUT_DIR / "synonym-test.obo")
 RULES_FILE = INPUT_DIR / "matcher_rules.yaml"
+SYNONYMIZER_RULES_FILE = INPUT_DIR / "cli-synonymizer-rules.yaml"
 
 
 def _outpath(test: str, fmt: str = "tmp") -> str:
@@ -1396,7 +1397,7 @@ def test_generate_synonyms_and_apply(self):
                 TEST_SYNONYMIZER_OBO,
                 "generate-synonyms",
                 "-R",
-                RULES_FILE,
+                SYNONYMIZER_RULES_FILE,
                 "--patch",
                 patch_file,
                 "--apply-patch",
@@ -1423,7 +1424,7 @@ def test_generate_synonyms_no_apply(self):
                 TEST_SYNONYMIZER_OBO,
                 "generate-synonyms",
                 "-R",
-                RULES_FILE,
+                SYNONYMIZER_RULES_FILE,
                 "-o",
                 patch_file,
                 ".all",
diff --git a/tests/test_utilities/test_lexical_index.py b/tests/test_utilities/test_lexical_index.py
index 0184333d9..9882add13 100644
--- a/tests/test_utilities/test_lexical_index.py
+++ b/tests/test_utilities/test_lexical_index.py
@@ -5,7 +5,7 @@
     LexicalTransformationPipeline,
     TransformationType,
 )
-from oaklib.datamodels.mapping_rules_datamodel import Synonymizer
+from oaklib.datamodels.synonymizer_datamodel import Synonymizer
 from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
 from oaklib.implementations.simpleobo.simple_obo_implementation import (
     SimpleOboImplementation,
@@ -56,19 +56,19 @@ def test_pipelines(self):
         builder.build()
         syn_param = [
             Synonymizer(
-                the_rule="Remove parentheses bound info from the label.",
+                description="Remove parentheses bound info from the label.",
                 match=r"\([^)]*\)",  # noqa W605
                 match_scope="*",
                 replacement="",
             ),
             Synonymizer(
-                the_rule="Remove box brackets bound info from the label.",
+                description="Remove box brackets bound info from the label.",
                 match=r"\[[^)]*\]",  # noqa W605
                 match_scope="*",
                 replacement="",
             ),
             Synonymizer(
-                the_rule="Broad match terms with the term 'other' in them.",
+                description="Broad match terms with the term 'other' in them.",
                 match=r"(?i)^Other ",  # noqa W605
                 match_scope="*",
                 replacement="",
@@ -159,7 +159,7 @@ def test_synonymizer_with_other(self):
         oi = SimpleOboImplementation(resource)
         syn_param = [
             Synonymizer(
-                the_rule="Broad match terms with the term 'other' in them.",
+                description="Broad match terms with the term 'other' in them.",
                 match="(?i)^Other ",  # noqa W605
                 match_scope="*",
                 replacement="",
diff --git a/tests/test_utilities/test_synonymizer.py b/tests/test_utilities/test_synonymizer.py
new file mode 100644
index 000000000..4f30c5fd5
--- /dev/null
+++ b/tests/test_utilities/test_synonymizer.py
@@ -0,0 +1,99 @@
+import pytest
+from kgcl_schema.grammar.render_operations import render
+from oaklib import get_adapter
+from oaklib.datamodels.synonymizer_datamodel import RuleSet, Synonymizer
+from oaklib.utilities.lexical.synonymizer import apply_synonymizer, apply_synonymizer_to_terms
+
+from tests import CYTOPLASM, INPUT_DIR, NUCLEAR_MEMBRANE, NUCLEUS
+
+TEST_SIMPLE_ONT = INPUT_DIR / "go-nucleus-simple.obo"
+
+
+@pytest.mark.parametrize(
+    "rule,input,expected",
+    [
+        (
+            {"match": "world", "replacement": "universe"},
+            "hello world",
+            [(True, "hello universe", None)],
+        ),
+        (
+            {"match": "world", "replacement": "universe", "qualifier": "broad"},
+            "hello world",
+            [(True, "hello universe", "broad")],
+        ),
+        (
+            {"match": "world", "replacement": "universe"},
+            "hello universe",
+            [(False, "hello universe", None)],
+        ),
+        (
+            {"match": r"hello (\w+)", "replacement": r"\1, hello"},
+            "hello world",
+            [(True, "world, hello", None)],
+        ),
+        ({"match": r"\bfoo\b", "replacement": "bar"}, "foo baz", [(True, "bar baz", None)]),
+        ({"match": r"\bfoo\b", "replacement": "bar"}, "foo-baz", [(True, "bar-baz", None)]),
+        ({"match": r"\bfoo\b", "replacement": "bar"}, "<foo-baz>", [(True, "<bar-baz>", None)]),
+        ({"match": r"\bfoo\b", "replacement": "bar"}, "baz foo", [(True, "baz bar", None)]),
+        ({"match": r"\bfoo\b", "replacement": "bar"}, "food baz", [(False, "food baz", None)]),
+    ],
+)
+def test_synonymizer(rule, input, expected):
+    """A single rule applied to one string must yield exactly the expected tuples."""
+    synonymizer_rule = Synonymizer(**rule)
+    assert list(apply_synonymizer(input, [synonymizer_rule])) == expected
+
+
+@pytest.mark.parametrize(
+    "ruleset,include_all,terms,expected",
+    [
+        (
+            Synonymizer(match=r"nuclear (\w+)", replacement=r"\1 of nucleus"),
+            False,
+            [NUCLEUS, NUCLEAR_MEMBRANE],
+            ["create exact synonym 'membrane of nucleus' for GO:0031965"],
+        ),
+        (
+            Synonymizer(match=r"nuclear (\w+)", replacement=r"\1 of nucleus", match_scope="label"),
+            False,
+            [NUCLEUS, NUCLEAR_MEMBRANE],
+            ["create exact synonym 'membrane of nucleus' for GO:0031965"],
+        ),
+        (
+            Synonymizer(match=r"nuclear (\w+)", replacement=r"\1 of nucleus", match_scope="exact"),
+            False,
+            [NUCLEUS, NUCLEAR_MEMBRANE],
+            [],
+        ),
+        (
+            Synonymizer(
+                match=r"nucleus",
+                replacement="NUCLEUS",
+                match_scope="definition",
+                qualifier="definition",
+            ),
+            True,
+            [CYTOPLASM],
+            [
+                (
+                    "change definition of GO:0005737 from All of the contents of a cell excluding "
+                    "the plasma membrane and nucleus, but including other subcellular structures. "
+                    "to All of the contents of a cell excluding the plasma membrane and "
+                    "NUCLEUS, but including other subcellular structures."
+                )
+            ],
+        ),
+    ],
+)
+def test_syonymizer_on_terms(ruleset, include_all, terms, expected):
+    """Rendered KGCL changes for the given terms must equal the expected set."""
+    adapter = get_adapter(TEST_SIMPLE_ONT)
+    # a bare rule is wrapped so that rules and rule sets can both be parametrized
+    rule_set = RuleSet(rules=[ruleset]) if isinstance(ruleset, Synonymizer) else ruleset
+    changes = list(apply_synonymizer_to_terms(adapter, terms, rule_set, include_all=include_all))
+    rendered = [render(change) for change in changes]
+    for text in rendered:
+        print(text)
+    # the order of the generated changes is irrelevant here, so compare as sets
+    assert set(rendered) == set(expected)