From bf5435d1542ae7d24a71d870eb3ed17745ea92ea Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 12 Mar 2024 21:01:14 -0700 Subject: [PATCH 1/4] Keep private copies of prefixes and namespaces to avoid O(2n^2) lookups while merging large prefix spaces --- src/prefixmaps/datamodel/context.py | 61 ++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index 9968139..deea603 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -1,11 +1,11 @@ """Classes for managing individual Contexts.""" - +import copy import logging import re from collections import defaultdict from dataclasses import dataclass, field from enum import Enum -from typing import List, Mapping, Optional +from typing import List, Mapping, Optional, Set, Union import curies @@ -143,6 +143,14 @@ class Context: merged_from: Optional[List[str]] = None upper: bool = None lower: bool = None + _prefixes: Optional[Set[str]] = None + """Private attr to speed up duplicate lookups""" + _prefixes_lower: Optional[Set[str]] = None + """Private attr to speed up duplicate lookups""" + _namespaces: Optional[Set[str]] = None + """Private attr to speed up duplicate lookups""" + _namespaces_lower: Optional[Set[str]] = None + """Private attr to speed up duplicate lookups""" def combine(self, context: "Context"): """ @@ -164,6 +172,7 @@ def add_prefix( status: StatusType = StatusType.canonical, preferred: bool = False, expansion_source: Optional[str] = None, + force: bool = False ): """ Adds a prefix expansion to this context. @@ -182,9 +191,11 @@ def add_prefix( :param expansion_source: An optional annotation to be used when merging contexts together. The source will keep track of the original context that a given prefix expansion came from. This is used in :meth:`Context.combine`. + :param force: if True, recompute namespaces and prefixes. default False. :return: """ # TODO: check status + _prefix = prefix if not preferred: if self.upper: prefix = prefix.upper() @@ -192,8 +203,8 @@ def add_prefix( raise ValueError("Cannot set both upper AND lower") if self.lower: prefix = prefix.lower() - prefixes = self.prefixes(lower=True) - namespaces = self.namespaces(lower=True) + prefixes = self.prefixes(lower=True, force=force, as_list=False) + namespaces = self.namespaces(lower=True, force=force, as_list=False) if prefix.lower() in prefixes: if namespace.lower() in namespaces: return @@ -203,6 +214,7 @@ def add_prefix( else: if namespace.lower() in namespaces: status = StatusType.namespace_alias + self.prefix_expansions.append( PrefixExpansion( context=self.name, @@ -212,6 +224,10 @@ def add_prefix( expansion_source=expansion_source, ) ) + self._prefixes.add(_prefix) + self._prefixes_lower.add(prefix.lower()) + self._namespaces.add(namespace) + self._namespaces_lower.add(namespace.lower()) def filter(self, prefix: PREFIX = None, namespace: NAMESPACE = None): """ @@ -230,29 +246,54 @@ def filter(self, prefix: PREFIX = None, namespace: NAMESPACE = None): filtered_pes.append(pe) return filtered_pes - def prefixes(self, lower=False) -> List[str]: + def prefixes(self, lower=False, force:bool = True, as_list: bool = True) -> Union[List[str], Set[str]]: """ All unique prefixes in all prefix expansions. :param lower: if True, the prefix is normalized to lowercase. + :param force: if True, recompute. if False, return cached + :param as_list: if True (default), return as a list. Otherwise a set :return: """ if lower: - return list({pe.prefix.lower() for pe in self.prefix_expansions}) + if force or self._prefixes_lower is None: + self._prefixes_lower = {pe.prefix.lower() for pe in self.prefix_expansions} + res = self._prefixes_lower + + else: + if force or self._prefixes is None: + self._prefixes = {pe.prefix for pe in self.prefix_expansions} + res = self._prefixes + + if as_list: + return list(res) else: - return list({pe.prefix for pe in self.prefix_expansions}) + return copy.copy(res) - def namespaces(self, lower=False) -> List[str]: + def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Union[List[str], Set[str]]: """ All unique namespaces in all prefix expansions :param lower: if True, the namespace is normalized to lowercase. + :param force: if True, recompute. if False, return cached + :param as_list: if True (default), return as a list. Otherwise a set :return: """ if lower: - return list({pe.namespace.lower() for pe in self.prefix_expansions}) + if force or self._namespaces_lower is None: + self._namespaces_lower = {pe.namespace.lower() for pe in self.prefix_expansions} + res = self._namespaces_lower + + else: + if force or self._namespaces is None: + self._namespaces = {pe.namespace for pe in self.prefix_expansions} + res = self._namespaces + + if as_list: + return list(res) else: - return list({pe.namespace for pe in self.prefix_expansions}) + return copy.copy(res) + def as_dict(self) -> PREFIX_EXPANSION_DICT: """ From 16cd9163605686401efab63e2aaf277c1f75a007 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 12 Mar 2024 21:35:39 -0700 Subject: [PATCH 2/4] default empty set and use length comparison rather than None to check if initialized --- src/prefixmaps/datamodel/context.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index deea603..e7ab14e 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -143,13 +143,13 @@ class Context: merged_from: Optional[List[str]] = None upper: bool = None lower: bool = None - _prefixes: Optional[Set[str]] = None + _prefixes: Set[str] = field(default_factory=set) """Private attr to speed up duplicate lookups""" - _prefixes_lower: Optional[Set[str]] = None + _prefixes_lower: Set[str] = field(default_factory=set) """Private attr to speed up duplicate lookups""" - _namespaces: Optional[Set[str]] = None + _namespaces: Set[str] = field(default_factory=set) """Private attr to speed up duplicate lookups""" - _namespaces_lower: Optional[Set[str]] = None + _namespaces_lower: Set[str] = field(default_factory=set) """Private attr to speed up duplicate lookups""" def combine(self, context: "Context"): @@ -256,12 +256,12 @@ def prefixes(self, lower=False, force:bool = True, as_list: bool = True) -> Unio :return: """ if lower: - if force or self._prefixes_lower is None: + if force or len(self._prefixes_lower) == 0: self._prefixes_lower = {pe.prefix.lower() for pe in self.prefix_expansions} res = self._prefixes_lower else: - if force or self._prefixes is None: + if force or len(self._prefixes) == 0: self._prefixes = {pe.prefix for pe in self.prefix_expansions} res = self._prefixes @@ -280,12 +280,12 @@ def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Uni :return: """ if lower: - if force or self._namespaces_lower is None: + if force or len(self._namespaces_lower) == 0: self._namespaces_lower = {pe.namespace.lower() for pe in self.prefix_expansions} res = self._namespaces_lower else: - if force or self._namespaces is None: + if force or len(self._namespaces) == 0: self._namespaces = {pe.namespace for pe in self.prefix_expansions} res = self._namespaces From 56846b374adcb21522fbd9c60f0d0675b2631fdc Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 12 Mar 2024 22:25:23 -0700 Subject: [PATCH 3/4] actually copy doesnt make sense here either --- src/prefixmaps/datamodel/context.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index e7ab14e..2ae3a7c 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -1,5 +1,4 @@ """Classes for managing individual Contexts.""" -import copy import logging import re from collections import defaultdict @@ -268,7 +267,7 @@ def prefixes(self, lower=False, force:bool = True, as_list: bool = True) -> Unio if as_list: return list(res) else: - return copy.copy(res) + return res def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Union[List[str], Set[str]]: """ @@ -292,7 +291,7 @@ def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Uni if as_list: return list(res) else: - return copy.copy(res) + return res def as_dict(self) -> PREFIX_EXPANSION_DICT: From fd87a70c7c3ead3efdc6bcee22b663e8069b1428 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 12 Mar 2024 23:33:48 -0700 Subject: [PATCH 4/4] lint --- src/prefixmaps/datamodel/context.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/prefixmaps/datamodel/context.py b/src/prefixmaps/datamodel/context.py index 2ae3a7c..002d97c 100644 --- a/src/prefixmaps/datamodel/context.py +++ b/src/prefixmaps/datamodel/context.py @@ -1,4 +1,5 @@ """Classes for managing individual Contexts.""" + import logging import re from collections import defaultdict @@ -171,7 +172,7 @@ def add_prefix( status: StatusType = StatusType.canonical, preferred: bool = False, expansion_source: Optional[str] = None, - force: bool = False + force: bool = False, ): """ Adds a prefix expansion to this context. @@ -245,7 +246,9 @@ def filter(self, prefix: PREFIX = None, namespace: NAMESPACE = None): filtered_pes.append(pe) return filtered_pes - def prefixes(self, lower=False, force:bool = True, as_list: bool = True) -> Union[List[str], Set[str]]: + def prefixes( + self, lower=False, force: bool = True, as_list: bool = True + ) -> Union[List[str], Set[str]]: """ All unique prefixes in all prefix expansions. @@ -269,7 +272,9 @@ def prefixes(self, lower=False, force:bool = True, as_list: bool = True) -> Unio else: return res - def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Union[List[str], Set[str]]: + def namespaces( + self, lower=False, force: bool = True, as_list: bool = True + ) -> Union[List[str], Set[str]]: """ All unique namespaces in all prefix expansions @@ -293,7 +298,6 @@ def namespaces(self, lower=False, force:bool = True, as_list:bool = True) -> Uni else: return res - def as_dict(self) -> PREFIX_EXPANSION_DICT: """ Returns a mapping between canonical prefixes and expansions.