# Updating to Pydantic v2 (#495)

Closes #260 

### Summary

- Updates Pydantic to v2.5, along with the other packages needed to support it
- Updates fixtures, scripts, and migration-related code (see the v1 → v2 mapping sketch below)
- Updates tests
- CURIE expansion fails on OMIM, Ensembl, UBERON, and WormBase (potentially unrelated?)
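
For reviewers newer to the v2 API, here is a minimal, self-contained sketch of the renames this commit applies throughout the codebase (the `Example` model is hypothetical, not project code):

```python
from typing import Optional

from pydantic import BaseModel, ConfigDict


class Example(BaseModel):
    # v1 configured models via class keyword arguments or an inner
    # `class Config`; v2 uses a `model_config = ConfigDict(...)` attribute.
    model_config = ConfigDict(validate_assignment=True, extra="allow")

    id: str
    # v1 treated `name: str = None` as implicitly Optional;
    # v2 requires the Optional annotation to be explicit.
    name: Optional[str] = None


ex = Example(id="MONDO:0020121")
print(ex.model_dump())       # v1 spelling: ex.dict()
print(ex.model_dump_json())  # v1 spelling: ex.json()
Example.model_rebuild()      # v1 spelling: Example.update_forward_refs()
```
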
glass-ships authored Jan 10, 2024
1 parent 388f597 commit 2e3e930
Showing 47 changed files with 13,008 additions and 16,278 deletions.

#### Makefile (2 changes: 1 addition & 1 deletion)

```diff
@@ -77,7 +77,7 @@ install-frontend:
 
 .PHONY: model
 model: install-backend
-	$(RUN) gen-pydantic $(SCHEMADIR)/model.yaml > $(SCHEMADIR)/model.py
+	$(RUN) gen-pydantic --pydantic-version 2 --extra-fields allow $(SCHEMADIR)/model.yaml > $(SCHEMADIR)/model.py
 	$(RUN) gen-typescript $(SCHEMADIR)/model.yaml > frontend/src/api/model.ts
 	make format
```
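
The new `--pydantic-version 2` flag tells LinkML's `gen-pydantic` to emit Pydantic v2 models, and `--extra-fields allow` matches the `extra="allow"` setting in the regenerated `ConfiguredBaseModel` shown below.
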
#### backend/poetry.lock (668 changes: 380 additions & 288 deletions)

Large diffs are not rendered by default.

#### backend/pyproject.toml (41 changes: 18 additions & 23 deletions)

```diff
@@ -5,44 +5,42 @@ description = "Python package for interacting with Monarch Initiative knowledge
 authors = [
     "glass-ships <[email protected]>",
     "kevin schaper <[email protected]>",
-    "The Monarch Initiative <[email protected]>"
+    "The Monarch Initiative <[email protected]>",
 ]
 
 
-packages = [
-    { include = "monarch_py", from = "src" }
-]
+packages = [{ include = "monarch_py", from = "src" }]
 
 
 [tool.poetry.dependencies]
 ### Core dependencies
 python = "^3.9"
-pydantic = "^1.10.2"
-linkml = ">=1.6.3"
-prefixmaps = "^0.1.7"
+pydantic = "^2"
+curies = "<1"
+linkml = "^1.6"
+prefixmaps = "^0.2"
+bioregistry = "^0.10.57"
+oaklib = ">=0.5.22"
 
-requests = "^2.28.1"
-typer = "^0.7.0"
-typer-cli = "^0.0.13"
-rich = "*"
 docker = "^6.0.1"
-pystow = ">=0.5.0"
-loguru = "*"
 fastapi = "^0.103.1"
-oaklib = ">=0.5.19"
 gunicorn = "^21.2.0"
-bioregistry = "^0.10.57"
+loguru = "*"
+pystow = ">=0.5.0"
+requests = "^2.28.1"
+rich = "*"
+typer = "^0.7.0"
+typer-cli = "^0.0.13"
 
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.2.0"
 mkdocs = ">=1.4.2"
 mkdocs-material = ">=9.1.16"
-mkdocstrings = {extras = ["python"], version = ">=0.22.0"}
+mkdocstrings = { extras = ["python"], version = ">=0.22.0" }
 black = "^22.10.0"
 ruff = "*"
-uvicorn = {extras = ["standard"], version = "^0.20.0"}
+uvicorn = { extras = ["standard"], version = "^0.20.0" }
 httpx = "^0.24.1"
 scholarly = "*"
 habanero = "*"
```

```diff
@@ -51,18 +49,15 @@ manubot = "*"
 
 [tool.poetry.scripts]
 monarch = "monarch_py.cli:app"
-monarch-api = { callable = "monarch_py.api.main:run"}
+monarch-api = { callable = "monarch_py.api.main:run" }
 
 
 [tool.ruff]
 line-length = 120
 ignore = [
     "F541", # f-strings with no placeholders
 ]
-exclude = [
-    "tests/fixtures/*.py",
-    "src/monarch_py/datamodels/model.py"
-]
+exclude = ["tests/fixtures/*.py", "src/monarch_py/datamodels/model.py"]
 # per-file-ignores = {"" = ""}
```


```diff
@@ -78,4 +73,4 @@ style = "pep440"
 
 
 [tool.pytest.ini_options]
-pythonpath = ["src"]
\ No newline at end of file
+pythonpath = ["src"]
```

#### backend/src/monarch_py/api/config.py (18 changes: 10 additions & 8 deletions)

```diff
@@ -3,20 +3,22 @@
 from functools import lru_cache
 from typing import List
 
-from pydantic import BaseSettings
+from pydantic import BaseModel
+
+# from pydantic_settings import BaseSettings
 
 from monarch_py.implementations.solr.solr_implementation import SolrImplementation
 from monarch_py.datamodels.model import TermSetPairwiseSimilarity, SemsimSearchResult
 
 
-class Settings(BaseSettings):
-    solr_host = os.getenv("SOLR_HOST") if os.getenv("SOLR_HOST") else "127.0.0.1"
-    solr_port = os.getenv("SOLR_PORT") if os.getenv("SOLR_PORT") else 8983
-    solr_url = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr"
-    phenio_db_path = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db"
+class Settings(BaseModel):
+    solr_host: str = os.getenv("SOLR_HOST") if os.getenv("SOLR_HOST") else "127.0.0.1"
+    solr_port: str = os.getenv("SOLR_PORT") if os.getenv("SOLR_PORT") else 8983
+    solr_url: str = os.getenv("SOLR_URL") if os.getenv("SOLR_URL") else f"http://{solr_host}:{solr_port}/solr"
+    phenio_db_path: str = os.getenv("PHENIO_DB_PATH") if os.getenv("PHENIO_DB_PATH") else "/data/phenio.db"
 
-    semsim_server_host = os.getenv("SEMSIM_SERVER_HOST", "127.0.0.1")
-    semsim_server_port = os.getenv("SEMSIM_SERVER_PORT", 9999)
+    semsim_server_host: str = os.getenv("SEMSIM_SERVER_HOST", "127.0.0.1")
+    semsim_server_port: str = os.getenv("SEMSIM_SERVER_PORT", 9999)
 
 
 settings = Settings()
```
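
Pydantic v2 moved `BaseSettings` into the separate `pydantic-settings` package, which is presumably why the import above is left commented out and `Settings` now derives from plain `BaseModel` with explicit `os.getenv` fallbacks. (Side note: since v2 does not validate defaults unless asked, the `int` fallbacks such as `8983` stay `int` at runtime despite the `str` annotations.) If the `pydantic-settings` dependency is added later, a rough equivalent might look like this (hypothetical sketch, not part of this commit):

```python
# Hypothetical alternative using the pydantic-settings package,
# which is not a dependency of this commit.
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # BaseSettings reads SOLR_HOST / SOLR_PORT from the environment
    # automatically, falling back to these defaults.
    solr_host: str = "127.0.0.1"
    solr_port: int = 8983


settings = Settings()
print(settings.solr_host, settings.solr_port)
```
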
#### backend/src/monarch_py/datamodels/model.py (87 changes: 41 additions & 46 deletions)

```diff
@@ -2,7 +2,8 @@
 from datetime import datetime, date
 from enum import Enum
 from typing import List, Dict, Optional, Any, Union
-from pydantic import BaseModel as BaseModel, ConfigDict, Field
+from pydantic import BaseModel as BaseModel, ConfigDict, Field, field_validator
+import re
 import sys
 
 if sys.version_info >= (3, 8):
```

```diff
@@ -15,20 +16,14 @@
 version = "None"
 
 
-class WeakRefShimBaseModel(BaseModel):
-    __slots__ = "__weakref__"
-
-
-class ConfiguredBaseModel(
-    WeakRefShimBaseModel,
-    validate_assignment=True,
-    validate_all=True,
-    underscore_attrs_are_private=True,
-    extra="forbid",
-    arbitrary_types_allowed=True,
-    use_enum_values=True,
-):
-    pass
+class ConfiguredBaseModel(BaseModel):
+    model_config = ConfigDict(
+        validate_assignment=True,
+        validate_default=True,
+        extra="allow",
+        arbitrary_types_allowed=True,
+        use_enum_values=True,
+    )
 
 
 class AssociationDirectionEnum(str, Enum):
```
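
Beyond the mechanical `ConfigDict` translation (v1's `validate_all` is renamed `validate_default` in v2), two things are worth flagging. The `WeakRefShimBaseModel` shim and `underscore_attrs_are_private` are dropped because v2 supports weak references and private underscore attributes natively. And the base config moves from `extra="forbid"` to `extra="allow"`, matching the `--extra-fields allow` flag in the Makefile. A quick sketch of what the `extra` change means for validation (hypothetical models, not project code):

```python
from pydantic import BaseModel, ConfigDict


class Forbidding(BaseModel):
    model_config = ConfigDict(extra="forbid")
    id: str


class Allowing(BaseModel):
    model_config = ConfigDict(extra="allow")
    id: str


# Unknown inputs are now kept instead of rejected:
print(Allowing(id="X:1", source="solr").model_dump())  # {'id': 'X:1', 'source': 'solr'}

try:
    Forbidding(id="X:1", source="solr")
except Exception as err:
    print(type(err).__name__)  # ValidationError: extra inputs are not permitted
```
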
```diff
@@ -651,34 +646,34 @@ class SemsimSearchResult(ConfiguredBaseModel):
     similarity: Optional[TermSetPairwiseSimilarity] = Field(None)
 
 
-# Update forward refs
-# see https://pydantic-docs.helpmanual.io/usage/postponed_annotations/
-Association.update_forward_refs()
-AssociationCountList.update_forward_refs()
-AssociationTypeMapping.update_forward_refs()
-DirectionalAssociation.update_forward_refs()
-ExpandedCurie.update_forward_refs()
-Entity.update_forward_refs()
-FacetValue.update_forward_refs()
-AssociationCount.update_forward_refs()
-FacetField.update_forward_refs()
-HistoPheno.update_forward_refs()
-HistoBin.update_forward_refs()
-Mapping.update_forward_refs()
-Node.update_forward_refs()
-NodeHierarchy.update_forward_refs()
-Results.update_forward_refs()
-AssociationResults.update_forward_refs()
-AssociationTableResults.update_forward_refs()
-CategoryGroupedAssociationResults.update_forward_refs()
-EntityResults.update_forward_refs()
-MappingResults.update_forward_refs()
-MultiEntityAssociationResults.update_forward_refs()
-SearchResult.update_forward_refs()
-SearchResults.update_forward_refs()
-PairwiseSimilarity.update_forward_refs()
-TermPairwiseSimilarity.update_forward_refs()
-TermSetPairwiseSimilarity.update_forward_refs()
-TermInfo.update_forward_refs()
-BestMatch.update_forward_refs()
-SemsimSearchResult.update_forward_refs()
+# Model rebuild
+# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
+Association.model_rebuild()
+AssociationCountList.model_rebuild()
+AssociationTypeMapping.model_rebuild()
+DirectionalAssociation.model_rebuild()
+ExpandedCurie.model_rebuild()
+Entity.model_rebuild()
+FacetValue.model_rebuild()
+AssociationCount.model_rebuild()
+FacetField.model_rebuild()
+HistoPheno.model_rebuild()
+HistoBin.model_rebuild()
+Mapping.model_rebuild()
+Node.model_rebuild()
+NodeHierarchy.model_rebuild()
+Results.model_rebuild()
+AssociationResults.model_rebuild()
+AssociationTableResults.model_rebuild()
+CategoryGroupedAssociationResults.model_rebuild()
+EntityResults.model_rebuild()
+MappingResults.model_rebuild()
+MultiEntityAssociationResults.model_rebuild()
+SearchResult.model_rebuild()
+SearchResults.model_rebuild()
+PairwiseSimilarity.model_rebuild()
+TermPairwiseSimilarity.model_rebuild()
+TermSetPairwiseSimilarity.model_rebuild()
+TermInfo.model_rebuild()
+BestMatch.model_rebuild()
+SemsimSearchResult.model_rebuild()
```
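
The bulk rename above is mechanical: `update_forward_refs()` becomes its v2 counterpart `model_rebuild()`, which re-resolves postponed (string) annotations after all classes exist. A minimal sketch with a hypothetical self-referential model:

```python
from typing import List, Optional

from pydantic import BaseModel


class TreeNode(BaseModel):
    id: str
    # "TreeNode" is a forward reference: the class is still being defined.
    children: Optional[List["TreeNode"]] = None


# Resolves the string annotation; the v1 spelling was
# TreeNode.update_forward_refs(). v2 can often resolve this lazily on
# first use, so the explicit call here is belt-and-braces, as in the
# generated model.py above.
TreeNode.model_rebuild()

root = TreeNode(id="root", children=[TreeNode(id="leaf")])
print(root.model_dump())
```
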
#### backend/src/monarch_py/datamodels/solr.py (8 changes: 4 additions & 4 deletions)

```diff
@@ -40,16 +40,16 @@ class SolrQuery(BaseModel):
     rows: int = 20
     start: int = 0
     facet: bool = True
-    facet_min_count = 1
+    facet_min_count: int = 1
     facet_fields: Optional[List[str]] = Field(default_factory=list)
     facet_queries: Optional[List[str]] = Field(default_factory=list)
     filter_queries: Optional[List[str]] = Field(default_factory=list)
     query_fields: str = None
     def_type: str = "edismax"
     q_op: str = "AND"  # See SOLR-8812, need this plus mm=100% to allow boolean operators in queries
     mm: str = "100%"  # All tokens in the query must be found in the doc
-    boost: str = None
-    sort: str = None
+    boost: Optional[str] = None
+    sort: Optional[str] = None
 
     def add_field_filter_query(self, field, value):
         if field is not None and value is not None:
```

```diff
@@ -67,7 +67,7 @@ def add_filter_query(self, filter_query):
 
     def query_string(self):
         return urllib.parse.urlencode(
-            {self._solrize(k): self._solrize(v) for k, v in self.dict().items() if v is not None},
+            {self._solrize(k): self._solrize(v) for k, v in self.model_dump().items() if v is not None},
             doseq=True,
         )
 
```
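
Two v2 strictures drive these edits: every field now requires a type annotation (a bare `facet_min_count = 1` raises `PydanticUserError` at class creation), and `str = None` is no longer implicitly `Optional`. A quick sketch (hypothetical class, not project code):

```python
from typing import Optional

from pydantic import BaseModel

try:

    class BadQuery(BaseModel):
        facet_min_count = 1  # v2: non-annotated attribute -> error

except Exception as err:
    print(type(err).__name__)  # PydanticUserError


class GoodQuery(BaseModel):
    facet_min_count: int = 1
    boost: Optional[str] = None  # v1 accepted the shorthand `boost: str = None`


print(GoodQuery().model_dump())  # {'facet_min_count': 1, 'boost': None}
```

Note that `query_fields: str = None` above keeps the v1 shorthand; it slips through because v2 does not validate defaults unless asked, but passing `query_fields=None` explicitly would now fail validation.
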
#### backend/src/monarch_py/service/curie_service.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -8,4 +8,4 @@
 
 # this is a magic keyword that represents the "merged" context from Chris M's algorithm
 # (https://github.com/linkml/prefixmaps/blob/main/src/prefixmaps/data/merged.csv)
-converter = load_converter(["merged"])
+converter = load_converter("merged")
```
#### backend/src/monarch_py/utils/utils.py (24 changes: 12 additions & 12 deletions)

```diff
@@ -111,11 +111,11 @@ def get_headers_from_obj(obj: ConfiguredBaseModel) -> list:
 def to_json(obj: Union[ConfiguredBaseModel, Dict, List[ConfiguredBaseModel]], file: str):
     """Converts a pydantic model to a JSON string."""
     if isinstance(obj, ConfiguredBaseModel):
-        json_value = obj.json(indent=4)
+        json_value = obj.model_dump_json(indent=4)
     elif isinstance(obj, dict):
         json_value = json.dumps(obj, indent=4)
     elif isinstance(obj, list):
-        json_value = json.dumps({"items": [o.dict() for o in obj]}, indent=4)
+        json_value = json.dumps({"items": [o.model_dump() for o in obj]}, indent=4)
     if file:
         with open(file, "w") as f:
             f.write(json_value)
```

```diff
@@ -129,15 +129,15 @@ def to_tsv(obj: ConfiguredBaseModel, file: str) -> str:
 
     # Extract headers and rows from object
     if isinstance(obj, Entity):
-        headers = obj.dict().keys()
-        rows = [list(obj.dict().values())]
+        headers = obj.model_dump().keys()
+        rows = [list(obj.model_dump().values())]
     elif isinstance(obj, (AssociationCountList, HistoPheno, Results)):
         if not obj.items:
             headers = get_headers_from_obj(obj)
             rows = []
         else:
-            headers = obj.items[0].dict().keys()
-            rows = [list(item.dict().values()) for item in obj.items]
+            headers = obj.items[0].model_dump().keys()
+            rows = [list(item.model_dump().values()) for item in obj.items]
     else:
         console.print(f"\n[bold red]{FMT_INPUT_ERROR_MSG}[/]\n")
         raise typer.Exit(1)
```

```diff
@@ -160,15 +160,15 @@ def to_table(obj: ConfiguredBaseModel):
         console.print(f"\n[bold red]Table output not implemented for Node objects.[/]\n")
         raise typer.Exit(1)
     elif isinstance(obj, Entity):
-        headers = obj.dict().keys()
-        rows = [list(obj.dict().values())]
+        headers = obj.model_dump().keys()
+        rows = [list(obj.model_dump().values())]
     elif isinstance(obj, (AssociationCountList, HistoPheno, Results)):
         if not obj.items:
             headers = get_headers_from_obj(obj)
             rows = []
         else:
-            headers = obj.items[0].dict().keys()
-            rows = [list(item.dict().values()) for item in obj.items]
+            headers = obj.items[0].model_dump().keys()
+            rows = [list(item.model_dump().values()) for item in obj.items]
     else:
         console.print(f"\n[bold red]{FMT_INPUT_ERROR_MSG}[/]\n")
         raise typer.Exit(1)
```

```diff
@@ -199,9 +199,9 @@ def to_yaml(obj: ConfiguredBaseModel, file: str):
     fh = open(file, "w") if file else sys.stdout
 
     if isinstance(obj, Entity):
-        yaml.dump(obj.dict(), fh, indent=4)
+        yaml.dump(obj.model_dump(), fh, indent=4)
     elif isinstance(obj, Results) or isinstance(obj, HistoPheno) or isinstance(obj, AssociationCountList):
-        yaml.dump([item.dict() for item in obj.items], fh, indent=4)
+        yaml.dump([item.model_dump() for item in obj.items], fh, indent=4)
     else:
         console.print(f"\n[bold red]{FMT_INPUT_ERROR_MSG}[/]\n")
         raise typer.Exit(1)
```

#### backend/tests/fixtures/association_counts.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -5,8 +5,8 @@
 def association_counts():
     return {
         "items": [
-            {"label": "Phenotypes", "count": 4027, "category": "biolink:DiseaseToPhenotypicFeatureAssociation"},
+            {"label": "Phenotypes", "count": 3879, "category": "biolink:DiseaseToPhenotypicFeatureAssociation"},
             {"label": "Causal Genes", "count": 124, "category": "biolink:CausalGeneToDiseaseAssociation"},
-            {"label": "Correlated Genes", "count": 151, "category": "biolink:CorrelatedGeneToDiseaseAssociation"},
+            {"label": "Correlated Genes", "count": 139, "category": "biolink:CorrelatedGeneToDiseaseAssociation"},
         ]
     }
```

#### backend/tests/fixtures/association_counts_query.py (2 changes: 1 addition & 1 deletion)

```diff
@@ -8,6 +8,7 @@ def association_counts_query():
         "rows": 20,
         "start": 0,
         "facet": True,
+        "facet_min_count": 1,
         "facet_fields": [],
         "facet_queries": [
             '(category:"biolink:DiseaseToPhenotypicFeatureAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
```

```diff
@@ -44,5 +45,4 @@ def association_counts_query():
         "mm": "100%",
         "boost": None,
         "sort": None,
-        "facet_min_count": 1,
     }
```