Merge pull request #874 from lsst/tickets/DM-40303

DM-40303: Fix pydantic v2 warnings
lsst · Aug 5, 2023 · 42f2c38 · 42f2c38
2 parents fc214b3 + 247b81f
commit 42f2c38
Showing 44 changed files with 326 additions and 179 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
         # supported by your project here, or alternatively use
         # pre-commit's default_language_version, see
         # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.10
+        language_version: python3.11
   - repo: https://github.com/pycqa/isort
     rev: 5.12.0
     hooks:

diff --git a/doc/changes/DM-40303.feature.rst b/doc/changes/DM-40303.feature.rst
@@ -0,0 +1 @@
+Fully supports Pydantic version 2.x and version 1.x.
diff --git a/pyproject.toml b/pyproject.toml
@@ -115,7 +115,7 @@ butler = "lsst.daf.butler.cli.butler:main"
 
 [tool.black]
 line-length = 110
-target-version = ["py310"]
+target-version = ["py311"]
 
 [tool.isort]
 profile = "black"
@@ -179,7 +179,7 @@ select = [
     "W",  # pycodestyle
     "D",  # pydocstyle
 ]
-target-version = "py310"
+target-version = "py311"
 extend-select = [
     "RUF100", # Warn about unused noqa
 ]

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
@@ -917,7 +917,7 @@ def _rewrite_data_id(
                     continue
 
                 # Build up a WHERE expression
-                bind = {k: v for k, v in values.items()}
+                bind = dict(values.items())
                 where = " AND ".join(f"{dimensionName}.{k} = {k}" for k in bind)
 
                 # Hopefully we get a single record that matches

diff --git a/python/lsst/daf/butler/_compat.py b/python/lsst/daf/butler/_compat.py
@@ -21,11 +21,13 @@
 
 """Code to support backwards compatibility."""
 
+from __future__ import annotations
+
 __all__ = ["PYDANTIC_V2", "_BaseModelCompat"]
 
 import sys
 from collections.abc import Callable
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Literal
 
 from pydantic import BaseModel
 from pydantic.fields import FieldInfo
@@ -41,6 +43,8 @@
 
 PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
 
+# This matches the pydantic v2 internal definition.
+IncEx = set[int] | set[str] | dict[int, Any] | dict[str, Any] | None
 
 if PYDANTIC_V2:
 
@@ -55,8 +59,8 @@ class _BaseModelCompat(BaseModel):
         def json(
             self,
             *,
-            include: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,  # type: ignore
-            exclude: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,  # type: ignore
+            include: IncEx = None,  # type: ignore
+            exclude: IncEx = None,  # type: ignore
             by_alias: bool = False,
             skip_defaults: bool | None = None,
             exclude_unset: bool = False,
@@ -109,8 +113,8 @@ def model_dump_json(
                 self,
                 *,
                 indent: int | None = None,
-                include: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,
-                exclude: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,
+                include: IncEx = None,
+                exclude: IncEx = None,
                 by_alias: bool = False,
                 exclude_unset: bool = False,
                 exclude_defaults: bool = False,
@@ -135,6 +139,31 @@ def model_rebuild(
             ) -> bool | None:
                 return None
 
+            def model_dump(
+                self,
+                *,
+                mode: Literal["json", "python"] | str = "python",
+                include: IncEx = None,
+                exclude: IncEx = None,
+                by_alias: bool = False,
+                exclude_unset: bool = False,
+                exclude_defaults: bool = False,
+                exclude_none: bool = False,
+                round_trip: bool = False,
+                warnings: bool = True,
+            ) -> dict[str, Any]:
+                return {}
+
+            @classmethod
+            def model_validate_json(
+                cls,
+                json_data: str | bytes | bytearray,
+                *,
+                strict: bool | None = None,
+                context: dict[str, Any] | None = None,
+            ) -> Self:
+                return cls()
+
 else:
     from astropy.utils.decorators import classproperty
 
@@ -156,8 +185,8 @@ def model_dump_json(
             self,
             *,
             indent: int | None = None,
-            include: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,
-            exclude: set[int] | set[str] | dict[int, Any] | dict[str, Any] | None = None,
+            include: IncEx = None,
+            exclude: IncEx = None,
             by_alias: bool = False,
             exclude_unset: bool = False,
             exclude_defaults: bool = False,
@@ -196,3 +225,36 @@ def model_rebuild(
             _types_namespace: dict[str, Any] | None = None,
         ) -> bool | None:
             return cls.update_forward_refs()
+
+        def model_dump(
+            self,
+            *,
+            mode: Literal["json", "python"] | str = "python",
+            include: IncEx = None,
+            exclude: IncEx = None,
+            by_alias: bool = False,
+            exclude_unset: bool = False,
+            exclude_defaults: bool = False,
+            exclude_none: bool = False,
+            round_trip: bool = False,
+            warnings: bool = True,
+        ) -> dict[str, Any]:
+            # Need to decide whether to warn if the mode parameter is given.
+            return self.dict(
+                include=include,  # type: ignore
+                exclude=exclude,  # type: ignore
+                by_alias=by_alias,
+                exclude_unset=exclude_unset,
+                exclude_defaults=exclude_defaults,
+                exclude_none=exclude_none,
+            )
+
+        @classmethod
+        def model_validate_json(
+            cls,
+            json_data: str | bytes | bytearray,
+            *,
+            strict: bool | None = None,
+            context: dict[str, Any] | None = None,
+        ) -> Self:
+            return cls.parse_raw(json_data)
diff --git a/python/lsst/daf/butler/cli/cliLog.py b/python/lsst/daf/butler/cli/cliLog.py
@@ -306,7 +306,7 @@ def _setLogLevel(cls, component: str | None, level: str) -> None:
         """
         components: set[str | None]
         if component is None:
-            components = {comp for comp in cls.root_loggers()}
+            components = set(cls.root_loggers())
         elif not component or component == ".":
             components = {None}
         else:

diff --git a/python/lsst/daf/butler/cli/utils.py b/python/lsst/daf/butler/cli/utils.py
@@ -274,7 +274,7 @@ def split_commas(
         combined into a single tuple.
     """
     if values is None:
-        return tuple()
+        return ()
     valueList = []
     for value in ensure_iterable(values):
         # If we have [, or ,] we do the slow split. If square brackets

diff --git a/python/lsst/daf/butler/core/config.py b/python/lsst/daf/butler/core/config.py
@@ -750,7 +750,7 @@ def nameTuples(self, topLevelOnly: bool = False) -> list[tuple[str, ...]]:
             in the list is a `tuple` of strings representing the hierarchy.
         """
         if topLevelOnly:
-            return list((k,) for k in self)
+            return [(k,) for k in self]
 
         def getKeysAsTuples(
             d: Mapping[str, Any] | Sequence[str], keys: list[tuple[str, ...]], base: tuple[str, ...] | None

diff --git a/python/lsst/daf/butler/core/configSupport.py b/python/lsst/daf/butler/core/configSupport.py
@@ -186,7 +186,7 @@ def dataId(self) -> dict[str, Any] | None:
         (`dict` or `None`)
         """
         if self._dataId is not None:
-            return {k: v for k, v in self._dataId}
+            return dict(self._dataId)
         else:
             return None
 

diff --git a/python/lsst/daf/butler/core/datasets/ref.py b/python/lsst/daf/butler/core/datasets/ref.py
@@ -35,9 +35,10 @@
 from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, runtime_checkable
 
-from lsst.daf.butler._compat import _BaseModelCompat
+import pydantic
+from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat
 from lsst.utils.classes import immutable
-from pydantic import StrictStr, validator
+from pydantic import StrictStr
 
 from ..configSupport import LookupKey
 from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
@@ -179,24 +180,35 @@ class SerializedDatasetRef(_BaseModelCompat):
     run: StrictStr | None = None
     component: StrictStr | None = None
 
-    @validator("dataId")
-    def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
-        if (d := "datasetType") in values and values[d] is None:
-            raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
-        return v
-
-    @validator("run")
-    def _check_run(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
-        if v and (i := "id") in values and values[i] is None:
-            raise ValueError("'run' cannot be provided unless 'id' is.")
-        return v
-
-    @validator("component")
-    def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
-        # Component should not be given if datasetType is given
-        if v and (d := "datasetType") in values and values[d] is not None:
-            raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
-        return v
+    if PYDANTIC_V2:
+        # Can not use "after" validator since in some cases the validator
+        # seems to trigger with the datasetType field not yet set.
+        @pydantic.model_validator(mode="before")  # type: ignore[attr-defined]
+        @classmethod
+        def check_consistent_parameters(cls, data: dict[str, Any]) -> dict[str, Any]:
+            has_datasetType = data.get("datasetType") is not None
+            has_dataId = data.get("dataId") is not None
+            if has_datasetType is not has_dataId:
+                raise ValueError("If specifying datasetType or dataId, must specify both.")
+
+            if data.get("component") is not None and has_datasetType:
+                raise ValueError("datasetType can not be set if component is given.")
+            return data
+
+    else:
+
+        @pydantic.validator("dataId")
+        def _check_dataId(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
+            if v and (d := "datasetType") in values and values[d] is None:
+                raise ValueError("Can not specify 'dataId' without specifying 'datasetType'")
+            return v
+
+        @pydantic.validator("component")
+        def _check_component(cls, v: Any, values: dict[str, Any]) -> Any:  # noqa: N805
+            # Component should not be given if datasetType is given
+            if v and (d := "datasetType") in values and values[d] is not None:
+                raise ValueError(f"datasetType ({values[d]}) can not be set if component is given ({v}).")
+            return v
 
     @classmethod
     def direct(

diff --git a/python/lsst/daf/butler/core/dimensions/_records.py b/python/lsst/daf/butler/core/dimensions/_records.py
@@ -118,43 +118,59 @@ def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSe
     return model
 
 
+# While supporting pydantic v1 and v2 keep this outside the model.
+_serialized_dimension_record_schema_extra = {
+    "examples": [
+        {
+            "definition": "detector",
+            "record": {
+                "instrument": "HSC",
+                "id": 72,
+                "full_name": "0_01",
+                "name_in_raft": "01",
+                "raft": "0",
+                "purpose": "SCIENCE",
+            },
+        }
+    ]
+}
+
+
 class SerializedDimensionRecord(_BaseModelCompat):
     """Simplified model for serializing a `DimensionRecord`."""
 
     definition: str = Field(
         ...,
         title="Name of dimension associated with this record.",
-        example="exposure",
+        examples=["exposure"],
     )
 
     # Use strict types to prevent casting
     record: dict[str, None | StrictFloat | StrictStr | StrictBool | StrictInt | tuple[int, int]] = Field(
         ...,
         title="Dimension record keys and values.",
-        example={
-            "definition": "exposure",
-            "record": {"instrument": "LATISS", "exposure": 2021050300044, "obs_id": "AT_O_20210503_00044"},
-        },
+        examples=[
+            {
+                "definition": "exposure",
+                "record": {
+                    "instrument": "LATISS",
+                    "exposure": 2021050300044,
+                    "obs_id": "AT_O_20210503_00044",
+                },
+            }
+        ],
     )
 
-    if not PYDANTIC_V2:
+    if PYDANTIC_V2:
+        model_config = {
+            "json_schema_extra": _serialized_dimension_record_schema_extra,  # type: ignore[typeddict-item]
+        }
+    else:
 
         class Config:
             """Local configuration overrides for model."""
 
-            schema_extra = {
-                "example": {
-                    "definition": "detector",
-                    "record": {
-                        "instrument": "HSC",
-                        "id": 72,
-                        "full_name": "0_01",
-                        "name_in_raft": "01",
-                        "raft": "0",
-                        "purpose": "SCIENCE",
-                    },
-                }
-            }
+            schema_extra = _serialized_dimension_record_schema_extra
 
     @classmethod
     def direct(
@@ -405,7 +421,7 @@ def from_simple(
 
         # Timespan and region have to be converted to native form
         # for now assume that those keys are special
-        rec = record_model.dict()
+        rec = record_model.model_dump()
 
         if (ts := "timespan") in rec:
             rec[ts] = Timespan.from_simple(rec[ts], universe=universe, registry=registry)

diff --git a/python/lsst/daf/butler/core/json.py b/python/lsst/daf/butler/core/json.py
@@ -53,7 +53,7 @@ def to_json_pydantic(self: SupportsSimple, minimal: bool = False) -> str:
     a pydantic model.
 
     """
-    return self.to_simple(minimal=minimal).json(exclude_defaults=True, exclude_unset=True)
+    return self.to_simple(minimal=minimal).model_dump_json(exclude_defaults=True, exclude_unset=True)
 
 
 def from_json_pydantic(
@@ -63,7 +63,7 @@ def from_json_pydantic(
     registry: Registry | None = None,
 ) -> SupportsSimple:
     """Convert from JSON to a pydantic model."""
-    simple = cls._serializedType.parse_raw(json_str)
+    simple = cls._serializedType.model_validate_json(json_str)
     try:
         return cls.from_simple(simple, universe=universe, registry=registry)
     except AttributeError as e:
@@ -0,0 +1 @@
+Fully supports Pydantic version 2.x and version 1.x.