Rewrite much of DatasetRef.from_simple to fix cache problem.
The previous caching did not respect storage class overrides; all
DatasetRefs with the same UUID and component would end up using the
first storage class deserialized.

The fix here is to always call overrideStorageClass when using the
cache, rather than adding the storage class to the cache key: we
expect the value of the cache to lie mostly in avoiding
reconstruction of the data ID, which overrideStorageClass doesn't
touch.

By the same token, the dataset type name has been removed from the
cache key as well, with only refs for parent dataset types cached.
This should shrink the cache a bit and improve performance in the
usual (no-component) case.
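
To make the failure mode concrete, here is a minimal, self-contained sketch of the old and new cache discipline. `MiniRef`, `deserialize_buggy`, and `deserialize_fixed` are hypothetical stand-ins for `DatasetRef` and `DatasetRef.from_simple`, not butler API:

```python
# Hypothetical model of the caching bug and its fix; not butler code.
from dataclasses import dataclass, replace


@dataclass(frozen=True)
class MiniRef:
    uuid_int: int  # stands in for the dataset UUID (as int)
    storage_class: str

    def override_storage_class(self, sc: str) -> "MiniRef":
        return replace(self, storage_class=sc)


def deserialize_buggy(cache: dict[int, MiniRef], uuid_int: int, sc: str) -> MiniRef:
    # Old behavior: whatever storage class was deserialized first wins forever.
    if (ref := cache.get(uuid_int)) is not None:
        return ref
    cache[uuid_int] = ref = MiniRef(uuid_int, sc)
    return ref


def deserialize_fixed(cache: dict[int, MiniRef], uuid_int: int, sc: str) -> MiniRef:
    # New behavior: cache the expensive-to-rebuild part keyed on UUID alone,
    # and always re-apply the storage class requested by this deserialization.
    if (ref := cache.get(uuid_int)) is None:
        cache[uuid_int] = ref = MiniRef(uuid_int, sc)
    return ref.override_storage_class(sc)


cache: dict[int, MiniRef] = {}
assert deserialize_buggy(cache, 1, "Exposure").storage_class == "Exposure"
assert deserialize_buggy(cache, 1, "ExposureF").storage_class == "Exposure"  # the bug
cache.clear()
assert deserialize_fixed(cache, 1, "Exposure").storage_class == "Exposure"
assert deserialize_fixed(cache, 1, "ExposureF").storage_class == "ExposureF"  # fixed
```

The fixed variant still amortizes the expensive part (in the real code, data ID reconstruction) while letting each deserialization carry its own storage class.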
TallJimbo committed Nov 3, 2023
1 parent e89626a commit 94adb4f
Showing 3 changed files with 55 additions and 54 deletions.
3 changes: 3 additions & 0 deletions doc/changes/DM-41562.bugfix.md
@@ -0,0 +1,3 @@
+Fix caching in DatasetRef deserialization that caused the serialized storage class to be ignored.
+
+This caused intermittent failures when running pipelines that use multiple storage classes for the same dataset type.
100 changes: 47 additions & 53 deletions python/lsst/daf/butler/_dataset_ref.py
@@ -464,17 +464,23 @@ def from_simple(
             Newly-constructed object.
         """
         cache = PersistenceContextVars.datasetRefs.get()
-        localName = sys.intern(
-            datasetType.name
-            if datasetType is not None
-            else (x.name if (x := simple.datasetType) is not None else "")
-        )
-        key = (simple.id.int, localName)
-        if cache is not None and (cachedRef := cache.get(key, None)) is not None:
-            return cachedRef
+        key = simple.id.int
+        if cache is not None and (ref := cache.get(key, None)) is not None:
+            if datasetType is not None:
+                if datasetType.component() is not None:
+                    ref = ref.makeComponentRef(datasetType.component())
+                ref = ref.overrideStorageClass(datasetType.storageClass_name)
+                return ref
+            if simple.datasetType is not None:
+                _, component = DatasetType.splitDatasetTypeName(simple.datasetType.name)
+                if component is not None:
+                    ref = ref.makeComponentRef(component)
+                ref = ref.overrideStorageClass(simple.datasetType.storageClass)
+                return ref
+            return ref
         # Minimalist component will just specify component and id and
         # require registry to reconstruct
-        if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
+        if simple.datasetType is None and simple.dataId is None and simple.run is None:
             if registry is None:
                 raise ValueError("Registry is required to construct component DatasetRef from integer id")
             if simple.id is None:
@@ -484,52 +490,40 @@ def from_simple(
                 raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
             if simple.component:
                 ref = ref.makeComponentRef(simple.component)
-            if cache is not None:
-                cache[key] = ref
-            return ref
-
-        if universe is None and registry is None:
-            raise ValueError("One of universe or registry must be provided.")
-
-        if universe is None and registry is not None:
-            universe = registry.dimensions
-
-        if universe is None:
-            # this is for mypy
-            raise ValueError("Unable to determine a usable universe")
-
-        if simple.datasetType is None and datasetType is None:
-            # mypy
-            raise ValueError("The DatasetType must be specified to construct a DatasetRef")
-        if datasetType is None:
-            if simple.datasetType is None:
-                raise ValueError("Cannot determine Dataset type of this serialized class")
-            datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)
-
-        if simple.dataId is None:
-            # mypy
-            raise ValueError("The DataId must be specified to construct a DatasetRef")
-        dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)
-
-        # Check that simple ref is resolved.
-        if simple.run is None:
-            dstr = ""
-            if simple.datasetType is None:
-                dstr = f" (datasetType={datasetType.name!r})"
-            raise ValueError(
-                "Run collection name is missing from serialized representation. "
-                f"Encountered with {simple!r}{dstr}."
-            )
-
-        newRef = cls(
-            datasetType,
-            dataId,
-            id=simple.id,
-            run=simple.run,
-        )
+        else:
+            if universe is None and registry is None:
+                raise ValueError("One of universe or registry must be provided.")
+            if universe is None:
+                universe = registry.dimensions
+            if datasetType is None:
+                if simple.datasetType is None:
+                    raise ValueError("Cannot determine Dataset type of this serialized class")
+                datasetType = DatasetType.from_simple(simple.datasetType, universe=universe, registry=registry)
+            if simple.dataId is None:
+                # mypy
+                raise ValueError("The DataId must be specified to construct a DatasetRef")
+            dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)
+            # Check that simple ref is resolved.
+            if simple.run is None:
+                dstr = ""
+                if simple.datasetType is None:
+                    dstr = f" (datasetType={datasetType.name!r})"
+                raise ValueError(
+                    "Run collection name is missing from serialized representation. "
+                    f"Encountered with {simple!r}{dstr}."
+                )
+            ref = cls(
+                datasetType,
+                dataId,
+                id=simple.id,
+                run=simple.run,
+            )
         if cache is not None:
-            cache[key] = newRef
-        return newRef
+            if datasetType.component() is not None:
+                cache[key] = ref.makeCompositeRef()
+            else:
+                cache[key] = ref
+        return ref
 
     to_json = to_json_pydantic
     from_json: ClassVar = classmethod(from_json_pydantic)
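
A hedged usage sketch of the behavior the diff above restores. The repo path, collection, dataset type, and data ID values are placeholders; this assumes a configured butler repository and is not runnable standalone:

```python
from lsst.daf.butler import Butler, DatasetRef

butler = Butler("repo", collections=["HSC/runs/example"])  # placeholder repo/collection
ref = butler.registry.findDataset("calexp", instrument="HSC", visit=903334, detector=16)
assert ref is not None

# Round-trip through the serialized form. With the fix, the storage class
# recorded in the serialization is honored even if another ref with the same
# UUID was deserialized first with a different storage class.
simple = ref.to_simple()
restored = DatasetRef.from_simple(simple, universe=butler.registry.dimensions)
assert restored.datasetType.storageClass_name == ref.datasetType.storageClass_name
```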
6 changes: 5 additions & 1 deletion python/lsst/daf/butler/persistence_context.py
@@ -117,10 +117,14 @@ class PersistenceContextVars:
r"""A cache of `DataCoordinate`\ s.
"""

datasetRefs: ContextVar[dict[tuple[int, str], DatasetRef] | None] = ContextVar(
datasetRefs: ContextVar[dict[int, DatasetRef] | None] = ContextVar(
"datasetRefs", default=None
)
r"""A cache of `DatasetRef`\ s.
Keys are UUID converted to int, but only refs of parent dataset types are
cached AND THE STORAGE CLASS IS UNSPECIFIED; consumers of this cache must
call overrideStorageClass on the result.
"""

dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar(
Expand Down
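
The docstring above spells out the cache's new contract. A consumer-side sketch of that contract (`resolve_cached` and `RefLike` are illustrative, not butler API; `makeComponentRef` and `overrideStorageClass` mirror the real `DatasetRef` methods):

```python
# Illustrative only: shows the rule the docstring states -- a cache hit is a
# parent ref with an unspecified storage class, so it must be specialized
# before use.
from typing import Optional, Protocol


class RefLike(Protocol):
    def makeComponentRef(self, name: str) -> "RefLike": ...
    def overrideStorageClass(self, storageClass: str) -> "RefLike": ...


def resolve_cached(
    cache: dict[int, RefLike],
    uuid_int: int,
    storage_class: str,
    component: Optional[str] = None,
) -> Optional[RefLike]:
    ref = cache.get(uuid_int)
    if ref is None:
        return None  # caller must reconstruct (and may cache the parent ref)
    if component is not None:
        # Only parent refs are cached; derive the component ref on demand.
        ref = ref.makeComponentRef(component)
    # Never trust the cached storage class; always apply the requested one.
    return ref.overrideStorageClass(storage_class)
```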
