-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-47325: Add API for parsing butler dataset URIs (butler and ivo) #1113
Changes from all commits
eb7fda0
babd1fb
e5a2201
dd8c184
ea001a3
a5155b4
3cd5774
85c3545
970e68b
2cbb8f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
Added two new APIs for handling Butler dataset URIs. | ||
``Butler.parse_dataset_uri`` parses a URI and returns the butler repository label and associated UUID. | ||
``Butler.get_dataset_from_uri`` will parse a URI and attempt to retrieve the ``DatasetRef``. | ||
URIs should be of the form IVOA identifiers as described in `DMTN-302 <https://dmtn-302.lsst.io>`_. | ||
Deprecated ``butler://`` URIs are still supported but should not be used in new systems. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Metadata-Version: 1.0 | ||
Name: lsst-daf-butler | ||
Version: g57cedf6216+76f9c43fa5 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,13 +48,14 @@ | |
DatasetId, | ||
DatasetRef, | ||
DatasetType, | ||
LabeledButlerFactory, | ||
StorageClass, | ||
Timespan, | ||
) | ||
from lsst.daf.butler.datastore.file_templates import FileTemplate | ||
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory | ||
from lsst.daf.butler.tests import DatastoreMock | ||
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir | ||
from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, mock_env, removeTestTempDir | ||
|
||
try: | ||
from lsst.daf.butler.tests.server import create_test_server | ||
|
@@ -882,10 +883,75 @@ def makeButler(self, writeable: bool = False) -> Butler: | |
registryConfig = RegistryConfig(config.get("registry")) | ||
_RegistryFactory(registryConfig).create_from_config() | ||
|
||
# Write the YAML file so that some tests can recreate butler from it. | ||
config.dumpToUri(os.path.join(self.root, "butler.yaml")) | ||
butler = Butler.from_config(config, writeable=writeable) | ||
DatastoreMock.apply(butler) | ||
return butler | ||
|
||
def test_dataset_uris(self): | ||
"""Test that dataset URIs can be parsed and retrieved.""" | ||
butler = self.makeButler(writeable=True) | ||
butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) | ||
butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) | ||
|
||
butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) | ||
ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G") | ||
self.assertIsInstance(ref, DatasetRef) | ||
|
||
# Get the butler root for the URI. | ||
config_dir = butler._config["root"] | ||
|
||
# Read it via a repo label and a path. | ||
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as index_file: | ||
label = "test_repo" | ||
index_file.write(f"{label}: {config_dir}\n") | ||
index_file.flush() | ||
with mock_env({"DAF_BUTLER_REPOSITORY_INDEX": index_file.name}): | ||
butler_factory = LabeledButlerFactory() | ||
factory = butler_factory.bind(access_token=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rra does this approach work for you in the cutout service? factory = butler_factory.bind(access_token=token)
...
ref = Butler.get_dataset_from_uri(dataset_uri, factory=factory) ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cutout service passes a Butler instance into the backend, so the pattern looks like: butler_factory = LabeledButlerFactory()
def _get_backend(label: str, token: str) -> ImageCutoutBackend:
# Called for each cutout
factory = butler_factory.bind(access_token=token)
butler = factory.create_butler(label=label)
# ...
return ImageCutoutBackend(butler, projection_finder, output, tmpdir) Is that what you had in mind? Basically move the access token parameter to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah. No. It's probably the wrong API for you. Somewhere you are parsing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I could pass the return value of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think passing the bound factory and URI into whatever is wanting to know the DatasetRef is what we want here. |
||
|
||
for dataset_uri in ( | ||
f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}", | ||
f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}%2Fbutler.yaml&id={ref.id}", | ||
f"butler://{label}/{ref.id}", | ||
f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}", | ||
): | ||
result = Butler.get_dataset_from_uri(dataset_uri) | ||
self.assertEqual(result.dataset, ref) | ||
# The returned butler needs to have the datastore mocked. | ||
DatastoreMock.apply(result.butler) | ||
dataset_id, _ = result.butler.get(result.dataset) | ||
self.assertEqual(dataset_id, ref.id) | ||
|
||
factory_result = Butler.get_dataset_from_uri(dataset_uri, factory=factory) | ||
self.assertEqual(factory_result.dataset, ref) | ||
# The returned butler needs to have the datastore mocked. | ||
DatastoreMock.apply(factory_result.butler) | ||
dataset_id, _ = factory_result.butler.get(factory_result.dataset) | ||
self.assertEqual(dataset_id, ref.id) | ||
|
||
# Non existent dataset. | ||
missing_id = str(ref.id).replace("2", "3") | ||
result = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}") | ||
self.assertIsNone(result.dataset) | ||
|
||
# Test some failure modes. | ||
for dataset_uri in ( | ||
"butler://label/1234", # Bad UUID. | ||
"butler://1234", # No UUID. | ||
"butler:///1234", # No label. | ||
"ivo://rubin/1234", # No query part and bad UUID and no label. | ||
"ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0", # No query part. | ||
"ivo://org.rubinobs/datasets?repo=dr1&id=1234", # Bad UUID. | ||
"ivo://org.rubinobs/butler?release=dr1&id=82d79caa-0823-4300-9874-67b737367ee0", # No repo key. | ||
"ivo://org.rubinobs/butler?repo=dr1&repo=dr2&id=82d79caa-0823-4300-9874-67b737367ee0", # 2 vals. | ||
"ivo://org.rubinobs/something?repo=%20&id=82d79caa-0823-4300-9874-67b737367ee0", # no repo. | ||
"https://something.edu/1234", # Wrong scheme. | ||
): | ||
timj marked this conversation as resolved.
Show resolved
Hide resolved
|
||
with self.assertRaises(ValueError): | ||
Butler.parse_dataset_uri(dataset_uri) | ||
|
||
|
||
class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase): | ||
"""Run tests against DirectButler implementation using the | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what is expected of query param keys in terms of case sensitivity. But I think no harm in treating those as case-insensitive as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In theory we are generating these IVO IDs so I don't really want the added complication of converting the dict to a dict with case insensitive keys (I have a recollection of using such a special dict at some point but I'm not sure where it was).