From eb7fda0095fde5548805469134c2dce9f0069647 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Fri, 1 Nov 2024 14:38:13 -0700
Subject: [PATCH 01/10] Add API for parsing butler dataset URIs (butler and
 ivo)

---
 python/lsst/daf/butler/_butler.py | 80 +++++++++++++++++++++++++++++++
 tests/test_simpleButler.py        | 47 +++++++++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 2fa502bb78..8b12776e8b 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -29,6 +29,9 @@
 
 __all__ = ["Butler"]
 
+import os
+import urllib.parse
+import uuid
 from abc import abstractmethod
 from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
 from contextlib import AbstractContextManager
@@ -526,6 +529,83 @@ def get_known_repos(cls) -> set[str]:
         """
         return ButlerRepoIndex.get_known_repos()
 
+    @classmethod
+    def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
+        """Extract the butler label and dataset ID from a dataset URI.
+
+        Parameters
+        ----------
+        uri : `str`
+            The dataset URI to parse.
+
+        Returns
+        -------
+        label : `str`
+            The label associated with the butler repository from which this
+            dataset originates.
+        dataset_id : `DatasetId`
+            The ID of the dataset.
+
+        Notes
+        -----
+        Supports dataset URIs of the forms ``ivo://rubin/butler_label/UUID``
+        and ``butler://butler_label/UUID``. In ``ivo`` URIs the butler label
+        can include ``/`` and the leading ``/`` is always stripped. If the
+        repository label starts with ``/`` then it must be doubled up. e.g.,
+
+            ivo://rubin//repo/main/82d79caa-0823-4300-9874-67b737367ee0
+
+        will return a label of ``/repo/main``.
+
+        This method does not attempt to check that the dataset exists in the
+        labeled butler.
+        """
+        parsed = urllib.parse.urlparse(uri)
+        if parsed.scheme == "ivo":
+            # TODO: Validate netloc component.
+            label, id_ = os.path.split(parsed.path)
+            # Strip the leading /.
+            label = label[1:]
+        elif parsed.scheme == "butler":
+            label = parsed.netloc
+            # Need to strip the leading /.
+            id_ = parsed.path[1:]
+        else:
+            raise ValueError(f"Unrecognized URI scheme: {uri!r}")
+        if not label:
+            raise ValueError(f"No butler repository label found in uri {uri!r}")
+        try:
+            dataset_id = uuid.UUID(hex=id_)
+        except Exception as e:
+            e.add_note(f"Error extracting dataset ID from uri {uri!r} with dataset ID string {id_!r}")
+            raise
+
+        return label, dataset_id
+
+    @classmethod
+    def get_dataset_from_uri(cls, uri: str) -> DatasetRef | None:
+        """Get the dataset associated with the given dataset URI.
+
+        Parameters
+        ----------
+        uri : `str`
+            The URI associated with a dataset.
+
+        Returns
+        -------
+        ref : `DatasetRef` or `None`
+            The dataset associated with that URI, or `None` if the UUID
+            is valid but the dataset is not known to this butler.
+
+        Notes
+        -----
+        It might be possible to pass in an optional ``LabeledButlerFactory``
+        but how would a caller know the right access token to supply?
+        """
+        label, dataset_id = cls.parse_dataset_uri(uri)
+        butler = cls.from_config(label)
+        return butler.get_dataset(dataset_id)
+
     @abstractmethod
     def _caching_context(self) -> AbstractContextManager[None]:
         """Context manager that enables caching."""
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index 595b08957f..0a44c39da5 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -54,7 +54,7 @@
 from lsst.daf.butler.datastore.file_templates import FileTemplate
 from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
 from lsst.daf.butler.tests import DatastoreMock
-from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir
+from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, mock_env, removeTestTempDir
 
 try:
     from lsst.daf.butler.tests.server import create_test_server
@@ -882,10 +882,55 @@ def makeButler(self, writeable: bool = False) -> Butler:
         registryConfig = RegistryConfig(config.get("registry"))
         _RegistryFactory(registryConfig).create_from_config()
 
+        # Write the YAML file so that some tests can recreate butler from it.
+        config.dumpToUri(os.path.join(self.root, "butler.yaml"))
         butler = Butler.from_config(config, writeable=writeable)
         DatastoreMock.apply(butler)
         return butler
 
+    def test_dataset_uris(self):
+        """Test that dataset URIs can be parsed and retrieved."""
+        butler = self.makeButler(writeable=True)
+        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
+        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
+
+        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
+        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
+        self.assertIsInstance(ref, DatasetRef)
+
+        # Get the butler root for the URI.
+        config_dir = butler._config["root"]
+
+        # Read it via a repo label and a path.
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as index_file:
+            label = "test_repo"
+            index_file.write(f"{label}: {config_dir}\n")
+            index_file.flush()
+            with mock_env({"DAF_BUTLER_REPOSITORY_INDEX": index_file.name}):
+                for dataset_uri in (
+                    f"ivo://rubin/{config_dir}/{ref.id}",
+                    f"ivo://rubin/{config_dir}/butler.yaml/{ref.id}",
+                    f"butler://{label}/{ref.id}",
+                    f"ivo://rubin/{label}/{ref.id}",
+                ):
+                    ref2 = Butler.get_dataset_from_uri(dataset_uri)
+                    self.assertEqual(ref, ref2)
+
+                # Non existent dataset.
+                missing_id = str(ref.id).replace("2", "3")
+                no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
+                self.assertIsNone(no_ref)
+
+        # Test some failure modes.
+        with self.assertRaises(ValueError):
+            Butler.parse_dataset_uri("ivo://rubin/1234")
+        with self.assertRaises(ValueError):
+            Butler.parse_dataset_uri("butler://label/1234")
+        with self.assertRaises(ValueError):
+            Butler.parse_dataset_uri("butler://1234")
+        with self.assertRaises(ValueError):
+            Butler.parse_dataset_uri("https://something.edu/1234")
+
 
 class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase):
     """Run tests against DirectButler implementation using the

From babd1fba898b790fdc2fa12324fd13750c58729a Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 7 Nov 2024 17:28:27 -0700
Subject: [PATCH 02/10] Add support for using a LabeledButlerFactory to
 retrieve the butler

---
 python/lsst/daf/butler/_butler.py             | 23 +++++++++++++------
 .../daf/butler/_labeled_butler_factory.py     | 15 +++++++++++-
 tests/test_simpleButler.py                    |  7 ++++++
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 8b12776e8b..e6cecf32d7 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -63,6 +63,7 @@
     from ._dataset_type import DatasetType
     from ._deferredDatasetHandle import DeferredDatasetHandle
     from ._file_dataset import FileDataset
+    from ._labeled_butler_factory import LabeledButlerFactoryProtocol
     from ._storage_class import StorageClass
     from ._timespan import Timespan
     from .datastore import DatasetRefURIs
@@ -583,27 +584,35 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
         return label, dataset_id
 
     @classmethod
-    def get_dataset_from_uri(cls, uri: str) -> DatasetRef | None:
+    def get_dataset_from_uri(
+        cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None
+    ) -> DatasetRef | None:
         """Get the dataset associated with the given dataset URI.
 
         Parameters
         ----------
         uri : `str`
             The URI associated with a dataset.
+        factory : `LabeledButlerFactoryProtocol` or `None`, optional
+            Bound factory function that will be given the butler label
+            and receive a `Butler`.
 
         Returns
         -------
         ref : `DatasetRef` or `None`
             The dataset associated with that URI, or `None` if the UUID
             is valid but the dataset is not known to this butler.
-
-        Notes
-        -----
-        It might be possible to pass in an optional ``LabeledButlerFactory``
-        but how would a caller know the right access token to supply?
         """
         label, dataset_id = cls.parse_dataset_uri(uri)
-        butler = cls.from_config(label)
+        butler: Butler | None = None
+        if factory is not None:
+            # If the label is not recognized, it might be a path.
+            try:
+                butler = factory(label)
+            except KeyError:
+                pass
+        if butler is None:
+            butler = cls.from_config(label)
         return butler.get_dataset(dataset_id)
 
     @abstractmethod
diff --git a/python/lsst/daf/butler/_labeled_butler_factory.py b/python/lsst/daf/butler/_labeled_butler_factory.py
index 40f887ed0a..3941722527 100644
--- a/python/lsst/daf/butler/_labeled_butler_factory.py
+++ b/python/lsst/daf/butler/_labeled_butler_factory.py
@@ -25,9 +25,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-__all__ = ("LabeledButlerFactory",)
+__all__ = ("LabeledButlerFactory", "LabeledButlerFactoryProtocol")
 
 from collections.abc import Callable, Mapping
+from typing import Protocol
 
 from lsst.resources import ResourcePathExpression
 
@@ -42,6 +43,12 @@
 instance."""
 
 
+class LabeledButlerFactoryProtocol(Protocol):
+    """Callable to retrieve a butler from a label."""
+
+    def __call__(self, label: str) -> Butler: ...
+
+
 class LabeledButlerFactory:
     """Factory for efficiently instantiating Butler instances from the
     repository index file.  This is intended for use from long-lived services
@@ -83,6 +90,12 @@ def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
         # This may be overridden by unit tests.
         self._preload_direct_butler_cache = True
 
+    def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
+        def create(label: str) -> Butler:
+            return self.create_butler(label=label, access_token=access_token)
+
+        return create
+
     def create_butler(self, *, label: str, access_token: str | None) -> Butler:
         """Create a Butler instance.
 
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index 0a44c39da5..ef8275e166 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -48,6 +48,7 @@
     DatasetId,
     DatasetRef,
     DatasetType,
+    LabeledButlerFactory,
     StorageClass,
     Timespan,
 )
@@ -907,6 +908,9 @@ def test_dataset_uris(self):
             index_file.write(f"{label}: {config_dir}\n")
             index_file.flush()
             with mock_env({"DAF_BUTLER_REPOSITORY_INDEX": index_file.name}):
+                butler_factory = LabeledButlerFactory()
+                factory = butler_factory.bind(access_token=None)
+
                 for dataset_uri in (
                     f"ivo://rubin/{config_dir}/{ref.id}",
                     f"ivo://rubin/{config_dir}/butler.yaml/{ref.id}",
@@ -916,6 +920,9 @@ def test_dataset_uris(self):
                     ref2 = Butler.get_dataset_from_uri(dataset_uri)
                     self.assertEqual(ref, ref2)
 
+                    ref2 = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
+                    self.assertEqual(ref, ref2)
+
                 # Non existent dataset.
                 missing_id = str(ref.id).replace("2", "3")
                 no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")

From e5a22011e66fd0dba324f25a9b7d95f9736fa8a2 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Tue, 26 Nov 2024 12:46:35 -0700
Subject: [PATCH 03/10] Switch IVOID to using a query string for the dataset
 UUID

---
 python/lsst/daf/butler/_butler.py | 15 ++++++++-------
 tests/test_simpleButler.py        | 25 ++++++++++++++-----------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index e6cecf32d7..971a624c33 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -549,12 +549,13 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
 
         Notes
         -----
-        Supports dataset URIs of the forms ``ivo://rubin/butler_label/UUID``
+        Supports dataset URIs of the forms
+        ``ivo://rubin.lsst/datasets?butler_label/UUID``
         and ``butler://butler_label/UUID``. In ``ivo`` URIs the butler label
-        can include ``/`` and the leading ``/`` is always stripped. If the
-        repository label starts with ``/`` then it must be doubled up. e.g.,
+        can include ``/`` and the trailing ``/`` before the UUID is always
+        stripped.
 
-            ivo://rubin//repo/main/82d79caa-0823-4300-9874-67b737367ee0
+            ivo://rubin.lsst/datasets?/repo/main/UUID
 
         will return a label of ``/repo/main``.
 
@@ -564,9 +565,9 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
         parsed = urllib.parse.urlparse(uri)
         if parsed.scheme == "ivo":
             # TODO: Validate netloc component.
-            label, id_ = os.path.split(parsed.path)
-            # Strip the leading /.
-            label = label[1:]
+            if parsed.path != "/datasets":
+                raise ValueError(f"Unrecognized path in IVOID {uri}. Expected 'datasets' got {parsed.path!r}")
+            label, id_ = os.path.split(parsed.query)
         elif parsed.scheme == "butler":
             label = parsed.netloc
             # Need to strip the leading /.
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index ef8275e166..ad077c9ecb 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -912,10 +912,10 @@ def test_dataset_uris(self):
                 factory = butler_factory.bind(access_token=None)
 
                 for dataset_uri in (
-                    f"ivo://rubin/{config_dir}/{ref.id}",
-                    f"ivo://rubin/{config_dir}/butler.yaml/{ref.id}",
+                    f"ivo://rubin.lsst/datasets?{config_dir}/{ref.id}",
+                    f"ivo://rubin.lsst/datasets?{config_dir}/butler.yaml/{ref.id}",
                     f"butler://{label}/{ref.id}",
-                    f"ivo://rubin/{label}/{ref.id}",
+                    f"ivo://rubin.lsst/datasets?{label}/{ref.id}",
                 ):
                     ref2 = Butler.get_dataset_from_uri(dataset_uri)
                     self.assertEqual(ref, ref2)
@@ -929,14 +929,17 @@ def test_dataset_uris(self):
                 self.assertIsNone(no_ref)
 
         # Test some failure modes.
-        with self.assertRaises(ValueError):
-            Butler.parse_dataset_uri("ivo://rubin/1234")
-        with self.assertRaises(ValueError):
-            Butler.parse_dataset_uri("butler://label/1234")
-        with self.assertRaises(ValueError):
-            Butler.parse_dataset_uri("butler://1234")
-        with self.assertRaises(ValueError):
-            Butler.parse_dataset_uri("https://something.edu/1234")
+        for dataset_uri in (
+            "butler://label/1234",  # Bad UUID.
+            "butler://1234",  # No label.
+            "ivo://rubin/1234",  # No query part and bad UUID and no label.
+            "ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0",  # No query part.
+            "ivo://rubin/datasets?dr1/1234",  # Bad UUID.
+            "ivo://rubin.lsst/butler?dr1/82d79caa-0823-4300-9874-67b737367ee0",  # Not datasets.
+            "https://something.edu/1234",  # Wrong scheme.
+        ):
+            with self.assertRaises(ValueError):
+                Butler.parse_dataset_uri(dataset_uri)
 
 
 class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase):

From dd8c18426ac71225489fe77d888f860b0e46ddc1 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Tue, 26 Nov 2024 13:06:29 -0700
Subject: [PATCH 04/10] Change det_dataset_from_uri so it returns a Butler

Otherwise there is no easy way for the caller to get the
dataset:

butler, ref = Butler.get_dataset_from_uri(uri)
thing = butler.get(ref)
---
 python/lsst/daf/butler/_butler.py |  9 ++++++---
 tests/test_simpleButler.py        | 14 +++++++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 971a624c33..27d55ce318 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -587,7 +587,7 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
     @classmethod
     def get_dataset_from_uri(
         cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None
-    ) -> DatasetRef | None:
+    ) -> tuple[Butler, DatasetRef | None]:
         """Get the dataset associated with the given dataset URI.
 
         Parameters
@@ -596,10 +596,13 @@ def get_dataset_from_uri(
             The URI associated with a dataset.
         factory : `LabeledButlerFactoryProtocol` or `None`, optional
             Bound factory function that will be given the butler label
-            and receive a `Butler`.
+            and receive a `Butler`. If this is not provided the label
+            will be tried directly.
 
         Returns
         -------
+        butler : `Butler`
+            Butler object associated with this URI.
         ref : `DatasetRef` or `None`
             The dataset associated with that URI, or `None` if the UUID
             is valid but the dataset is not known to this butler.
@@ -614,7 +617,7 @@ def get_dataset_from_uri(
                 pass
         if butler is None:
             butler = cls.from_config(label)
-        return butler.get_dataset(dataset_id)
+        return butler, butler.get_dataset(dataset_id)
 
     @abstractmethod
     def _caching_context(self) -> AbstractContextManager[None]:
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index ad077c9ecb..012ff0c39a 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -917,15 +917,23 @@ def test_dataset_uris(self):
                     f"butler://{label}/{ref.id}",
                     f"ivo://rubin.lsst/datasets?{label}/{ref.id}",
                 ):
-                    ref2 = Butler.get_dataset_from_uri(dataset_uri)
+                    new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri)
                     self.assertEqual(ref, ref2)
+                    # The returned butler needs to have the datastore mocked.
+                    DatastoreMock.apply(new_butler)
+                    dataset_id, _ = butler.get(ref2)
+                    self.assertEqual(dataset_id, ref.id)
 
-                    ref2 = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
+                    factory_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
                     self.assertEqual(ref, ref2)
+                    # The returned butler needs to have the datastore mocked.
+                    DatastoreMock.apply(factory_butler)
+                    dataset_id, _ = factory_butler.get(ref2)
+                    self.assertEqual(dataset_id, ref.id)
 
                 # Non existent dataset.
                 missing_id = str(ref.id).replace("2", "3")
-                no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
+                _, no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
                 self.assertIsNone(no_ref)
 
         # Test some failure modes.

From ea001a3715ee0fb81cb8e679ca4ba1d75487b755 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Tue, 3 Dec 2024 15:18:17 -0700
Subject: [PATCH 05/10] Change IVO parser to use from described in DMTN-302

---
 python/lsst/daf/butler/_butler.py | 31 +++++++++++++++++++++----------
 tests/test_simpleButler.py        | 15 +++++++++------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 27d55ce318..74bb22ed16 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -29,7 +29,6 @@
 
 __all__ = ["Butler"]
 
-import os
 import urllib.parse
 import uuid
 from abc import abstractmethod
@@ -550,30 +549,42 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
         Notes
         -----
         Supports dataset URIs of the forms
-        ``ivo://rubin.lsst/datasets?butler_label/UUID``
-        and ``butler://butler_label/UUID``. In ``ivo`` URIs the butler label
-        can include ``/`` and the trailing ``/`` before the UUID is always
-        stripped.
+        ``ivo://org.rubinobs/usdac/dr1?repo=butler_label&id=UUID`` (see
+        DMTN-302) and ``butler://butler_label/UUID``. The ``butler`` URI is
+        deprecated and can not include ``/`` in the label string. ``ivo`` URIs
+        can include anything supported by the `Butler` constructor, including
+        paths to repositories and alias labels.
 
-            ivo://rubin.lsst/datasets?/repo/main/UUID
+            ivo://org.rubinobs/dr1?repo=/repo/main&id=UUID
 
         will return a label of ``/repo/main``.
 
         This method does not attempt to check that the dataset exists in the
         labeled butler.
+
+        Since the IVOID can be issued by any publisher to represent a Butler
+        dataset there is no validation of the path or netloc component of the
+        URI. The only requirement is that there are ``id`` and ``repo`` keys
+        in the ``ivo`` URI query component.
         """
         parsed = urllib.parse.urlparse(uri)
         if parsed.scheme == "ivo":
-            # TODO: Validate netloc component.
-            if parsed.path != "/datasets":
-                raise ValueError(f"Unrecognized path in IVOID {uri}. Expected 'datasets' got {parsed.path!r}")
-            label, id_ = os.path.split(parsed.query)
+            # Do not validate the netloc or the path values.
+            qs = urllib.parse.parse_qs(parsed.query)
+            if "repo" not in qs or "id" not in qs:
+                raise ValueError(f"Missing 'repo' and/or 'id' query parameters in IVOID {uri}.")
+            if len(qs["repo"]) != 1 or len(qs["id"]) != 1:
+                raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}")
+            label = qs["repo"][0]
+            id_ = qs["id"][0]
         elif parsed.scheme == "butler":
             label = parsed.netloc
             # Need to strip the leading /.
             id_ = parsed.path[1:]
         else:
             raise ValueError(f"Unrecognized URI scheme: {uri!r}")
+        # Strip trailing/leading whitespace from label.
+        label = label.strip()
         if not label:
             raise ValueError(f"No butler repository label found in uri {uri!r}")
         try:
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index 012ff0c39a..5ad73ceab8 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -912,10 +912,10 @@ def test_dataset_uris(self):
                 factory = butler_factory.bind(access_token=None)
 
                 for dataset_uri in (
-                    f"ivo://rubin.lsst/datasets?{config_dir}/{ref.id}",
-                    f"ivo://rubin.lsst/datasets?{config_dir}/butler.yaml/{ref.id}",
+                    f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}",
+                    f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}/butler.yaml&id={ref.id}",
                     f"butler://{label}/{ref.id}",
-                    f"ivo://rubin.lsst/datasets?{label}/{ref.id}",
+                    f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
                 ):
                     new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri)
                     self.assertEqual(ref, ref2)
@@ -939,11 +939,14 @@ def test_dataset_uris(self):
         # Test some failure modes.
         for dataset_uri in (
             "butler://label/1234",  # Bad UUID.
-            "butler://1234",  # No label.
+            "butler://1234",  # No UUID.
+            "butler:///1234",  # No label.
             "ivo://rubin/1234",  # No query part and bad UUID and no label.
             "ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0",  # No query part.
-            "ivo://rubin/datasets?dr1/1234",  # Bad UUID.
-            "ivo://rubin.lsst/butler?dr1/82d79caa-0823-4300-9874-67b737367ee0",  # Not datasets.
+            "ivo://org.rubinobs/datasets?repo=dr1&id=1234",  # Bad UUID.
+            "ivo://org.rubinobs/butler?release=dr1&id=82d79caa-0823-4300-9874-67b737367ee0",  # No repo key.
+            "ivo://org.rubinobs/butler?repo=dr1&repo=dr2&id=82d79caa-0823-4300-9874-67b737367ee0",  # 2 vals.
+            "ivo://org.rubinobs/something?repo=%20&id=82d79caa-0823-4300-9874-67b737367ee0",  # no repo.
             "https://something.edu/1234",  # Wrong scheme.
         ):
             with self.assertRaises(ValueError):

From a5155b4883b1108a0919a823f3d750268d6e5225 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Tue, 3 Dec 2024 16:07:20 -0700
Subject: [PATCH 06/10] Add news fragment

---
 doc/changes/DM-47325.feature.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 doc/changes/DM-47325.feature.rst

diff --git a/doc/changes/DM-47325.feature.rst b/doc/changes/DM-47325.feature.rst
new file mode 100644
index 0000000000..3fe2ac51b8
--- /dev/null
+++ b/doc/changes/DM-47325.feature.rst
@@ -0,0 +1,5 @@
+Added two new APIs for handling Butler dataset URIs.
+``Butler.parse_dataset_uri`` parses a URI and returns the butler repository label and associated UUID.
+``Butler.get_dataset_from_uri`` will parse a URI and attempt to retrieve the ``DatasetRef``.
+URIs should be of the form IVOA identifiers as described in `DMTN-302 <https://dmtn-302.lsst.io>`_.
+Deprecated ``butler://`` URIs are still supported but should not be used in new systems.

From 3cd5774743f0bcfb4405b9aa055b10d357cfdd76 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 5 Dec 2024 13:50:31 -0700
Subject: [PATCH 07/10] Allow IVO:// and BUTLER:// URI schemes to be used

The URI scheme and netloc are meant to be case insensitive.
Since the butler URI scheme relies on butler labels which
are not case-insensitive, the netloc is not downcased in
that situation.
---
 python/lsst/daf/butler/_butler.py         | 7 ++++---
 python/lsst_daf_butler.dist-info/METADATA | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)
 create mode 100644 python/lsst_daf_butler.dist-info/METADATA

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 74bb22ed16..382ea4871e 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -568,7 +568,8 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
         in the ``ivo`` URI query component.
         """
         parsed = urllib.parse.urlparse(uri)
-        if parsed.scheme == "ivo":
+        parsed_scheme = parsed.scheme.lower()
+        if parsed_scheme == "ivo":
             # Do not validate the netloc or the path values.
             qs = urllib.parse.parse_qs(parsed.query)
             if "repo" not in qs or "id" not in qs:
@@ -577,8 +578,8 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
                 raise ValueError(f"Butler IVOID only supports a single value of repo and id, got {uri}")
             label = qs["repo"][0]
             id_ = qs["id"][0]
-        elif parsed.scheme == "butler":
-            label = parsed.netloc
+        elif parsed_scheme == "butler":
+            label = parsed.netloc  # Butler label is case sensitive.
             # Need to strip the leading /.
             id_ = parsed.path[1:]
         else:
diff --git a/python/lsst_daf_butler.dist-info/METADATA b/python/lsst_daf_butler.dist-info/METADATA
new file mode 100644
index 0000000000..ca0bc054af
--- /dev/null
+++ b/python/lsst_daf_butler.dist-info/METADATA
@@ -0,0 +1,3 @@
+Metadata-Version: 1.0
+Name: lsst-daf-butler
+Version: g57cedf6216+76f9c43fa5

From 85c354567a23a1fa8562fcf6ba3d46a11b5f8807 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 5 Dec 2024 13:53:19 -0700
Subject: [PATCH 08/10] Ensure that URI parsing test unquotes

The %2F becomes a /
---
 tests/test_simpleButler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index 5ad73ceab8..be1153294f 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -913,7 +913,7 @@ def test_dataset_uris(self):
 
                 for dataset_uri in (
                     f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}",
-                    f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}/butler.yaml&id={ref.id}",
+                    f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}%2Fbutler.yaml&id={ref.id}",
                     f"butler://{label}/{ref.id}",
                     f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
                 ):

From 970e68bc201f7b55bf8ce04025521e014c21c0e5 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 5 Dec 2024 13:57:39 -0700
Subject: [PATCH 09/10] Add docstring for bind method

---
 python/lsst/daf/butler/_labeled_butler_factory.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/python/lsst/daf/butler/_labeled_butler_factory.py b/python/lsst/daf/butler/_labeled_butler_factory.py
index 3941722527..141ddc0eee 100644
--- a/python/lsst/daf/butler/_labeled_butler_factory.py
+++ b/python/lsst/daf/butler/_labeled_butler_factory.py
@@ -91,6 +91,21 @@ def __init__(self, repositories: Mapping[str, str] | None = None) -> None:
         self._preload_direct_butler_cache = True
 
     def bind(self, access_token: str | None) -> LabeledButlerFactoryProtocol:
+        """Create a callable factory function for generating Butler instances
+        with out needing to specify access tokans again.
+
+        Parameters
+        ----------
+        access_token : `str` or `None`
+            An optional access token to use for authentication with the Butler.
+
+        Returns
+        -------
+        bound : `LabeledButlerFactoryProtocol`
+            A callable that takes a label as input and returns a Butler
+            instance.
+        """
+
         def create(label: str) -> Butler:
             return self.create_butler(label=label, access_token=access_token)
 

From 2cbb8f05adcca8785d63becc6fdfaa0f5fee9ba7 Mon Sep 17 00:00:00 2001
From: Tim Jenness <tjenness@lsst.org>
Date: Thu, 5 Dec 2024 15:23:49 -0700
Subject: [PATCH 10/10] Return dataclasses rather than tuple

---
 python/lsst/daf/butler/_butler.py | 43 +++++++++++++++++++------------
 tests/test_simpleButler.py        | 20 +++++++-------
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
index 382ea4871e..dcb978589e 100644
--- a/python/lsst/daf/butler/_butler.py
+++ b/python/lsst/daf/butler/_butler.py
@@ -29,6 +29,7 @@
 
 __all__ = ["Butler"]
 
+import dataclasses
 import urllib.parse
 import uuid
 from abc import abstractmethod
@@ -74,6 +75,19 @@
 _LOG = getLogger(__name__)
 
 
+@dataclasses.dataclass
+class ParsedButlerDatasetURI:
+    label: str
+    dataset_id: uuid.UUID
+    uri: str
+
+
+@dataclasses.dataclass
+class SpecificButlerDataset:
+    butler: Butler
+    dataset: DatasetRef | None
+
+
 class Butler(LimitedButler):  # numpydoc ignore=PR02
     """Interface for data butler and factory for Butler instances.
 
@@ -530,7 +544,7 @@ def get_known_repos(cls) -> set[str]:
         return ButlerRepoIndex.get_known_repos()
 
     @classmethod
-    def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
+    def parse_dataset_uri(cls, uri: str) -> ParsedButlerDatasetURI:
         """Extract the butler label and dataset ID from a dataset URI.
 
         Parameters
@@ -540,11 +554,9 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
 
         Returns
         -------
-        label : `str`
+        parsed : `ParsedButlerDatasetURI`
             The label associated with the butler repository from which this
-            dataset originates.
-        dataset_id : `DatasetId`
-            The ID of the dataset.
+            dataset originates and the ID of the dataset.
 
         Notes
         -----
@@ -594,12 +606,12 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
             e.add_note(f"Error extracting dataset ID from uri {uri!r} with dataset ID string {id_!r}")
             raise
 
-        return label, dataset_id
+        return ParsedButlerDatasetURI(label=label, dataset_id=dataset_id, uri=uri)
 
     @classmethod
     def get_dataset_from_uri(
         cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None
-    ) -> tuple[Butler, DatasetRef | None]:
+    ) -> SpecificButlerDataset:
         """Get the dataset associated with the given dataset URI.
 
         Parameters
@@ -613,23 +625,22 @@ def get_dataset_from_uri(
 
         Returns
         -------
-        butler : `Butler`
-            Butler object associated with this URI.
-        ref : `DatasetRef` or `None`
-            The dataset associated with that URI, or `None` if the UUID
-            is valid but the dataset is not known to this butler.
+        result : `SpecificButlerDataset`
+            The butler associated with this URI and the dataset itself.
+            The dataset can be `None` if the UUID is valid but the dataset
+            is not known to this butler.
         """
-        label, dataset_id = cls.parse_dataset_uri(uri)
+        parsed = cls.parse_dataset_uri(uri)
         butler: Butler | None = None
         if factory is not None:
             # If the label is not recognized, it might be a path.
             try:
-                butler = factory(label)
+                butler = factory(parsed.label)
             except KeyError:
                 pass
         if butler is None:
-            butler = cls.from_config(label)
-        return butler, butler.get_dataset(dataset_id)
+            butler = cls.from_config(parsed.label)
+        return SpecificButlerDataset(butler=butler, dataset=butler.get_dataset(parsed.dataset_id))
 
     @abstractmethod
     def _caching_context(self) -> AbstractContextManager[None]:
diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py
index be1153294f..1ffa7941ee 100644
--- a/tests/test_simpleButler.py
+++ b/tests/test_simpleButler.py
@@ -917,24 +917,24 @@ def test_dataset_uris(self):
                     f"butler://{label}/{ref.id}",
                     f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
                 ):
-                    new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri)
-                    self.assertEqual(ref, ref2)
+                    result = Butler.get_dataset_from_uri(dataset_uri)
+                    self.assertEqual(result.dataset, ref)
                     # The returned butler needs to have the datastore mocked.
-                    DatastoreMock.apply(new_butler)
-                    dataset_id, _ = butler.get(ref2)
+                    DatastoreMock.apply(result.butler)
+                    dataset_id, _ = result.butler.get(result.dataset)
                     self.assertEqual(dataset_id, ref.id)
 
-                    factory_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
-                    self.assertEqual(ref, ref2)
+                    factory_result = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
+                    self.assertEqual(factory_result.dataset, ref)
                     # The returned butler needs to have the datastore mocked.
-                    DatastoreMock.apply(factory_butler)
-                    dataset_id, _ = factory_butler.get(ref2)
+                    DatastoreMock.apply(factory_result.butler)
+                    dataset_id, _ = factory_result.butler.get(factory_result.dataset)
                     self.assertEqual(dataset_id, ref.id)
 
                 # Non existent dataset.
                 missing_id = str(ref.id).replace("2", "3")
-                _, no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
-                self.assertIsNone(no_ref)
+                result = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
+                self.assertIsNone(result.dataset)
 
         # Test some failure modes.
         for dataset_uri in (