diff --git a/compose.yml b/compose.yml index 0388699fb5..28a54515c1 100644 --- a/compose.yml +++ b/compose.yml @@ -8,6 +8,7 @@ services: ports: - "8080:8080" environment: - BUTLER_SERVER_CONFIG_URI: "/butler_root" + DAF_BUTLER_REPOSITORY_INDEX: "/butler_config/repository_index.yaml" volumes: - ../ci_hsc_gen3/DATA:/butler_root + - ./docker/compose_files:/butler_config diff --git a/docker/compose_files/repository_index.yaml b/docker/compose_files/repository_index.yaml new file mode 100644 index 0000000000..5690de63dd --- /dev/null +++ b/docker/compose_files/repository_index.yaml @@ -0,0 +1 @@ +hsc_gen3: /butler_root diff --git a/python/lsst/daf/butler/remote_butler/server/_config.py b/python/lsst/daf/butler/remote_butler/server/_config.py deleted file mode 100644 index 9181fb40c1..0000000000 --- a/python/lsst/daf/butler/remote_butler/server/_config.py +++ /dev/null @@ -1,50 +0,0 @@ -# This file is part of daf_butler. -# -# Developed for the LSST Data Management System. -# This product includes software developed by the LSST Project -# (http://www.lsst.org). -# See the COPYRIGHT file at the top-level directory of this distribution -# for details of code ownership. -# -# This software is dual licensed under the GNU General Public License and also -# under a 3-clause BSD license. Recipients may choose which of these licenses -# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, -# respectively. If you choose the GPL option then the following text applies -# (but note that there is still no warranty even if you opt for BSD instead): -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import os -from dataclasses import dataclass - -__all__ = ("Config", "get_config_from_env") - - -@dataclass(frozen=True) -class Config: - """Simplified configuration for a client/server connection.""" - - config_uri: str - """URI of the configuration.""" - - -def get_config_from_env() -> Config: - """Retrieve a configuration from the environment.""" - config_uri = os.getenv("BUTLER_SERVER_CONFIG_URI") - if config_uri is None: - raise Exception( - "The environment variable BUTLER_SERVER_CONFIG_URI " - "must point to a Butler configuration to be used by the server" - ) - return Config(config_uri=config_uri) diff --git a/python/lsst/daf/butler/remote_butler/server/_dependencies.py b/python/lsst/daf/butler/remote_butler/server/_dependencies.py index ec0cb7986c..075d5f5b72 100644 --- a/python/lsst/daf/butler/remote_butler/server/_dependencies.py +++ b/python/lsst/daf/butler/remote_butler/server/_dependencies.py @@ -25,24 +25,34 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from functools import cache +from typing import Annotated -from lsst.daf.butler import Butler -from lsst.daf.butler.direct_butler import DirectButler +from fastapi import Depends +from lsst.daf.butler import LabeledButlerFactory -from ._config import get_config_from_env from ._factory import Factory +_butler_factory = LabeledButlerFactory() -@cache -def _make_global_butler() -> DirectButler: - config = get_config_from_env() - butler = Butler.from_config(config.config_uri) - if not isinstance(butler, DirectButler): - raise TypeError("Server can only use a DirectButler") - return butler +async def butler_factory_dependency() -> LabeledButlerFactory: + """Return a global LabeledButlerFactory instance. This will be used to + construct internal DirectButler instances for interacting with the Butler + repositories we are serving. + """ + return _butler_factory -def factory_dependency() -> Factory: - """Return factory dependency for injection into FastAPI.""" - return Factory(butler=_make_global_butler()) + +async def factory_dependency( + repository: str, butler_factory: Annotated[LabeledButlerFactory, Depends(butler_factory_dependency)] +) -> Factory: + """Return Factory object for injection into FastAPI. + + Parameters + ---------- + repository : `str` + Label of the repository for lookup from the repository index. + butler_factory : `LabeledButlerFactory` + Factory for instantiating DirectButlers. + """ + return Factory(butler_factory=butler_factory, repository=repository) diff --git a/python/lsst/daf/butler/remote_butler/server/_factory.py b/python/lsst/daf/butler/remote_butler/server/_factory.py index 06c52b8f69..8994fac7fb 100644 --- a/python/lsst/daf/butler/remote_butler/server/_factory.py +++ b/python/lsst/daf/butler/remote_butler/server/_factory.py @@ -25,22 +25,30 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from lsst.daf.butler import LabeledButlerFactory from lsst.daf.butler.direct_butler import DirectButler __all__ = ("Factory",) class Factory: - """Class to provide a cached Butler instance. + """Class for instantiating per-request dependencies, following the pattern + in `SQR-072 `_. Parameters ---------- - butler : `DirectButler` - Butler to use. + repository : `str` + The label of the Butler repository requested by the user. + butler_factory : `LabeledButlerFactory` + Factory used to instantiate Butler instances. """ - def __init__(self, *, butler: DirectButler): - self._butler = butler + def __init__(self, *, repository: str, butler_factory: LabeledButlerFactory): + self._repository = repository + self._butler_factory = butler_factory def create_butler(self) -> DirectButler: - return self._butler + butler = self._butler_factory.create_butler(label=self._repository, access_token=None) + if not isinstance(butler, DirectButler): + raise TypeError("Server can only use a DirectButler") + return butler diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 5a0329e4df..6759b00fbb 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -45,7 +45,12 @@ app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) -app.include_router(external_router, prefix=_DEFAULT_API_PATH) + +# A single instance of the server can serve data from multiple Butler +# repositories. This 'repository' path placeholder is consumed by +# factory_dependency(). +repository_placeholder = "{repository}" +app.include_router(external_router, prefix=f"{_DEFAULT_API_PATH}/repo/{repository_placeholder}") @app.exception_handler(MissingDatasetTypeError) diff --git a/tests/test_server.py b/tests/test_server.py index 49d38db0c4..eace355d0e 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -36,8 +36,8 @@ from fastapi.testclient import TestClient from lsst.daf.butler.remote_butler import RemoteButler, RemoteButlerFactory from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY - from lsst.daf.butler.remote_butler.server import Factory, app - from lsst.daf.butler.remote_butler.server._dependencies import factory_dependency + from lsst.daf.butler.remote_butler.server import app + from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency from lsst.resources.s3utils import clean_test_environment_for_s3, getS3Client from moto import mock_s3 except ImportError: @@ -70,6 +70,8 @@ TESTDIR = os.path.abspath(os.path.dirname(__file__)) +TEST_REPOSITORY_NAME = "testrepo" + def _make_test_client(app, raise_server_exceptions=True): client = TestClient(app, raise_server_exceptions=raise_server_exceptions) @@ -80,7 +82,7 @@ def _make_remote_butler(http_client, *, collections: str | None = None): options = None if collections is not None: options = ButlerInstanceOptions(collections=collections) - factory = RemoteButlerFactory("https://test.example/api/butler", http_client) + factory = RemoteButlerFactory(f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}", http_client) return factory.create_butler_for_access_token("fake-access-token", butler_options=options) @@ -115,12 +117,9 @@ def setUpClass(cls): cls.simple_dataset_ref = _create_simple_dataset(cls.repo.butler) # Override the server's Butler initialization to point at our test repo - server_butler = Butler.from_config(cls.root, writeable=True) - - def create_factory_dependency(): - return Factory(butler=server_butler) + server_butler_factory = LabeledButlerFactory({TEST_REPOSITORY_NAME: cls.root}) - app.dependency_overrides[factory_dependency] = create_factory_dependency + app.dependency_overrides[butler_factory_dependency] = lambda: server_butler_factory # Set up the RemoteButler that will connect to the server cls.client = _make_test_client(app) @@ -140,13 +139,13 @@ def create_factory_dependency(): # Populate the test server. # The DatastoreMock is required because the datasets referenced in # these imports do not point at real files. - DatastoreMock.apply(server_butler) - server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) - server_butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml")) + DatastoreMock.apply(cls.repo.butler) + cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) + cls.repo.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml")) @classmethod def tearDownClass(cls): - del app.dependency_overrides[factory_dependency] + del app.dependency_overrides[butler_factory_dependency] removeTestTempDir(cls.root) def test_health_check(self): @@ -154,15 +153,9 @@ def test_health_check(self): self.assertEqual(response.status_code, 200) self.assertEqual(response.json()["name"], "butler") - def test_simple(self): - response = self.client.get("/api/butler/v1/universe") - self.assertEqual(response.status_code, 200) - self.assertIn("namespace", response.json()) - - def test_remote_butler(self): + def test_dimension_universe(self): universe = self.butler.dimensions self.assertEqual(universe.namespace, "daf_butler") - self.assertFalse(self.butler.isWriteable()) def test_get_dataset_type(self): bias_type = self.butler.get_dataset_type("bias") @@ -243,22 +236,24 @@ def test_instantiate_via_butler_http_search(self): def override_read(http_resource_path): return self.client.get(http_resource_path.geturl()).content + server_url = f"https://test.example/api/butler/repo/{TEST_REPOSITORY_NAME}/" + with patch.object(HttpResourcePath, "read", override_read): # Add access key to environment variables. RemoteButler # instantiation will throw an error if access key is not # available. with mock_env({_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "fake-access-token"}): butler = Butler( - "https://test.example/api/butler", + server_url, collections=["collection1", "collection2"], run="collection2", ) - butler_factory = LabeledButlerFactory({"server": "https://test.example/api/butler"}) + butler_factory = LabeledButlerFactory({"server": server_url}) factory_created_butler = butler_factory.create_butler(label="server", access_token="token") self.assertIsInstance(butler, RemoteButler) self.assertIsInstance(factory_created_butler, RemoteButler) - self.assertEqual(butler._server_url, "https://test.example/api/butler/") - self.assertEqual(factory_created_butler._server_url, "https://test.example/api/butler/") + self.assertEqual(butler._server_url, server_url) + self.assertEqual(factory_created_butler._server_url, server_url) self.assertEqual(butler.collections, ("collection1", "collection2")) self.assertEqual(butler.run, "collection2") diff --git a/tests_integration/test_docker_container.py b/tests_integration/test_docker_container.py index 58e638f524..c12e3d31c0 100644 --- a/tests_integration/test_docker_container.py +++ b/tests_integration/test_docker_container.py @@ -24,13 +24,20 @@ def _run_server_docker(): port = 8080 butler_root = "/butler_root" + + # Set up a repository index file to be read in by the server + index_filename = "repo_index.yaml" + repo_name = "testserver" + with open(os.path.join(temp_dir, index_filename), "wb") as fh: + fh.write(f"{repo_name}: {butler_root}\n".encode()) + docker_image = os.getenv("BUTLER_SERVER_DOCKER_IMAGE") if not docker_image: raise Exception("BUTLER_SERVER_DOCKER_IMAGE must be set") container = ( DockerContainer(docker_image) .with_exposed_ports(port) - .with_env("BUTLER_SERVER_CONFIG_URI", butler_root) + .with_env("DAF_BUTLER_REPOSITORY_INDEX", f"{butler_root}/{index_filename}") .with_volume_mapping(temp_dir, butler_root, "rw") ) @@ -38,7 +45,7 @@ def _run_server_docker(): server_host = container.get_container_host_ip() server_port = container.get_exposed_port(port) server_url = f"http://{server_host}:{server_port}" - full_server_url = f"{server_url}/api/butler" + full_server_url = f"{server_url}/api/butler/repo/{repo_name}" try: _wait_for_startup(server_url) yield full_server_url