diff --git a/python/lsst/daf/butler/remote_butler/_authentication.py b/python/lsst/daf/butler/remote_butler/_authentication.py
new file mode 100644
index 0000000000..961d6b4069
--- /dev/null
+++ b/python/lsst/daf/butler/remote_butler/_authentication.py
@@ -0,0 +1,108 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ("get_authentication_token_from_environment", "get_authentication_headers")
+
+import os
+from fnmatch import fnmatchcase
+from urllib.parse import urlparse
+
+SERVER_WHITELIST = ["*.lsst.cloud"]
+EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN"
+RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN"
+
+
+def get_authentication_token_from_environment(server_url: str) -> str | None:
+    """Search the environment for a Rubin Science Platform access token.
+
+    The token may come from the following sources in this order:
+    1. The "BUTLER_RUBIN_ACCESS_TOKEN" environment variable.
+       This environment variable is meant primarily for development use,
+       running outside the Rubin Science Platform. This token will be sent
+       to EVERY server that we connect to, so be careful when connecting to
+       untrusted servers.
+    2. The "ACCESS_TOKEN" environment variable.
+       This environment variable is provided by the Rubin Science Platform
+       Jupyter notebooks. It will only be returned if the given `server_url`
+       is in a whitelist of servers known to belong to the Rubin Science
+       Platform. Because this is a long-lived token that can be used to
+       impersonate the user with their full access rights, it should not be
+       sent to untrusted servers.
+
+    Parameters
+    ----------
+    server_url : `str`
+        URL of the Butler server that the caller intends to connect to
+
+    Returns
+    -------
+    access_token: `str | None`
+        A Rubin Science Platform access token, or None if no token was
+        configured in the environment. None is also returned when only the
+        "ACCESS_TOKEN" variable is set and `server_url` either has no
+        parseable hostname or its hostname is not in the whitelist.
+    """
+    explicit_butler_token = os.getenv(EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY)
+    if explicit_butler_token:
+        return explicit_butler_token
+
+    notebook_token = os.getenv(RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY)
+    if not notebook_token:
+        return None
+
+    # urlparse() reports a hostname of None when the URL has no network
+    # location (for example a scheme-less "data.lsst.cloud/butler"). Such a
+    # server cannot be checked against the whitelist, so withhold the token
+    # instead of failing with AttributeError on None.
+    hostname = urlparse(server_url).hostname
+    if hostname is None:
+        return None
+
+    # Lower-case the hostname so its case does not affect the whitelist match.
+    hostname = hostname.lower()
+    if any(fnmatchcase(hostname, pattern) for pattern in SERVER_WHITELIST):
+        return notebook_token
+
+    return None
+
+
+def get_authentication_headers(access_token: str) -> dict[str, str]:
+    """Return HTTP headers required for authenticating the user via Rubin
+    Science Platform's Gafaelfawr service
+
+    Parameters
+    ----------
+    access_token : `str`
+        Rubin Science Platform access token
+
+    Returns
+    -------
+    header_map : dict[str, str]
+        HTTP header names and values as a mapping from name -> value
+    """
+    # Access tokens are opaque bearer tokens. See https://sqr-069.lsst.io/
+    return {"Authorization": f"Bearer {access_token}"}
diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py
index e6a1bb4429..2b2cee0183 100644
--- a/python/lsst/daf/butler/remote_butler/_remote_butler.py
+++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -51,6 +51,7 @@
 from ..dimensions import DataId, DimensionConfig, DimensionUniverse
 from ..registry import Registry, RegistryDefaults
 from ..transfers import RepoExportContext
+from ._authentication import get_authentication_headers, get_authentication_token_from_environment
 from ._config import RemoteButlerConfigModel
 
 
@@ -67,6 +68,7 @@ def __init__(
         inferDefaults: bool = True,
         # Parameters unique to RemoteButler
         http_client: httpx.Client | None = None,
+        access_token: str | None = None,
         **kwargs: Any,
     ):
         butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
@@ -81,6 +83,7 @@ def __init__(
                 butler_config[server_url_key], butler_config.configDir
             )
         self._config = RemoteButlerConfigModel.model_validate(butler_config)
+
         self._dimensions: DimensionUniverse | None = None
         # TODO: RegistryDefaults should have finish() called on it, but this
         # requires getCollectionSummary() which is not yet implemented
@@ -91,8 +94,15 @@ def __init__(
             # This is generally done for testing.
             self._client = http_client
         else:
-            headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
-            self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
+            server_url = str(self._config.remote_butler.url)
+            auth_headers = {}
+            if access_token is None:
+                access_token = get_authentication_token_from_environment(server_url)
+            if access_token is not None:
+                auth_headers = get_authentication_headers(access_token)
+
+            headers = auth_headers | {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
+            self._client = httpx.Client(headers=headers, base_url=server_url)
 
     def isWriteable(self) -> bool:
         # Docstring inherited.
diff --git a/tests/test_authentication.py b/tests/test_authentication.py
new file mode 100644
index 0000000000..b748e9e255
--- /dev/null
+++ b/tests/test_authentication.py
@@ -0,0 +1,55 @@
+import os
+import unittest
+from contextlib import contextmanager
+from unittest.mock import patch
+
+from lsst.daf.butler.remote_butler._authentication import (
+    EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY,
+    RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY,
+    get_authentication_headers,
+    get_authentication_token_from_environment,
+)
+
+
+@contextmanager
+def _mock_env(new_environment):
+    with patch.dict(os.environ, new_environment, clear=True):
+        yield
+
+
+class TestButlerClientAuthentication(unittest.TestCase):
+    """Test access-token logic"""
+
+    def test_explicit_butler_token(self):
+        with _mock_env(
+            {
+                EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token1",
+                RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "not-this-token",
+            }
+        ):
+            token = get_authentication_token_from_environment("https://untrustedserver.com")
+            assert token == "token1"
+
+    def test_jupyter_token_with_safe_server(self):
+        with _mock_env({RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
+            token = get_authentication_token_from_environment("https://data.LSST.cloud/butler")
+            assert token == "token2"
+
+    def test_jupyter_token_with_unsafe_server(self):
+        with _mock_env({RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
+            token = get_authentication_token_from_environment("https://untrustedserver.com/butler")
+            assert token is None
+
+    def test_url_without_hostname(self):
+        with _mock_env({RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY: "token2"}):
+            token = get_authentication_token_from_environment("data.lsst.cloud/butler")
+            assert token is None
+
+    def test_missing_token(self):
+        with _mock_env({}):
+            token = get_authentication_token_from_environment("https://data.lsst.cloud/butler")
+            assert token is None
+
+    def test_header_generation(self):
+        headers = get_authentication_headers("tokendata")
+        assert headers == {"Authorization": "Bearer tokendata"}