From 3af7307f0d9f24df256b44441a4940a5e8c7cd9c Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Wed, 11 Dec 2024 13:46:23 -0700 Subject: [PATCH] Fix dataset fields not working in Butler.query_datasets Fix an issue where dataset fields like `ingest_date` were raising `InvalidQueryError: Unrecognized identifier` because the where clause was being applied before the query was known to be a dataset query. --- doc/changes/DM-48094.bugfix.md | 1 + python/lsst/daf/butler/_butler.py | 4 ++-- python/lsst/daf/butler/tests/butler_queries.py | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 doc/changes/DM-48094.bugfix.md diff --git a/doc/changes/DM-48094.bugfix.md b/doc/changes/DM-48094.bugfix.md new file mode 100644 index 0000000000..be3d6b0747 --- /dev/null +++ b/doc/changes/DM-48094.bugfix.md @@ -0,0 +1 @@ +Fix a bug where dataset fields like `ingest_date` were raising `InvalidQueryError: Unrecognized identifier` when used in `Butler.query_datasets` `where` clause. 
diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index dcb978589e..c8f350e5f5 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1869,8 +1869,8 @@ def query_datasets( warn_limit = True with self.query() as query: result = ( - query.where(data_id, where, bind=bind, **kwargs) - .datasets(dataset_type, collections=collections, find_first=find_first) + query.datasets(dataset_type, collections=collections, find_first=find_first) + .where(data_id, where, bind=bind, **kwargs) .order_by(*ensure_iterable(order_by)) .limit(query_limit) ) diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py index 050816848e..5905df81ba 100644 --- a/python/lsst/daf/butler/tests/butler_queries.py +++ b/python/lsst/daf/butler/tests/butler_queries.py @@ -1921,6 +1921,13 @@ def test_dataset_queries(self) -> None: self.assertEqual(rows[0]["visit"], 1) self.assertEqual(rows[0]["dt.collection"], "run1") + # Test that dataset fields like ingest_date can be used in the 'where' + # clause. + result = butler.query_datasets("dt", "run1", where="ingest_date > T'2000-01-01'") + self.assertEqual(len(result), 1) + result = butler.query_datasets("dt", "run1", where="ingest_date < T'2000-01-01'", explain=False) + self.assertEqual(len(result), 0) + def test_multiple_instrument_queries(self) -> None: """Test that multiple-instrument queries are not rejected as having governor dimension ambiguities.