Skip to content

Commit

Permalink
Add indices to temporary tables.
Browse files Browse the repository at this point in the history
The indices are created on combinations of dimensions. This should help
with the performance of follow-up queries in graph builder.
  • Loading branch information
andy-slac committed Jan 23, 2025
1 parent e9c7ef5 commit 1eab6b0
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/direct_query_driver/_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ def materialize(
#
sql_select, _ = plan.finish_select(return_columns=False)
table = self._exit_stack.enter_context(
self.db.temporary_table(make_table_spec(plan.final_columns, self.db, plan.postprocessing))
self.db.temporary_table(
make_table_spec(plan.final_columns, self.db, plan.postprocessing, make_indices=True)
)
)
self.db.insert(table, select=sql_select)
if key is None:
Expand Down
26 changes: 24 additions & 2 deletions python/lsst/daf/butler/direct_query_driver/_sql_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import sqlalchemy

from .. import ddl
from ..dimensions import DimensionGroup
from ..dimensions._group import SortedSequenceSet
from ..nonempty_mapping import NonemptyMapping
from ..queries import tree as qt
from ._postprocessing import Postprocessing
Expand Down Expand Up @@ -638,7 +640,7 @@ def to_select_builder(


def make_table_spec(
columns: qt.ColumnSet, db: Database, postprocessing: Postprocessing | None
columns: qt.ColumnSet, db: Database, postprocessing: Postprocessing | None, *, make_indices: bool = False
) -> ddl.TableSpec:
"""Make a specification that can be used to create a table to store
this query's outputs.
Expand All @@ -652,18 +654,22 @@ def make_table_spec(
postprocessing : `Postprocessing`
Struct representing post-query processing in Python, which may
require additional columns in the query results.
make_indices : `bool`, optional
If `True` add indices for groups of columns.
Returns
-------
spec : `.ddl.TableSpec`
Table specification for this query's result columns (including
those from `postprocessing` and `SqlJoinsBuilder.special`).
"""
indices = _make_table_indices(columns.dimensions) if make_indices else []
results = ddl.TableSpec(
[
columns.get_column_spec(logical_table, field).to_sql_spec(name_shrinker=db.name_shrinker)
for logical_table, field in columns
]
],
indexes=indices,
)
if postprocessing:
for element in postprocessing.iter_missing(columns):
Expand All @@ -679,3 +685,19 @@ def make_table_spec(
ddl.FieldSpec(name=SqlSelectBuilder.EMPTY_COLUMNS_NAME, dtype=SqlSelectBuilder.EMPTY_COLUMNS_TYPE)
)
return results


def _make_table_indices(dimensions: DimensionGroup) -> list[ddl.IndexSpec]:
    """Make index specifications from the required dimensions of a group.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions whose required members determine the indexed columns.

    Returns
    -------
    indices : `list` [ `.ddl.IndexSpec` ]
        One index specification per retained set of column names.

    Notes
    -----
    Every required dimension contributes the required dimensions of its own
    minimal group as a candidate set of columns.  If some already retained
    set compares ``<=`` to the candidate, the first such set is replaced by
    the candidate; otherwise the candidate is added as a new entry.
    """
    column_sets: list[SortedSequenceSet] = []
    for name in dimensions.required:
        candidate = dimensions.universe[name].minimal_group.required

        # Position of the first retained set that the candidate supersedes,
        # or None when there is no such set.
        position = next(
            (pos for pos, existing in enumerate(column_sets) if existing <= candidate),
            None,
        )
        if position is None:
            column_sets.append(candidate)
        else:
            column_sets[position] = candidate

    return [ddl.IndexSpec(*column_set) for column_set in column_sets]

0 comments on commit 1eab6b0

Please sign in to comment.