diff --git a/src/matchbox/server/postgresql/benchmark/generate_queries.py b/src/matchbox/server/postgresql/benchmark/generate_queries.py
new file mode 100644
index 00000000..700e7ced
--- /dev/null
+++ b/src/matchbox/server/postgresql/benchmark/generate_queries.py
@@ -0,0 +1,41 @@
+from matchbox.client.helpers.selector import query, selector
+
+
+def generate_query(crn, fields, backend, resolution, return_type):
+    """Generate a query that can be used for benchmarking purposes.
+
+    Builds a selector for ``crn`` over ``fields`` and hands it to ``query``,
+    forwarding the remaining arguments unchanged.
+
+    Args:
+        crn: Object identifying the table. ``str(crn)`` is used as the table
+            name and ``crn.database.engine`` as the engine.
+        fields: Fields passed to ``selector``.
+        backend: Forwarded to ``query`` as ``backend``.
+        resolution: Forwarded to ``query`` as ``resolution``.
+        return_type: Forwarded to ``query`` as ``return_type``.
+
+    Returns:
+        Whatever ``query`` returns for the selector built here.
+    """
+    select_crn = selector(
+        table=str(crn),
+        fields=fields,
+        engine=crn.database.engine,
+    )
+
+    return query(
+        selector=select_crn,
+        backend=backend,
+        resolution=resolution,
+        return_type=return_type,
+    )
+
+
+if __name__ == "__main__":
+    # Do not call this variable ``query``: at module level that would rebind the
+    # imported ``query`` function and break ``generate_query``.
+    result = ""
+    # vars: crn, fields, backend, resolution, return_type
+    # result = generate_query(crn, fields, backend, resolution, return_type)
+    print(result)
diff --git a/src/matchbox/server/postgresql/utils/query.py b/src/matchbox/server/postgresql/utils/query.py
index 444892bf..2a20f6ad 100644
--- a/src/matchbox/server/postgresql/utils/query.py
+++ b/src/matchbox/server/postgresql/utils/query.py
@@ -355,6 +355,8 @@ def query(
             # Get cluster assignments
             mb_ids = sql_to_df(id_query, engine, return_type="arrow")
 
+            query_string = str(id_query)
+
             # Get source data
             raw_data = source.to_arrow(
                 fields=set([source.db_pk] + fields),
@@ -379,14 +381,14 @@ def query(
 
     # Return in requested format
     if return_type == "arrow":
-        return result
+        return result, query_string
     elif return_type == "pandas":
         return result.to_pandas(
             use_threads=True,
             split_blocks=True,
             self_destruct=True,
             types_mapper=ArrowDtype,
-        )
+        ), query_string
     else:
         raise ValueError(f"return_type of {return_type} not valid")
 