From 140c41dac2aee2e76d9518a0abea54bf272f4bd6 Mon Sep 17 00:00:00 2001 From: jimdale Date: Fri, 15 Mar 2024 14:20:28 +0100 Subject: [PATCH] add class method to queryset to allow merging of pre-existing querysets --- pyproject.toml | 2 +- viewser/commands/queryset/models/queryset.py | 70 +++++++++++++++++++- viewser/commands/queryset/operations.py | 12 ++-- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 66938e5..f179558 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "viewser" -version = "6.0.4" +version = "6.1.0" description = "The Views 3 CLI tool" authors = ["peder2911 "] readme = "README.md" diff --git a/viewser/commands/queryset/models/queryset.py b/viewser/commands/queryset/models/queryset.py index 3138ed4..315c47c 100644 --- a/viewser/commands/queryset/models/queryset.py +++ b/viewser/commands/queryset/models/queryset.py @@ -39,7 +39,75 @@ class Queryset(schema.Queryset): definitions. """ def __init__(self, name, loa): - super().__init__(name = name, loa = loa, operations = []) + super().__init__(name=name, loa=loa, operations=[]) + + @classmethod + def from_merger(cls, querysets, name, theme=None, description=None, verbose=False): + + def rename_to_string(column): + rename_string = ''.join([column[0].namespace, column[0].name, str(column[0].arguments)]) + return rename_string + + def database_to_string(column): + database_string = ''.join([column[-1].namespace, column[-1].name, str(column[-1].arguments)]) + return database_string + + def transform_to_string(column): + transform_string = ''.join( + [column[i].namespace + column[i].name + str(column[i].arguments) for i in range(1, len(column) - 1)]) + return transform_string + + loas = [queryset.loa for queryset in querysets] + + if len(set(loas)) > 1: + raise RuntimeError(f'querysets cannot be merged - they are defined at different loas {loas}') + + columns = [] + rename_strings = [] + database_strings = [] + transform_strings = [] + + for queryset in querysets: + for column in queryset.operations: + + rename_string = rename_to_string(column) + database_string = database_to_string(column) + transform_string = transform_to_string(column) + + if len(rename_strings) == 0: + rename_strings.append(rename_string) + database_strings.append(database_string) + transform_strings.append(transform_string) + columns.append(column) + else: + if rename_string in rename_strings: + idx = rename_strings.index(rename_string) + + if database_string != database_strings[idx]: + raise RuntimeError( + f'querysets cannot be merged - two columns named {column[0].arguments[0]} ' + f'with different raw data') + + if transform_string != transform_strings[idx]: + raise RuntimeError( + f'querysets cannot be merged - two columns named {column[0].arguments[0]} ' + f'with different xforms') + if verbose: + print(f'Merging querysets: omitting copy of identically-defined ' + f'column {column[0].arguments[0]}') + else: + rename_strings.append(rename_string) + database_strings.append(database_string) + transform_strings.append(transform_string) + columns.append(column) + + qs_merged = cls(name=name, loa=loas[0]) + + qs_merged.operations = columns + qs_merged.themes = [] if theme is None else [theme,] + qs_merged.description = description + + return qs_merged @util.deepcopy_self def with_column(self, col: column.Column): diff --git a/viewser/commands/queryset/operations.py b/viewser/commands/queryset/operations.py index 6e0018c..c649a87 100644 --- a/viewser/commands/queryset/operations.py +++ b/viewser/commands/queryset/operations.py @@ -170,22 +170,22 @@ def _fetch( try: data = pd.read_parquet(io.BytesIO(data.value.content)) -# clear_output(wait=True) - print(f'{retries+1}: Queryset {name} read successfully') + print(f'\n') + print(f'Queryset {name} read successfully') succeeded = True except: message = data.value.content.decode() if retries == 0: - print(f'{retries + 1}: {message}') + print(f'\n') + print(f'\r {retries + 1}: {message}', flush=True, end="\r") else: - clear_output(wait=True) - print(f'{retries+1}: {message}', end="\r") + print(f'\r {retries+1}: {message}', flush=True, end="\r") if 'failed' in message: failed = True data = message if retries > max_retries: - clear_output(wait=True) + print(f'\n') print(f'Max attempts to retrieve exceeded ({max_retries}) : aborting retrieval', end="\r") failed = True data = message