From 9817db6c03faa876d2d4406fb1b72e56b2670497 Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Mon, 28 Oct 2024 15:44:14 -0700 Subject: [PATCH 1/7] Balance parens --- src/spyglass/utils/sql_helper_fn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/spyglass/utils/sql_helper_fn.py b/src/spyglass/utils/sql_helper_fn.py index b36734fac..0265eaf32 100644 --- a/src/spyglass/utils/sql_helper_fn.py +++ b/src/spyglass/utils/sql_helper_fn.py @@ -212,7 +212,7 @@ def bash_escape_sql(s, add_newline=True): "WHERE ": "", # Remove preceding WHERE of dj.where_clause " ": " ", # Squash double spaces "( (": "((", # Squash double parens - ") )": ")", + ") )": "))", '"': "'", # Replace double quotes with single "`": "", # Remove backticks } @@ -232,6 +232,5 @@ def bash_escape_sql(s, add_newline=True): for old, new in replace_map.items(): s = s.replace(old, new) - if s.startswith("(((") and s.endswith(")))"): - s = s[2:-2] # Remove extra parens for readability + return s From 49254ecef2831d4273093bb9d061d8b757822448 Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Tue, 29 Oct 2024 14:47:53 -0700 Subject: [PATCH 2/7] Remove redundant parens --- src/spyglass/utils/sql_helper_fn.py | 43 +++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/spyglass/utils/sql_helper_fn.py b/src/spyglass/utils/sql_helper_fn.py index 0265eaf32..a3b0898ef 100644 --- a/src/spyglass/utils/sql_helper_fn.py +++ b/src/spyglass/utils/sql_helper_fn.py @@ -1,3 +1,4 @@ +import re from functools import cached_property from os import system as os_system from pathlib import Path @@ -196,6 +197,34 @@ def _export_conda_env(self): self._logger.info(f"Conda environment exported to {yml_path}") +def remove_redundant(s): + """Remove redundant parentheses from a string. + + '((a=b)OR((c=d)AND((e=f))))' -> '(a=b) OR ((c=d) AND (e=f))' + + Full solve would require content parsing, this removes duplicates. + https://codegolf.stackexchange.com/questions/250596/remove-redundant-parentheses + """ + + def is_list(x): # Check if element is a list + return isinstance(x, list) + + def list_to_str(x): # Convert list to string + return "(%s)" % "".join(map(list_to_str, x)) if is_list(x) else x + + def flatten_list(nested): + ret = [flatten_list(e) if is_list(e) else e for e in nested if e] + return ret[0] if ret == [[*ret[0]]] else ret # first if all same + + tokens = repr("\"'" + s)[3:] # Quote to safely eval the string + as_list = tokens.translate({40: "',['", 41: "'],'"}) # parens -> square + flattened = flatten_list(eval(as_list)) # Flatten the nested list + as_str = list_to_str(flattened) # back to str + + # space out AND and OR for readability + return re.sub(r"\b(and|or)\b", r" \1 ", as_str, flags=re.IGNORECASE) + + def bash_escape_sql(s, add_newline=True): """Escape restriction string for bash. @@ -207,9 +236,19 @@ def bash_escape_sql(s, add_newline=True): Add newlines for readability around AND & OR. Default True """ s = s.strip() + if s.startswith("WHERE"): + s = s[5:].strip() + + # Balance parentheses - because make_condition may unbalance outside parens + n_open = s.count("(") + n_close = s.count(")") + add_open = max(0, n_close - n_open) + add_close = max(0, n_open - n_close) + balanced = "(" * add_open + s + ")" * add_close + + s = remove_redundant(balanced) replace_map = { - "WHERE ": "", # Remove preceding WHERE of dj.where_clause " ": " ", # Squash double spaces "( (": "((", # Squash double parens ") )": "))", @@ -231,6 +270,6 @@ def bash_escape_sql(s, add_newline=True): replace_map.update({"%%%%": "%%"}) # Remove extra percent signs for old, new in replace_map.items(): - s = s.replace(old, new) + s = re.sub(re.escape(old), new, s) return s From d3a0c51c2f99791c8ba1733d48e2089e37936d59 Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Wed, 30 Oct 2024 10:12:22 -0700 Subject: [PATCH 3/7] Init table in join --- src/spyglass/utils/mixins/export.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/spyglass/utils/mixins/export.py b/src/spyglass/utils/mixins/export.py index c6aef6b6b..22f8c2e6f 100644 --- a/src/spyglass/utils/mixins/export.py +++ b/src/spyglass/utils/mixins/export.py @@ -8,6 +8,7 @@ from re import match as re_match from datajoint.condition import make_condition +from datajoint.table import Table from packaging.version import parse as version_parse from spyglass.utils.logging import logger @@ -264,6 +265,8 @@ def _run_join(self, **kwargs): joined = self.proj().join(other.proj(), log_export=False) for table in table_list: # log separate for unique pks + if isinstance(table, type) and issubclass(table, Table): + table = table() # adapted from dj.declare.compile_foreign_key for r in joined.fetch(*table.primary_key, as_dict=True): table._log_fetch(restriction=r) From 3e285424f798bb0a9df46fcfb785cd3cb73b01fb Mon Sep 17 00:00:00 2001 From: Chris Broz Date: Wed, 30 Oct 2024 15:17:54 -0700 Subject: [PATCH 4/7] Update src/spyglass/utils/mixins/export.py Co-authored-by: Samuel Bray --- src/spyglass/utils/mixins/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spyglass/utils/mixins/export.py b/src/spyglass/utils/mixins/export.py index 22f8c2e6f..1a7799246 100644 --- a/src/spyglass/utils/mixins/export.py +++ b/src/spyglass/utils/mixins/export.py @@ -319,7 +319,7 @@ def restrict(self, restriction): return super().restrict(restriction) log_export = "fetch_nwb" not in self._called_funcs() return self._run_with_log( - super().restrict, restriction=restriction, log_export=log_export + super().restrict, restriction=dj.AndList([restriction, self.restriction]), log_export=log_export ) def join(self, other, log_export=True, *args, **kwargs): From 9d064b4ef560eefe57cccba69fd80419b84e4949 Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Thu, 31 Oct 2024 10:13:33 -0700 Subject: [PATCH 5/7] Include externals --- src/spyglass/common/common_usage.py | 86 ++++++++++++++++++++++++----- src/spyglass/utils/dj_graph.py | 15 ++--- src/spyglass/utils/mixins/export.py | 4 +- 3 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/spyglass/common/common_usage.py b/src/spyglass/common/common_usage.py index d1909eeb0..ccbf7c909 100644 --- a/src/spyglass/common/common_usage.py +++ b/src/spyglass/common/common_usage.py @@ -144,6 +144,22 @@ def stop_export(self, **kwargs) -> None: # before actually exporting anything, which is more associated with # Selection + def _list_raw_files(self, key: dict) -> list[str]: + """Return a list of unique nwb file names for a given restriction/key.""" + file_table = self * self.File & key + return list( + { + *AnalysisNwbfile.join(file_table, log_export=False).fetch( + "nwb_file_name" + ) + } + ) + + def _list_analysis_files(self, key: dict) -> list[str]: + """Return a list of unique analysis file names for a given restriction/key.""" + file_table = self * self.File & key + return list(file_table.fetch("analysis_file_name")) + def list_file_paths(self, key: dict, as_dict=True) -> list[str]: """Return a list of unique file paths for a given restriction/key. @@ -159,18 +175,60 @@ def list_file_paths(self, key: dict, as_dict=True) -> list[str]: If False, returns a list of strings without key. """ file_table = self * self.File & key - analysis_fp = [ - AnalysisNwbfile().get_abs_path(fname) - for fname in file_table.fetch("analysis_file_name") - ] - nwbfile_fp = [ - Nwbfile().get_abs_path(fname) - for fname in AnalysisNwbfile.join( - file_table, log_export=False - ).fetch("nwb_file_name") - ] - unique_ft = list({*analysis_fp, *nwbfile_fp}) - return [{"file_path": p} for p in unique_ft] if as_dict else unique_ft + unique_fp = { + *[ + AnalysisNwbfile().get_abs_path(p) + for p in self._list_analysis_files(key) + ], + *[Nwbfile().get_abs_path(p) for p in self._list_raw_files(key)], + } + + return [{"file_path": p} for p in unique_fp] if as_dict else unique_fp + + @property + def _externals(self) -> dj.external.ExternalMapping: + """Return the external mapping for the common_n schema.""" + return dj.external.ExternalMapping(schema=AnalysisNwbfile) + + def _add_externals_to_restr_graph( + self, restr_graph: RestrGraph, key: dict + ) -> RestrGraph: + """Add external tables to a RestrGraph for a given restriction/key. + + Tables added as nodes with restrictions based on file paths. Names + added to visited set to appear in restr_ft obj bassed to SQLDumpHelper. + + Parameters + ---------- + restr_graph : RestrGraph + A RestrGraph object to add external tables to. + key : dict + Any valid restriction key for ExportSelection.Table + + Returns + ------- + restr_graph : RestrGraph + The updated RestrGraph + """ + raw_tbl = self._externals["raw"] + raw_name = raw_tbl.full_table_name + raw_restr = ( + "filepath in ('" + "','".join(self._list_raw_files(key)) + "')" + ) + restr_graph.graph.add_node(raw_name, ft=raw_tbl, restr=raw_restr) + + analysis_tbl = self._externals["analysis"] + analysis_name = analysis_tbl.full_table_name + analysis_restr = ( # filepaths have analysis subdir. regexp substrings + "filepath REGEXP '" + "|".join(self._list_analysis_files(key)) + "'" + ) # regexp is slow, but we're only doing this once, and future-proof + restr_graph.graph.add_node( + analysis_name, ft=analysis_tbl, restr=analysis_restr + ) + + restr_graph.visited.update({raw_name, analysis_name}) + + return restr_graph def get_restr_graph( self, key: dict, verbose=False, cascade=True @@ -193,9 +251,11 @@ def get_restr_graph( "table_name", "restriction", as_dict=True ) ) - return RestrGraph( + + restr_graph = RestrGraph( seed_table=self, leaves=leaves, verbose=verbose, cascade=cascade ) + return self._add_externals_to_restr_graph(restr_graph, key) def preview_tables(self, **kwargs) -> list[dj.FreeTable]: """Return a list of restricted FreeTables for a given restriction/key. diff --git a/src/spyglass/utils/dj_graph.py b/src/spyglass/utils/dj_graph.py index 435d37d38..1e6137a02 100644 --- a/src/spyglass/utils/dj_graph.py +++ b/src/spyglass/utils/dj_graph.py @@ -808,13 +808,14 @@ def file_paths(self) -> List[str]: directly by the user. """ self.cascade() - return [ - self.analysis_file_tbl.get_abs_path(file) - for file in set( - [f for files in self.file_dict.values() for f in files] - ) - if file is not None - ] + + files = { + file + for table in self.visited + for file in self._get_node(table).get("files", []) + } + + return [self.analysis_file_tbl.get_abs_path(file) for file in files] class TableChain(RestrGraph): diff --git a/src/spyglass/utils/mixins/export.py b/src/spyglass/utils/mixins/export.py index 1a7799246..ba05d14ce 100644 --- a/src/spyglass/utils/mixins/export.py +++ b/src/spyglass/utils/mixins/export.py @@ -319,7 +319,9 @@ def restrict(self, restriction): return super().restrict(restriction) log_export = "fetch_nwb" not in self._called_funcs() return self._run_with_log( - super().restrict, restriction=dj.AndList([restriction, self.restriction]), log_export=log_export + super().restrict, + restriction=dj.AndList([restriction, self.restriction]), + log_export=log_export, ) def join(self, other, log_export=True, *args, **kwargs): From 93a32229d279be91675d62d90fff2c4c100b63ad Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Thu, 31 Oct 2024 11:41:25 -0700 Subject: [PATCH 6/7] #1173 --- src/spyglass/utils/dj_mixin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index b75b8879c..090b1a3ee 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -172,7 +172,7 @@ def _nwb_table_tuple(self) -> tuple: table_dict[resolved], ) - def fetch_nwb(self, log_export=True, *attrs, **kwargs): + def fetch_nwb(self, *attrs, **kwargs): """Fetch NWBFile object from relevant table. Implementing class must have a foreign key reference to Nwbfile or @@ -184,6 +184,7 @@ def fetch_nwb(self, log_export=True, *attrs, **kwargs): """ table, tbl_attr = self._nwb_table_tuple + log_export = kwargs.pop("log_export", True) if log_export and self.export_id and "analysis" in tbl_attr: self._log_fetch_nwb(table, tbl_attr) From 1883feee36240b223321a40ecc1cc02ce0c81ceb Mon Sep 17 00:00:00 2001 From: cbroz1 Date: Thu, 31 Oct 2024 16:32:29 -0700 Subject: [PATCH 7/7] Add hex-blob arg to mysqldump --- src/spyglass/utils/sql_helper_fn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spyglass/utils/sql_helper_fn.py b/src/spyglass/utils/sql_helper_fn.py index a3b0898ef..99b2fa52e 100644 --- a/src/spyglass/utils/sql_helper_fn.py +++ b/src/spyglass/utils/sql_helper_fn.py @@ -76,10 +76,11 @@ def _write_sql_cnf(self): def _cmd_prefix(self, docker_id=None): """Get prefix for mysqldump command. Includes docker exec if needed.""" + default = "mysqldump --hex-blob " if not docker_id: - return "mysqldump " + return default return ( - f"docker exec -i {docker_id} \\\n\tmysqldump " + f"docker exec -i {docker_id} \\\n\t{default}" + "-u {user} --password={password} \\\n\t".format( **self._get_credentials() )