From c0e0ee9162e079029f30e0968802c57894c86cca Mon Sep 17 00:00:00 2001
From: Chris Brozdowski
Date: Fri, 9 Feb 2024 17:57:49 -0800
Subject: [PATCH 01/19] Fault-permit insert and remove mutual exclusivity
 protections on Merge (#824)

* #719, #804, #212

* #768

* Add merge delete and populate

* Changes following PR review @edeno

* Replace delayed import of ImportedSpikeSorting

---
 CHANGELOG.md                               | 47 ++++++-----
 src/spyglass/common/common_behav.py        | 21 +++++
 src/spyglass/common/common_usage.py        | 18 ++++-
 src/spyglass/common/populate_all_common.py | 93 ++++++++++++----------
 src/spyglass/linearization/__init__.py     |  4 +-
 src/spyglass/utils/dj_merge_tables.py      | 91 ++++++++++++---------
 src/spyglass/utils/dj_mixin.py             | 15 ++--
 7 files changed, 180 insertions(+), 109 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc991e655..c6a9fa232 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,27 +4,38 @@

 ### Infrastructure

-- Additional documentation. #690
-- Clean up following pre-commit checks. #688
-- Add Mixin class to centralize `fetch_nwb` functionality. #692, #734
-- Refactor restriction use in `delete_downstream_merge` #703
-- Add `cautious_delete` to Mixin class
-  - Initial implementation. #711, #762
-  - More robust caching of join to downstream tables. #806
-  - Overwrite datajoint `delete` method to use `cautious_delete`. #806
-  - Reverse join order for session summary. #821
-  - Add temporary logging of use to `common_usage`. #811, #821
-- Add `deprecation_factory` to facilitate table migration. #717
-- Add Spyglass logger. #730
-- IntervalList: Add secondary key `pipeline` #742
-- Increase pytest coverage for `common`, `lfp`, and `utils`. #743
-- Update docs to reflect new notebooks. #776
-- Add overview of Spyglass to docs. #779
-- Update linting for Black 24. #808
-- Streamline dependency management. #822
+- Docs:
+  - Additional documentation. #690
+  - Add overview of Spyglass to docs. #779
+  - Update docs to reflect new notebooks. #776
+- Mixin:
+  - Add Mixin class to centralize `fetch_nwb` functionality. #692, #734
+  - Refactor restriction use in `delete_downstream_merge` #703
+  - Add `cautious_delete` to Mixin class
+    - Initial implementation. #711, #762
+    - More robust caching of join to downstream tables. #806
+    - Overwrite datajoint `delete` method to use `cautious_delete`. #806
+    - Reverse join order for session summary. #821
+    - Add temporary logging of use to `common_usage`. #811, #821
+- Merge Tables:
+  - UUIDs: Revise Merge table UUID generation to include source. #824
+  - UUIDs: Remove mutual exclusivity logic due to new UUID generation. #824
+  - Add method for `merge_populate`. #824
+- Linting:
+  - Clean up following pre-commit checks. #688
+  - Update linting for Black 24. #808
+- Misc:
+  - Add `deprecation_factory` to facilitate table migration. #717
+  - Add Spyglass logger. #730
+  - Increase pytest coverage for `common`, `lfp`, and `utils`. #743
+  - Streamline dependency management. #822

 ### Pipelines

+- Common:
+  - `IntervalList`: Add secondary key `pipeline` #742
+  - Add `common_usage` table. #811, #821, #824
+  - Catch errors during `populate_all_common`. #824
 - Spike sorting:
   - Add SpikeSorting V1 pipeline. #651
  - Move modules into spikesorting.v0 #807
diff --git a/src/spyglass/common/common_behav.py b/src/spyglass/common/common_behav.py
index d7d4759fb..f2a2226d8 100644
--- a/src/spyglass/common/common_behav.py
+++ b/src/spyglass/common/common_behav.py
@@ -42,6 +42,25 @@ class SpatialSeries(SpyglassMixin, dj.Part):
         name=null: varchar(32)       # name of spatial series
         """

+    def populate(self, keys=None):
+        """Insert position source data from NWB file.
+
+        WARNING: populate on a Manual table is not protected by the
+        transactions used by other DataJoint tables.
+        """
+        if not isinstance(keys, list):
+            keys = [keys]
+        if isinstance(keys[0], dj.Table):
+            keys = [k for tbl in keys for k in tbl.fetch("KEY", as_dict=True)]
+        for key in keys:
+            nwb_file_name = key.get("nwb_file_name")
+            if not nwb_file_name:
+                raise ValueError(
+                    "PositionSource.populate is an alias for a non-computed table "
+                    + "and must be passed a key with nwb_file_name"
+                )
+            self.insert_from_nwbfile(nwb_file_name)
+
     @classmethod
     def insert_from_nwbfile(cls, nwb_file_name):
         """Add intervals to IntervalList and PositionSource.
@@ -482,6 +501,7 @@ def _no_transaction_make(self, key):

         # Skip populating if no pos interval list names
         if len(pos_intervals) == 0:
+            # TODO: Now that populate_all accepts errors, raise here?
             logger.error(f"NO POS INTERVALS FOR {key}; {no_pop_msg}")
             return

@@ -519,6 +539,7 @@ def _no_transaction_make(self, key):

         # Check that each pos interval was matched to only one epoch
         if len(matching_pos_intervals) != 1:
+            # TODO: Now that populate_all accepts errors, raise here?
             logger.error(
                 f"Found {len(matching_pos_intervals)} pos intervals for {key}; "
                 + f"{no_pop_msg}\n{matching_pos_intervals}"
diff --git a/src/spyglass/common/common_usage.py b/src/spyglass/common/common_usage.py
index ccd58091d..fdf7ae99d 100644
--- a/src/spyglass/common/common_usage.py
+++ b/src/spyglass/common/common_usage.py
@@ -1,6 +1,7 @@
 """A schema to store the usage of advanced Spyglass features.

-Records show usage of features such as table chains, which will be used to
+Records show usage of features such as cautious delete and fault-permitting
+insert, which will be used to
 determine which features are used, how often, and by whom. This will help plan
 future development of Spyglass.
""" @@ -21,3 +22,18 @@ class CautiousDelete(dj.Manual): restriction: varchar(255) merge_deletes = null: blob """ + + +@schema +class InsertError(dj.Manual): + definition = """ + id: int auto_increment + --- + dj_user: varchar(64) + connection_id: int # MySQL CONNECTION_ID() + nwb_file_name: varchar(64) + table: varchar(64) + error_type: varchar(64) + error_message: varchar(255) + error_raw = null: blob + """ diff --git a/src/spyglass/common/populate_all_common.py b/src/spyglass/common/populate_all_common.py index b2fa7d760..2972ed145 100644 --- a/src/spyglass/common/populate_all_common.py +++ b/src/spyglass/common/populate_all_common.py @@ -1,3 +1,5 @@ +import datajoint as dj + from spyglass.common.common_behav import ( PositionSource, RawPosition, @@ -14,51 +16,58 @@ from spyglass.common.common_nwbfile import Nwbfile from spyglass.common.common_session import Session from spyglass.common.common_task import TaskEpoch +from spyglass.common.common_usage import InsertError from spyglass.utils import logger def populate_all_common(nwb_file_name): - # Insert session one by one - fp = [(Nwbfile & {"nwb_file_name": nwb_file_name}).proj()] - logger.info("Populate Session...") - Session.populate(fp) - - # If we use Kachery for data sharing we can uncomment the following two lines. TBD - # logger.info('Populate NwbfileKachery...') - # NwbfileKachery.populate() - - logger.info("Populate ElectrodeGroup...") - ElectrodeGroup.populate(fp) - - logger.info("Populate Electrode...") - Electrode.populate(fp) - - logger.info("Populate Raw...") - Raw.populate(fp) - - logger.info("Populate SampleCount...") - SampleCount.populate(fp) - - logger.info("Populate DIOEvents...") - DIOEvents.populate(fp) - - # sensor data (from analog ProcessingModule) is temporarily removed from NWBFile - # to reduce file size while it is not being used. add it back in by commenting out - # the removal code in spyglass/data_import/insert_sessions.py when ready - # logger.info('Populate SensorData') - # SensorData.populate(fp) - - logger.info("Populate TaskEpochs") - TaskEpoch.populate(fp) - logger.info("Populate StateScriptFile") - StateScriptFile.populate(fp) - logger.info("Populate VideoFile") - VideoFile.populate(fp) - logger.info("RawPosition...") - PositionSource.insert_from_nwbfile(nwb_file_name) - RawPosition.populate(fp) - - logger.info("Populate ImportedSpikeSorting...") + """Insert all common tables for a given NWB file.""" from spyglass.spikesorting.imported import ImportedSpikeSorting - ImportedSpikeSorting.populate(fp) + key = [(Nwbfile & f"nwb_file_name LIKE '{nwb_file_name}'").proj()] + tables = [ + Session, + # NwbfileKachery, # Not used by default + ElectrodeGroup, + Electrode, + Raw, + SampleCount, + DIOEvents, + # SensorData, # Not used by default. 
Generates large files
+        TaskEpoch,
+        StateScriptFile,
+        VideoFile,
+        PositionSource,
+        RawPosition,
+        ImportedSpikeSorting,
+    ]
+    error_constants = dict(
+        dj_user=dj.config["database.user"],
+        connection_id=dj.conn().connection_id,
+        nwb_file_name=nwb_file_name,
+    )
+
+    for table in tables:
+        logger.info(f"Populating {table.__name__}...")
+        try:
+            table.populate(key)
+        except Exception as e:
+            InsertError.insert1(
+                dict(
+                    **error_constants,
+                    table=table.__name__,
+                    error_type=type(e).__name__,
+                    error_message=str(e),
+                    error_raw=str(e),
+                )
+            )
+    query = InsertError & error_constants
+    if query:
+        err_tables = query.fetch("table")
+        logger.error(
+            f"Errors occurred during population for {nwb_file_name}:\n\t"
+            + f"Failed tables {err_tables}\n\t"
+            + "See common_usage.InsertError for more details"
+        )
+    return query.fetch("KEY")
diff --git a/src/spyglass/linearization/__init__.py b/src/spyglass/linearization/__init__.py
index 681df507c..e6a9504ea 100644
--- a/src/spyglass/linearization/__init__.py
+++ b/src/spyglass/linearization/__init__.py
@@ -1 +1,3 @@
-from spyglass.linearization.merge import LinearizedPositionOutput
+# CB: Circular import if only importing PositionOutput
+
+# from spyglass.linearization.merge import LinearizedPositionOutput
diff --git a/src/spyglass/utils/dj_merge_tables.py b/src/spyglass/utils/dj_merge_tables.py
index f15183dfe..14c8c20db 100644
--- a/src/spyglass/utils/dj_merge_tables.py
+++ b/src/spyglass/utils/dj_merge_tables.py
@@ -3,13 +3,14 @@
 from inspect import getmodule
 from itertools import chain as iter_chain
 from pprint import pprint
+from time import time
 from typing import Union

 import datajoint as dj
 from datajoint.condition import make_condition
 from datajoint.errors import DataJointError
 from datajoint.preview import repr_html
-from datajoint.utils import from_camel_case, get_master, to_camel_case
+from datajoint.utils import from_camel_case, to_camel_case
 from IPython.core.display import HTML

 from spyglass.utils.logging import logger
@@ -248,6 +249,9 @@ def _merge_repr(cls, restriction: str = True) -> dj.expression.Union:
                 return_empties=False,  # motivated by SpikeSortingOutput.Import
             )
         ]
+        if not parts:
+            logger.warning("No parts found. Try adjusting restriction.")
+            return
+
         attr_dict = {  # NULL for non-numeric, 0 for numeric
             attr.name: "0" if attr.numeric else "NULL"
@@ -274,10 +278,8 @@ def _proj_part(part):
         return query

     @classmethod
-    def _merge_insert(
-        cls, rows: list, part_name: str = None, mutual_exclusvity=True, **kwargs
-    ) -> None:
-        """Insert rows into merge, ensuring db integrity and mutual exclusivity
+    def _merge_insert(cls, rows: list, part_name: str = None, **kwargs) -> None:
+        """Insert rows into merge, ensuring data exists in part parent(s).
        Parameters
        ----------
        rows: List[dict]
            An iterable where an element is a dictionary.

        Raises
        ------
        TypeError
            If rows is not a list of dicts
        ValueError
-            If entry already exists, mutual exclusivity errors
             If data doesn't exist in part parents, integrity error
         """
         cls._ensure_dependencies_loaded()

+        type_err_msg = "Input `rows` must be a list of dictionaries"
         try:
             for r in iter(rows):
-                assert isinstance(
-                    r, dict
-                ), 'Input "rows" must be a list of dictionaries'
+                if not isinstance(r, dict):
+                    raise TypeError(type_err_msg)
         except TypeError:
-            raise TypeError('Input "rows" must be a list of dictionaries')
+            raise TypeError(type_err_msg)

         parts = cls._merge_restrict_parts(as_objects=True)
         if part_name:
@@ -315,30 +316,24 @@ def _merge_insert(
         master_entries = []
         parts_entries = {p: [] for p in parts}
         for row in rows:
-            keys = []  # empty to-be-inserted key
+            keys = []  # empty to-be-inserted keys
             for part in parts:  # check each part
-                part_parent = part.parents(as_objects=True)[-1]
                 part_name = cls._part_name(part)
+                part_parent = part.parents(as_objects=True)[-1]
                 if part_parent & row:  # if row is in part parent
-                    if keys and mutual_exclusvity:  # if key from other part
-                        raise ValueError(
-                            "Mutual Exclusivity Error! Entry exists in more "
-                            + f"than one table - Entry: {row}"
-                        )
-
                     keys = (part_parent & row).fetch("KEY")  # get pk
                     if len(keys) > 1:
                         raise ValueError(
                             "Ambiguous entry. Data has multiple rows in "
                             + f"{part_name}:\n\tData:{row}\n\t{keys}"
                         )
-                    master_pk = {  # make uuid
-                        cls()._reserved_pk: dj.hash.key_hash(keys[0]),
-                    }
-                    parts_entries[part].append({**master_pk, **keys[0]})
-                    master_entries.append(
-                        {**master_pk, cls()._reserved_sk: part_name}
-                    )
+                    key = keys[0]
+                    master_sk = {cls()._reserved_sk: part_name}
+                    uuid = dj.hash.key_hash(key | master_sk)
+                    master_pk = {cls()._reserved_pk: uuid}
+
+                    master_entries.append({**master_pk, **master_sk})
+                    parts_entries[part].append({**master_pk, **key})

             if not keys:
                 raise ValueError(
@@ -369,27 +364,22 @@ def _ensure_dependencies_loaded(cls) -> None:
         if not dj.conn.connection.dependencies._loaded:
             dj.conn.connection.dependencies.load()

-    def insert(self, rows: list, mutual_exclusvity=True, **kwargs):
-        """Merges table specific insert
-
-        Ensuring db integrity and mutual exclusivity
+    def insert(self, rows: list, **kwargs):
+        """Merge-table-specific insert, ensuring data exists in part parents.

         Parameters
         ----------
         rows: List[dict]
             An iterable where an element is a dictionary.
-        mutual_exclusvity: bool
-            Check for mutual exclusivity before insert. Default True.

         Raises
         ------
         TypeError
             If rows is not a list of dicts
         ValueError
-            If entry already exists, mutual exclusivity errors
             If data doesn't exist in part parents, integrity error
         """
-        self._merge_insert(rows, mutual_exclusvity=mutual_exclusvity, **kwargs)
+        self._merge_insert(rows, **kwargs)

     @classmethod
     def merge_view(cls, restriction: str = True):
@@ -586,6 +576,8 @@ def merge_get_part(
                 + "Try adding a restriction before invoking `get_part`.\n\t"
                 + "Or permitting multiple sources with `multi_source=True`."
            )

+        if len(sources) == 0:
+            return None

         parts = [
             (
@@ -770,12 +762,33 @@ def merge_fetch(self, restriction: str = True, *attrs, **kwargs) -> list:
             )
         return results[0] if len(results) == 1 else results

-    @classmethod
-    def merge_populate(source: str, key=None):
-        raise NotImplementedError(
-            "CBroz: In the future, this command will support executing "
-            + "part_parent `make` and then inserting all entries into Merge"
-        )
+    def merge_populate(self, source: str, keys=None):
+        """Populate the merge table with entries from the source table."""
+        logger.warning("CBroz: Not fully tested. Use with caution.")
+        parent_class = self.merge_get_parent_class(source)
+        if not keys:
+            keys = parent_class.key_source
+        parent_class.populate(keys)
+        successes = (parent_class & keys).fetch("KEY", as_dict=True)
+        self.insert(successes)
+
+    def delete(self, force_permission=False, *args, **kwargs):
+        """Alias for cautious_delete; overrides datajoint.table.Table.delete"""
+        for part in self.merge_get_part(
+            restriction=self.restriction,
+            multi_source=True,
+            return_empties=False,
+        ):
+            part.delete(force_permission=force_permission, *args, **kwargs)
+
+    def super_delete(self, *args, **kwargs):
+        """Alias for datajoint.table.Table.delete.

+        Added to support MRO of SpyglassMixin"""
+        logger.warning("!! Using super_delete. Bypassing cautious_delete !!")
+
+        self._log_use(start=time(), super_delete=True)
+        super().delete(*args, **kwargs)


 _Merge = Merge
diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py
index 0e18e3a5c..349476600 100644
--- a/src/spyglass/utils/dj_mixin.py
+++ b/src/spyglass/utils/dj_mixin.py
@@ -215,8 +215,8 @@ def delete_downstream_merge(
         if not merge_join_dict and not disable_warning:
             logger.warning(
                 f"No merge deletes found w/ {self.table_name} & "
-                + f"{restriction}.\n\tIf this is unexpected, try running with "
-                + "`reload_cache`."
+                + f"{restriction}.\n\tIf this is unexpected, try importing "
+                + "Merge table(s) and running with `reload_cache`."
             )

         if dry_run:
@@ -365,7 +365,7 @@ def _usage_table(self):

         return CautiousDelete()

-    def _log_use(self, start, merge_deletes=None):
+    def _log_use(self, start, merge_deletes=None, super_delete=False):
         """Log use of cautious_delete."""
         if isinstance(merge_deletes, QueryExpression):
             merge_deletes = merge_deletes.fetch(as_dict=True)
@@ -374,15 +374,13 @@ def _log_use(self, start, merge_deletes=None):
             dj_user=dj.config["database.user"],
             origin=self.full_table_name,
         )
+        restr_str = "Super delete: " if super_delete else ""
+        restr_str += "".join(self.restriction) if self.restriction else "None"
         try:
             self._usage_table.insert1(
                 dict(
                     **safe_insert,
-                    restriction=(
-                        "".join(self.restriction)[255:]  # handle list
-                        if self.restriction
-                        else "None"
-                    ),
+                    restriction=restr_str[:255],
                     merge_deletes=merge_deletes,
                 )
             )
@@ -455,4 +453,5 @@ def delete(self, force_permission=False, *args, **kwargs):

     def super_delete(self, *args, **kwargs):
         """Alias for datajoint.table.Table.delete."""
         logger.warning("!! Using super_delete. Bypassing cautious_delete !!")
+        self._log_use(start=time(), super_delete=True)
         super().delete(*args, **kwargs)

From be6e414fc1132f77ded648bab39a162f129d5fb8 Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Fri, 9 Feb 2024 18:09:17 -0800
Subject: [PATCH 02/19] Update CITATION.cff (#826)

* Update CITATION.cff

* Update change log

---
 CHANGELOG.md | 4 ++--
 CITATION.cff | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6a9fa232..db20daa1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # Change Log

-## [0.4.4] (Unreleased)
+## [0.5.0] (February 9, 2024)

 ### Infrastructure

@@ -170,4 +170,4 @@
 [0.4.1]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.1
 [0.4.2]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.2
 [0.4.3]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.3
-[0.4.4]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.4
+[0.5.0]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.5.0
diff --git a/CITATION.cff b/CITATION.cff
index 4dce7514b..2460e4494 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -146,5 +146,5 @@ keywords:
   - spike sorting
   - kachery
 license: MIT
-version: 0.4.3
-date-released: '2023-11-07'
+version: 0.5.0
+date-released: '2024-02-09'

From a02ba15f3b695c7de7916b7e0f05b80019539a73 Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Sat, 10 Feb 2024 09:29:04 -0800
Subject: [PATCH 03/19] Update ref

---
 docs/src/index.md | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 1a0233192..8fb8ae030 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -68,12 +68,14 @@ contains license information.

 ## Citing Spyglass

-Kyu Hyun Lee, Eric Denovellis, Ryan Ly, Jeremy Magland, Jeff Soules, Alison
-Comrie, Jennifer Guidera, Rhino Nevers, Daniel Gramling, Philip Adenekan, Ji
-Hyun Bak, Emily Monroe, Andrew Tritt, Oliver Rübel, Thinh Nguyen, Dimitri
-Yatsenko, Joshua Chu, Caleb Kemere, Samuel Garcia, Alessio Buccino, Emily Aery
-Jones, Lisa Giocomo, and Loren Frank. 'Spyglass: A Data Analysis Framework for
-Reproducible and Shareable Neuroscience Research.' (2022) Society for
-Neuroscience, San Diego, CA.
+> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Gramling, D.P.,
+  Guidera, J.A., Nevers, R., Adenekan, P., Bray, S., et al. (2024). Spyglass:
+a data analysis framework for reproducible and shareable neuroscience research.
+  bioRxiv.
+  [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
+
+*\* Equal contribution*
+
+See paper-related code [here](https://github.com/LorenFrankLab/spyglass-paper).

From c4e2cac72869ce9b55e61d682452f599455d7a69 Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Sat, 10 Feb 2024 09:35:43 -0800
Subject: [PATCH 04/19] Add MUA notebook and fix numbering.
--- docs/mkdocs.yml | 24 ++++++++++--------- ...cting_Clusterless_Waveform_Features.ipynb} | 0 ...ss.ipynb => 41_Decoding_Clusterless.ipynb} | 0 ...s.ipynb => 42_Decoding_SortedSpikes.ipynb} | 0 ...Detection.ipynb => 50_MUA_Detection.ipynb} | 0 5 files changed, 13 insertions(+), 11 deletions(-) rename notebooks/{41_Extracting_Clusterless_Waveform_Features.ipynb => 40_Extracting_Clusterless_Waveform_Features.ipynb} (100%) rename notebooks/{42_Decoding_Clusterless.ipynb => 41_Decoding_Clusterless.ipynb} (100%) rename notebooks/{43_Decoding_SortedSpikes.ipynb => 42_Decoding_SortedSpikes.ipynb} (100%) rename notebooks/{51_MUA_Detection.ipynb => 50_MUA_Detection.ipynb} (100%) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 3767f8dd4..054df7de9 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -43,13 +43,6 @@ theme: nav: - Home: index.md - Installation: installation.md - - Miscellaneous: - - Overview: misc/index.md - - FigURL: misc/figurl_views.md - - Session Groups: misc/session_groups.md - - Insert Data: misc/insert_data.md - - Merge Tables: misc/merge_tables.md - - Database Management: misc/database_management.md - Tutorials: - Overview: notebooks/index.md - Intro: @@ -61,7 +54,7 @@ nav: - Spikes: - Spike Sorting V0: notebooks/10_Spike_SortingV0.ipynb - Spike Sorting V1: notebooks/10_Spike_SortingV1.ipynb - - Curation: notebooks/11_Curation.ipynb + - Curation: notebooks/11_CurationV0.ipynb - Position: - Position Trodes: notebooks/20_Position_Trodes.ipynb - DLC Models: notebooks/21_DLC.ipynb @@ -72,9 +65,18 @@ nav: - Theta: notebooks/31_Theta.ipynb - Ripple Detection: notebooks/32_Ripple_Detection.ipynb - Decoding: - - Extract Clusterless: notebooks/41_Extracting_Clusterless_Waveform_Features.ipynb - - Decoding Clusterless: notebooks/42_Decoding_Clusterless.ipynb - - Decoding Sorted Spikes: notebooks/43_Decoding_SortedSpikes.ipynb + - Extracting Waveforms: notebooks/40_Extracting_Clusterless_Waveform_Features.ipynb + - Decoding Clusterless: notebooks/41_Decoding_Clusterless.ipynb + - Decoding Sorted Spikes: notebooks/42_Decoding_SortedSpikes.ipynb + - MUA: + - MUA Detection: notebooks/50_MUA_Detection.ipynb + - Miscellaneous: + - Overview: misc/index.md + - FigURL: misc/figurl_views.md + - Session Groups: misc/session_groups.md + - Insert Data: misc/insert_data.md + - Merge Tables: misc/merge_tables.md + - Database Management: misc/database_management.md - API Reference: api/ # defer to gen-files + literate-nav - How to Contribute: contribute.md - Change Log: CHANGELOG.md diff --git a/notebooks/41_Extracting_Clusterless_Waveform_Features.ipynb b/notebooks/40_Extracting_Clusterless_Waveform_Features.ipynb similarity index 100% rename from notebooks/41_Extracting_Clusterless_Waveform_Features.ipynb rename to notebooks/40_Extracting_Clusterless_Waveform_Features.ipynb diff --git a/notebooks/42_Decoding_Clusterless.ipynb b/notebooks/41_Decoding_Clusterless.ipynb similarity index 100% rename from notebooks/42_Decoding_Clusterless.ipynb rename to notebooks/41_Decoding_Clusterless.ipynb diff --git a/notebooks/43_Decoding_SortedSpikes.ipynb b/notebooks/42_Decoding_SortedSpikes.ipynb similarity index 100% rename from notebooks/43_Decoding_SortedSpikes.ipynb rename to notebooks/42_Decoding_SortedSpikes.ipynb diff --git a/notebooks/51_MUA_Detection.ipynb b/notebooks/50_MUA_Detection.ipynb similarity index 100% rename from notebooks/51_MUA_Detection.ipynb rename to notebooks/50_MUA_Detection.ipynb From b7a81d4522879d3ac498470240cc68a168e65ba1 Mon Sep 17 00:00:00 2001 From: Samuel Bray Date: Mon, 
12 Feb 2024 12:20:45 -0800 Subject: [PATCH 05/19] Only apply include labels filter if include_labels not empty (#827) * dont skip unit if include_labels list is empty * update check for np array size --- src/spyglass/spikesorting/analysis/v1/group.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/spyglass/spikesorting/analysis/v1/group.py b/src/spyglass/spikesorting/analysis/v1/group.py index 573697f1a..10ac27474 100644 --- a/src/spyglass/spikesorting/analysis/v1/group.py +++ b/src/spyglass/spikesorting/analysis/v1/group.py @@ -96,9 +96,10 @@ def filter_units( for ind, unit_labels in enumerate(labels): if isinstance(unit_labels, str): unit_labels = [unit_labels] - if np.all(~np.isin(unit_labels, include_labels)) or np.any( - np.isin(unit_labels, exclude_labels) - ): + if ( + include_labels.size > 0 + and np.all(~np.isin(unit_labels, include_labels)) + ) or np.any(np.isin(unit_labels, exclude_labels)): # if the unit does not have any of the include labels # or has any of the exclude labels, skip continue From 253241922120ddb4da815f645b41c96e4e4733e3 Mon Sep 17 00:00:00 2001 From: Chris Brozdowski Date: Tue, 13 Feb 2024 07:59:55 -0800 Subject: [PATCH 06/19] gh-actions docs fixes (#828) * Update 'latest' in docs deploy * Docs bugfix. Rename Link action, incorporate cspell. * MUA as own heading --- .github/workflows/black.yml | 10 ------- .github/workflows/{codespell.yml => lint.yml} | 15 ++++++----- .github/workflows/publish-docs.yml | 4 ++- .github/workflows/test-conda.yml | 2 ++ .github/workflows/test-package-build.yml | 1 + docs/build-docs.sh | 26 ++++++++++--------- docs/mkdocs.yml | 3 ++- notebooks/04_PopulateConfigFile.ipynb | 11 ++++++++ src/spyglass/utils/dj_chains.py | 4 +-- src/spyglass/utils/dj_mixin.py | 4 +-- 10 files changed, 45 insertions(+), 35 deletions(-) delete mode 100644 .github/workflows/black.yml rename .github/workflows/{codespell.yml => lint.yml} (61%) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index 0a8b8e925..000000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: Lint - -on: [push, pull_request] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: psf/black@stable \ No newline at end of file diff --git a/.github/workflows/codespell.yml b/.github/workflows/lint.yml similarity index 61% rename from .github/workflows/codespell.yml rename to .github/workflows/lint.yml index 44c1e44a5..a75f17768 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/lint.yml @@ -1,13 +1,14 @@ ---- -name: Codespell +name: Lint -on: - push: - branches: [master] - pull_request: - branches: [master] +on: [push, pull_request] jobs: + black: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + codespell: name: Check for spelling errors runs-on: ubuntu-latest diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index a7a31758e..8cabef999 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -40,7 +40,9 @@ jobs: - name: Deploy run: | - echo "OWNER: ${REPO_OWNER}" + FULL_VERSION=${{ github.ref }} + export MAJOR_VERSION=${FULL_VERSION:0:3} + echo "OWNER: ${REPO_OWNER}. 
BUILD: ${MAJOR_VERSION}" bash ./docs/build-docs.sh push $REPO_OWNER env: USERNAME: github-actions[bot] diff --git a/.github/workflows/test-conda.yml b/.github/workflows/test-conda.yml index 594a7b2b8..56347a38e 100644 --- a/.github/workflows/test-conda.yml +++ b/.github/workflows/test-conda.yml @@ -2,6 +2,8 @@ name: Test conda env and run tests on: push: + branches: + - '!test_branch' schedule: # once a day at midnight UTC - cron: '0 0 * * *' diff --git a/.github/workflows/test-package-build.yml b/.github/workflows/test-package-build.yml index 3d87548fc..8dab627fd 100644 --- a/.github/workflows/test-package-build.yml +++ b/.github/workflows/test-package-build.yml @@ -5,6 +5,7 @@ on: branches: - master - maint/* + - '!test_branch' tags: - "*" pull_request: diff --git a/docs/build-docs.sh b/docs/build-docs.sh index b36b0533d..74174e9ca 100755 --- a/docs/build-docs.sh +++ b/docs/build-docs.sh @@ -1,8 +1,7 @@ #!/bin/bash # Run this script from repo root to serve site: > bash ./docs/build-docs.sh serve # Then, navigate to localhost:8000/ to inspect site, then ctrl+c to exit -# For auto-reload during dev, use `mkdocs serve -f ./docs/mkdosc.yaml` - +# For auto-reload during dev, use `mkdocs serve -f ./docs/mkdocs.yaml` # Copy top-level repo files for docs display cp ./CHANGELOG.md ./docs/src/ @@ -14,26 +13,29 @@ mv ./docs/src/notebooks/README.md ./docs/src/notebooks/index.md cp -r ./notebook-images ./docs/src/notebooks/ cp -r ./notebook-images ./docs/src/ -# Get major version -version_line=$(grep "__version__ =" ./src/spyglass/_version.py) -version_string=$(echo "$version_line" | awk -F"[\"']" '{print $2}') -export MAJOR_VERSION="${version_string:0:3}" -echo "$MAJOR_VERSION" +if [ -z "$MAJOR_VERSION" ]; then # Get version from file + version_line=$(grep "__version__ =" ./src/spyglass/_version.py) + version_string=$(echo "$version_line" | awk -F"[\"']" '{print $2}') + export MAJOR_VERSION="${version_string:0:3}" +fi +echo "$MAJOR_VERSION" # May be available as env var # Get ahead of errors export JUPYTER_PLATFORM_DIRS=1 -# jupyter notebook --generate-config +jupyter notebook --generate-config -y &> /dev/null +jupyter trust ./docs/src/notebooks/*.ipynb &> /dev/null # Generate site docs -mike deploy "$MAJOR_VERSION" --config ./docs/mkdocs.yml -b documentation +mike deploy "$MAJOR_VERSION" --config ./docs/mkdocs.yml -b documentation \ + 2>&1 | grep -v 'kernel_spec' # Suppress kernel_spec errors # Label this version as latest, set as default -mike alias "$MAJOR_VERSION" latest --config ./docs/mkdocs.yml -b documentation -mike set-default latest --config ./docs/mkdocs.yml -b documentation +mike alias "$MAJOR_VERSION" latest -u --config ./docs/mkdocs.yml -b documentation +# mike set-default latest --config ./docs/mkdocs.yml -b documentation # # Serve site to localhost if [ "$1" == "serve" ]; then # If first arg is serve, serve docs - mike serve --config ./docs/mkdocs.yml -b documentation + mike serve --config ./docs/mkdocs.yml -b documentation | grep -v 'kernel_spec' elif [ "$1" == "push" ]; then # if first arg is push if [ -z "$2" ]; then # When no second arg, use local git user git_user=$(git config user.name) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 3767f8dd4..853ae8084 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -61,7 +61,7 @@ nav: - Spikes: - Spike Sorting V0: notebooks/10_Spike_SortingV0.ipynb - Spike Sorting V1: notebooks/10_Spike_SortingV1.ipynb - - Curation: notebooks/11_Curation.ipynb + - Curation: notebooks/11_CurationV0.ipynb - Position: - Position Trodes: 
notebooks/20_Position_Trodes.ipynb - DLC Models: notebooks/21_DLC.ipynb @@ -75,6 +75,7 @@ nav: - Extract Clusterless: notebooks/41_Extracting_Clusterless_Waveform_Features.ipynb - Decoding Clusterless: notebooks/42_Decoding_Clusterless.ipynb - Decoding Sorted Spikes: notebooks/43_Decoding_SortedSpikes.ipynb + - MUA Detection: notebooks/51_MUA_Detection.ipynb - API Reference: api/ # defer to gen-files + literate-nav - How to Contribute: contribute.md - Change Log: CHANGELOG.md diff --git a/notebooks/04_PopulateConfigFile.ipynb b/notebooks/04_PopulateConfigFile.ipynb index 23dce0f84..4ead237fb 100644 --- a/notebooks/04_PopulateConfigFile.ipynb +++ b/notebooks/04_PopulateConfigFile.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "68303e8a", "metadata": {}, "source": [ "# Customizing Data Insertion into Spyglass\n", @@ -44,6 +45,7 @@ }, { "cell_type": "markdown", + "id": "bcc87f67", "metadata": {}, "source": [ "## Define a Configuration YAML File\n", @@ -85,6 +87,7 @@ }, { "cell_type": "markdown", + "id": "fc6d0986", "metadata": {}, "source": [ "## Create Entries to Reference in the Configuration YAML\n", @@ -145,6 +148,7 @@ }, { "cell_type": "markdown", + "id": "9d641e00", "metadata": {}, "source": [ "## Example Ingestion with Real Data\n", @@ -171,6 +175,7 @@ { "cell_type": "code", "execution_count": null, + "id": "37aa5182", "metadata": {}, "outputs": [], "source": [ @@ -180,6 +185,7 @@ { "cell_type": "code", "execution_count": null, + "id": "aab5b47a", "metadata": {}, "outputs": [], "source": [ @@ -195,6 +201,7 @@ }, { "cell_type": "markdown", + "id": "d132e797", "metadata": {}, "source": [ "Then call `insert_sessions` as usual." @@ -203,6 +210,7 @@ { "cell_type": "code", "execution_count": null, + "id": "bed5c6e1", "metadata": {}, "outputs": [], "source": [ @@ -213,6 +221,7 @@ }, { "cell_type": "markdown", + "id": "d875b158", "metadata": {}, "source": [ "Confirm the session was inserted with the correct `DataAcquisitionDevice`" @@ -221,6 +230,7 @@ { "cell_type": "code", "execution_count": null, + "id": "8411cb43", "metadata": {}, "outputs": [], "source": [ @@ -234,6 +244,7 @@ }, { "cell_type": "markdown", + "id": "d85b1416", "metadata": {}, "source": [] } diff --git a/src/spyglass/utils/dj_chains.py b/src/spyglass/utils/dj_chains.py index 457e3e613..f281bfb37 100644 --- a/src/spyglass/utils/dj_chains.py +++ b/src/spyglass/utils/dj_chains.py @@ -221,7 +221,7 @@ def objects(self) -> List[dj.FreeTable]: ) def join( - self, restricton: str = None, reverse_order: bool = False + self, restriction: str = None, reverse_order: bool = False ) -> dj.expression.QueryExpression: """Return join of tables in chain with restriction applied to parent. @@ -237,7 +237,7 @@ def join( return None objects = self.objects[::-1] if reverse_order else self.objects - restriction = restricton or self.parent.restriction or True + restriction = restriction or self.parent.restriction or True join = objects[0] & restriction for table in objects[1:]: try: diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index 349476600..c4ddb41d8 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -30,7 +30,7 @@ class SpyglassMixin: a NWBFile table (including AnalysisNwbfile) or a _nwb_table attribute to determine which table to use. delte_downstream_merge(restriction=None, dry_run=True, reload_cache=False) - Delete downstream merge table entries associated with restricton. + Delete downstream merge table entries associated with restriction. 
+        Delete downstream merge table entries associated with restriction.
         Requires caching of merge tables and links, which is slow on first
         call. `restriction` can be set to a string to restrict the delete.
         `dry_run` can be set to False to commit the delete. `reload_cache` can
         be set to
@@ -178,7 +178,7 @@ def delete_downstream_merge(
         return_parts: bool = True,
         **kwargs,
     ) -> Union[List[QueryExpression], Dict[str, List[QueryExpression]]]:
-        """Delete downstream merge table entries associated with restricton.
+        """Delete downstream merge table entries associated with restriction.

         Requires caching of merge tables and links, which is slow on first
         call.

From 2816c9bec0680509454ebabfffeddcfde012114a Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Tue, 13 Feb 2024 08:40:31 -0800
Subject: [PATCH 07/19] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 2241868e1..ed980ad42 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@

 ![Spyglass Figure](docs/src/images/fig1.png)

+[Demo](https://spyglass.hhmi.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FLorenFrankLab%2Fspyglass-demo&urlpath=lab%2Ftree%2Fspyglass-demo%2Fnotebooks%2F01_Insert_Data.ipynb&branch=main) | [Installation](https://lorenfranklab.github.io/spyglass/latest/installation/) | [Docs](https://lorenfranklab.github.io/spyglass/) | [Tutorials](https://github.com/LorenFrankLab/spyglass/tree/master/notebooks) | [Citation](#citation)
+
 `spyglass` is a data analysis framework that facilitates the storage, analysis,
 visualization, and sharing of neuroscience data to support reproducible
 research. It is designed to be interoperable with the NWB format and integrates

From acf8622578edb1b8902ffffbb0859a50d2be436e Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Tue, 13 Feb 2024 13:11:35 -0500
Subject: [PATCH 08/19] Fix citation

---
 README.md         | 6 +-----
 docs/src/index.md | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index ed980ad42..01dd55277 100644
--- a/README.md
+++ b/README.md
@@ -83,11 +83,7 @@ License and Copyright notice can be found at

 ## Citation

-> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Gramling, D.P.,
-  Guidera, J.A., Nevers, R., Adenekan, P., Bray, S., et al. (2024). Spyglass:
-a data analysis framework for reproducible and shareable neuroscience research.
-  bioRxiv.
-  [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
+> Lee, K.H., Denovellis, E.L., Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).

 *\* Equal contribution*

diff --git a/docs/src/index.md b/docs/src/index.md
index 8fb8ae030..bafbc128f 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -68,11 +68,7 @@ contains license information.

 ## Citing Spyglass

-> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Gramling, D.P.,
-  Guidera, J.A., Nevers, R., Adenekan, P., Bray, S., et al. (2024). Spyglass:
-a data analysis framework for reproducible and shareable neuroscience research.
-  bioRxiv.
-  [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
+> Lee, K.H., Denovellis, E.L., Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).

From 4a75f8e63cb85746992fbee1f1ecdec36afdaee8 Mon Sep 17 00:00:00 2001
From: Eric Denovellis
Date: Tue, 13 Feb 2024 13:18:43 -0500
Subject: [PATCH 09/19] Fix citation

---
 README.md         | 2 +-
 docs/src/index.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 01dd55277..a1625f8d0 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ License and Copyright notice can be found at

 ## Citation

-> Lee, K.H., Denovellis, E.L., Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
+> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).

 *\* Equal contribution*

diff --git a/docs/src/index.md b/docs/src/index.md
index bafbc128f..3f70c1c9c 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -68,7 +68,7 @@ contains license information.

 ## Citing Spyglass

-> Lee, K.H., Denovellis, E.L., Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
+> Lee, K.H.\*, Denovellis, E.L.\*, Ly, R., Magland, J., Soules, J., Comrie, A.E., Gramling, D.P., Guidera, J.A., Nevers, R., Adenekan, P., Brozdowski, C., Bray, S., Monroe, E., Bak, J.H., Coulter, M.E., Sun, X., Tritt, A., Rübel, O., Nguyen, T., Yatsenko, D., Chu, J., Kemere, C., Garcia, S., Buccino, A., Frank, L.M., 2024. Spyglass: a data analysis framework for reproducible and shareable neuroscience research. bioRxiv. [10.1101/2024.01.25.577295](https://doi.org/10.1101/2024.01.25.577295).
 *\* Equal contribution*

From b857d08012959579d3583266f800001bffde8d09 Mon Sep 17 00:00:00 2001
From: Sam Bray
Date: Wed, 14 Feb 2024 14:01:04 -0800
Subject: [PATCH 10/19] include all relevant restrictions on video file

---
 src/spyglass/position/v1/dlc_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/spyglass/position/v1/dlc_utils.py b/src/spyglass/position/v1/dlc_utils.py
index 8ef76074e..714dd9eb4 100644
--- a/src/spyglass/position/v1/dlc_utils.py
+++ b/src/spyglass/position/v1/dlc_utils.py
@@ -419,7 +419,8 @@ def get_video_path(key):
     """
     import pynwb

-    vf_key = {"nwb_file_name": key["nwb_file_name"], "epoch": key["epoch"]}
+    valid_fields = VideoFile.fetch().dtype.fields.keys()
+    vf_key = {k: val for k, val in key.items() if k in valid_fields}
     VideoFile()._no_transaction_make(vf_key, verbose=False)
     video_query = VideoFile & vf_key

From afc150c2353025cba22ca4f17050e897d70065dd Mon Sep 17 00:00:00 2001
From: Chris Brozdowski
Date: Wed, 14 Feb 2024 17:31:33 -0800
Subject: [PATCH 11/19] Proposed structure for user roles. (#832)

* Add roles

* Remove use of unix user group. Add note for retroactive role assign

* Add docs on roles and external tables. Reduce key length

---
 CHANGELOG.md                              |   6 +
 config/add_dj_collaborator.py             |   2 +-
 config/add_dj_guest.py                    |   2 +-
 docs/src/misc/database_management.md      |  25 ++-
 src/spyglass/decoding/v1/sorted_spikes.py |   1 +
 .../spikesorting/analysis/v1/group.py      |   3 +-
 src/spyglass/utils/database_settings.py    | 164 +++++++++---------
 src/spyglass/utils/dj_mixin.py             |  21 +++
 8 files changed, 136 insertions(+), 88 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db20daa1a..eb05dd9ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Change Log

+## Unreleased
+
+### Infrastructure
+
+- Add user roles to `database_settings.py`. #832
+
 ## [0.5.0] (February 9, 2024)

 ### Infrastructure
diff --git a/config/add_dj_collaborator.py b/config/add_dj_collaborator.py
index cf6c0c48e..13dbf889d 100755
--- a/config/add_dj_collaborator.py
+++ b/config/add_dj_collaborator.py
@@ -9,4 +9,4 @@
         "This script is deprecated. "
         + "Use spyglass.utils.database_settings.DatabaseSettings instead."
     )
-    DatabaseSettings(user_name=sys.argv[1]).add_collab_user()
+    DatabaseSettings(user_name=sys.argv[1]).add_collab()
diff --git a/config/add_dj_guest.py b/config/add_dj_guest.py
index 1a087a3a8..80f55a141 100755
--- a/config/add_dj_guest.py
+++ b/config/add_dj_guest.py
@@ -9,4 +9,4 @@
         "This script is deprecated. "
         + "Use spyglass.utils.database_settings.DatabaseSettings instead."
     )
-    DatabaseSettings(user_name=sys.argv[1]).add_dj_guest()
+    DatabaseSettings(user_name=sys.argv[1]).add_guest()
diff --git a/docs/src/misc/database_management.md b/docs/src/misc/database_management.md
index 42f3125e0..5c00d2688 100644
--- a/docs/src/misc/database_management.md
+++ b/docs/src/misc/database_management.md
@@ -36,19 +36,28 @@ schema/database prefix.
 - `ALL` privileges allow users to create, alter, or drop tables and schemas in
   addition to operations above.

-In practice, DataJoint only permits alerations of secondary keys on existing
+In practice, DataJoint only permits alterations of secondary keys on existing
 tables, and more destructive operations would require using DataJoint to
 execute MySQL commands.

 Shared schema prefixes are those defined in the Spyglass package (e.g.,
 `common`, `lfp`, etc.). A 'user schema' is any schema with the username as
 prefix. User types differ in the privileges they are granted on these
 prefixes.
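+
+As a sketch, the roles described below can be created and attached from
+Python; `alice` is a hypothetical username:
+
+```python
+from spyglass.utils.database_settings import DatabaseSettings
+
+# One-time setup: create the dj_guest/dj_collab/dj_user/dj_admin roles
+DatabaseSettings().add_roles()
+# Add a user with the dj_user grants plus full access to their own prefix
+DatabaseSettings(user_name="alice").add_dj_user(check_exists=False)
+```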
+Declaring a table with the SpyglassMixin on a schema other than a shared module
+or the user's own prefix will raise a warning.

-### Users types
+### User roles
+
+When a database is first initialized, the team should run `add_roles` to create
+the following roles:

-- `collab_user`: `ALL` on user schema, `SELECT` on all other schemas.
 - `dj_guest`: `SELECT` on all schemas.
+- `dj_collab`: `ALL` on user schema, `SELECT` on all other schemas.
 - `dj_user`: `ALL` on shared and user schema, `SELECT` on all other schemas.
+- `dj_admin`: `ALL` on all schemas.
+
+If new shared modules are introduced, the `add_module` method should be used to
+expand the privileges of the `dj_user` role.

 ### Setting Passwords
@@ -224,9 +233,19 @@

 To remove orphaned files, we run the following commands in our cron jobs:

 ```python
 from spyglass.common import AnalysisNwbfile
 from spyglass.spikesorting import SpikeSorting
+from spyglass.common.common_nwbfile import schema as nwbfile_schema
+from spyglass.decoding.v1.sorted_spikes import schema as spikes_schema
+from spyglass.decoding.v1.clusterless import schema as clusterless_schema


 def main():
     AnalysisNwbfile().nightly_cleanup()
     SpikeSorting().nightly_cleanup()
+    nwbfile_schema.external['analysis'].delete(delete_external_files=True)
+    nwbfile_schema.external['raw'].delete(delete_external_files=True)
+    spikes_schema.external['analysis'].delete(delete_external_files=True)
+    clusterless_schema.external['analysis'].delete(delete_external_files=True)
 ```
+
+The `delete` calls above use DataJoint's `ExternalTable.delete` method, which
+will remove files from disk that are no longer referenced in the database.
diff --git a/src/spyglass/decoding/v1/sorted_spikes.py b/src/spyglass/decoding/v1/sorted_spikes.py
index 55d174e37..43a71d91e 100644
--- a/src/spyglass/decoding/v1/sorted_spikes.py
+++ b/src/spyglass/decoding/v1/sorted_spikes.py
@@ -47,6 +47,7 @@ class SortedSpikesDecodingSelection(SpyglassMixin, dj.Manual):
     -> IntervalList.proj(decoding_interval='interval_list_name')
     estimate_decoding_params = 1 : bool # whether to estimate the decoding parameters
     """
+    # NOTE: Excessive key length fixed by reducing UnitSelectionParams.unit_filter_params_name


 @schema
diff --git a/src/spyglass/spikesorting/analysis/v1/group.py b/src/spyglass/spikesorting/analysis/v1/group.py
index 10ac27474..3403ad0b5 100644
--- a/src/spyglass/spikesorting/analysis/v1/group.py
+++ b/src/spyglass/spikesorting/analysis/v1/group.py
@@ -14,11 +14,12 @@
 @schema
 class UnitSelectionParams(SpyglassMixin, dj.Manual):
     definition = """
-    unit_filter_params_name: varchar(128)
+    unit_filter_params_name: varchar(32)
     ---
     include_labels = Null: longblob
     exclude_labels = Null: longblob
     """
+    # NOTE: pk reduced from 128 to 32 to avoid long primary key error
     contents = [
         [
             "all_units",
diff --git a/src/spyglass/utils/database_settings.py b/src/spyglass/utils/database_settings.py
index 7fd1b44ff..8dacb3be1 100755
--- a/src/spyglass/utils/database_settings.py
+++ b/src/spyglass/utils/database_settings.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-import grp
 import os
 import sys
 import tempfile
@@ -9,11 +8,6 @@

 from spyglass.utils.logging import logger

-GRANT_ALL = "GRANT ALL PRIVILEGES ON "
-GRANT_SEL = "GRANT SELECT ON "
-CREATE_USR = "CREATE USER IF NOT EXISTS "
-TEMP_PASS = " IDENTIFIED BY 'temppass';"
-ESC = r"\_%"
 SHARED_MODULES = [
     "common",
     "spikesorting",
     "lfp",
     "position",
     "ripple",
     "linearization",
     "position_linearization",
     "waveform",
     "mua",
 ]
+GRANT_ALL = "GRANT ALL PRIVILEGES ON "
+GRANT_SEL = "GRANT SELECT ON "
+CREATE_USR = "CREATE USER IF NOT EXISTS "
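+# NOTE: CREATE ROLE and role grants require MySQL 8.0+ (or MariaDB 10.0.5+)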
+CREATE_ROLE = "CREATE ROLE IF NOT EXISTS "
+TEMP_PASS = " IDENTIFIED BY 'temppass';"
+ESC = r"\_%"


 class DatabaseSettings:
     def __init__(
         self,
         user_name=None,
         host_name=None,
-        target_group=None,
         debug=False,
         target_database=None,
     ):
         """Class to manage common database settings

+        Roles:
+        - dj_guest: select for all prefix
+        - dj_collab: select for all prefix, all for user prefix
+        - dj_user: select for all prefix, all for user prefix, all for shared
+        - dj_admin: all for all prefix
+
+        Note: To add dj_user role to all those with common access, run:
+        query = "SELECT user, host FROM mysql.db WHERE Db LIKE 'common%';"
+        users = dj.conn().query(query).fetchall()
+        for user in users:
+            dj.conn().query(f"GRANT dj_user TO '{user[0]}'@'%';")
+
         Parameters
         ----------
         user_name : str, optional
             The name of the user to add to the database. Default from dj.config
         host_name : str, optional
             The name of the host to add to the database. Default from dj.config
-        target_group : str, optional
-            Group to which user belongs. Default is kachery-users
         debug : bool, optional
             Default False. If True, pprint sql instead of running
         target_database : str, optional
@@ -56,91 +65,78 @@
         self.host = (
             host_name or dj.config["database.host"] or "lmf-db.cin.ucsf.edu"
         )
-        self.target_group = target_group or "kachery-users"
         self.debug = debug
         self.target_database = target_database or "mysql"

     @property
-    def _add_collab_usr_sql(self):
-        return [
-            # Create the user (if not already created) and set the password
-            f"{CREATE_USR}'{self.user}'@'%'{TEMP_PASS}\n",
-            # Grant privileges to databases matching the user_name pattern
-            f"{GRANT_ALL}`{self.user}{ESC}`.* TO '{self.user}'@'%';\n",
-            # Grant SELECT privileges on all databases
-            f"{GRANT_SEL}`%`.* TO '{self.user}'@'%';\n",
+    def _create_roles_sql(self):
+        guest_role = [
+            f"{CREATE_ROLE}'dj_guest';\n",
+            f"{GRANT_SEL}`%`.* TO 'dj_guest';\n",
+        ]
+        collab_role = [
+            f"{CREATE_ROLE}'dj_collab';\n",
+            f"{GRANT_SEL}`%`.* TO 'dj_collab';\n",
+        ]  # also gets own prefix below
+        user_role = [
+            f"{CREATE_ROLE}'dj_user';\n",
+            f"{GRANT_SEL}`%`.* TO 'dj_user';\n",
+        ] + [
+            f"{GRANT_ALL}`{module}`.* TO 'dj_user';\n"
+            for module in self.shared_modules
+        ]  # also gets own prefix below
+        admin_role = [
+            f"{CREATE_ROLE}'dj_admin';\n",
+            f"{GRANT_ALL}`%`.* TO 'dj_admin';\n",
         ]

-    def add_collab_user(self):
-        """Add collaborator user with full permissions to shared modules"""
-        file = self.write_temp_file(self._add_collab_usr_sql)
-        self.exec(file)
+        return guest_role + collab_role + user_role + admin_role
+
+    def _create_user_sql(self, role):
+        """Create user and grant role"""
+        return [
+            f"{CREATE_USR}'{self.user}'@'%'{TEMP_PASS}\n",  # create user
+            f"GRANT {role} TO '{self.user}'@'%';\n",  # grant role
+        ]

     @property
-    def _add_dj_guest_sql(self):
-        # Note: changing to temppass for uniformity
+    def _user_prefix_sql(self):
+        """Grant user all permissions for user prefix"""
         return [
-            # Create the user (if not already created) and set the password
-            f"{CREATE_USR}'{self.user}'@'%'{TEMP_PASS}\n",
-            # Grant privileges
-            f"{GRANT_SEL}`%`.* TO '{self.user}'@'%';\n",
+            f"{GRANT_ALL}`{self.user}{ESC}`.* TO '{self.user}'@'%';\n",
         ]

-    def add_dj_guest(self, method="file"):
-        """Add guest user with select permissions to shared modules"""
-        file = self.write_temp_file(self._add_dj_guest_sql)
-        self.exec(file)
+    @property
+    def _add_guest_sql(self):
+        return self._create_user_sql("dj_guest")

-    def _find_group(self):
-        # find the kachery-users group
-        groups =
grp.getgrall() - group_found = False # initialize the flag as False - for group in groups: - if group.gr_name == self.target_group: - group_found = ( - True # set the flag to True when the group is found - ) - break + @property + def _add_collab_sql(self): + return self._create_user_sql("dj_collab") + self._user_prefix_sql - # Check if the group was found - if not group_found: - if self.debug: - logger.info(f"All groups: {[g.gr_name for g in groups]}") - sys.exit( - f"Error: The target group {self.target_group} was not found." - ) + @property + def _add_user_sql(self): + return self._create_user_sql("dj_user") + self._user_prefix_sql - return group + def _add_module_sql(self, module_name): + return [f"{GRANT_ALL}`{module_name}{ESC}`.* TO dj_user;\n"] - def _add_module_sql(self, module_name, group): - return [ - f"{GRANT_ALL}`{module_name}{ESC}`.* TO `{user}`@'%';\n" - # get a list of usernames - for user in group.gr_mem - ] + def add_collab(self): + """Add collaborator user with full permissions to shared modules""" + file = self.write_temp_file(self._add_collab_sql) + self.exec(file) + + def add_guest(self): + """Add guest user with select permissions to shared modules""" + file = self.write_temp_file(self._add_guest_sql) + self.exec(file) def add_module(self, module_name): """Add module to database. Grant permissions to all users in group""" logger.info(f"Granting everyone permissions to module {module_name}") - group = self._find_group() - file = self.write_temp_file(self._add_module_sql(module_name, group)) + file = self.write_temp_file(self._add_module_sql(module_name)) self.exec(file) - @property - def _add_dj_user_sql(self): - return ( - [ - f"{CREATE_USR}'{self.user}'@'%' " - + "IDENTIFIED BY 'temppass';\n", - f"{GRANT_ALL}`{self.user}{ESC}`.* TO '{self.user}'@'%';" + "\n", - ] - + [ - f"{GRANT_ALL}`{module}`.* TO '{self.user}'@'%';\n" - for module in self.shared_modules - ] - + [f"{GRANT_SEL}`%`.* TO '{self.user}'@'%';\n"] - ) - def add_dj_user(self, check_exists=True): """Add user to database with permissions to shared modules""" if check_exists: @@ -149,10 +145,15 @@ def add_dj_user(self, check_exists=True): logger.info("Creating database user ", self.user) else: sys.exit( - f"Error: could not find {self.user} in home dir: {user_home}" + f"Error: couldn't find {self.user} in home dir: {user_home}" ) - file = self.write_temp_file(self._add_dj_user_sql) + file = self.write_temp_file(self._add_user_sql) + self.exec(file) + + def add_roles(self): + """Add roles to database""" + file = self.write_temp_file(self._create_roles_sql) self.exec(file) def write_temp_file(self, content: list) -> tempfile.NamedTemporaryFile: @@ -176,11 +177,10 @@ def exec(self, file): if self.debug: return - if self.target_database == "mysql": - cmd = f"mysql -p -h {self.host} < {file.name}" - else: - cmd = ( - f"docker exec -i {self.target_database} mysql -u {self.user} " - + f"--password=tutorial < {file.name}" - ) + cmd = ( + f"mysql -p -h {self.host} < {file.name}" + if self.target_database == "mysql" + else f"docker exec -i {self.target_database} mysql -u {self.user} " + + f"--password=tutorial < {file.name}" + ) os.system(cmd) diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index c4ddb41d8..1b4b24ff6 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -10,6 +10,7 @@ from datajoint.utils import get_master, user_choice from pymysql.err import DataError +from spyglass.utils.database_settings import SHARED_MODULES from spyglass.utils.dj_chains 
import TableChain, TableChains from spyglass.utils.dj_helper_fn import fetch_nwb from spyglass.utils.dj_merge_tables import RESERVED_PRIMARY_KEY as MERGE_PK @@ -55,6 +56,26 @@ class SpyglassMixin: _session_pk = None # Session primary key. Mixin is ambivalent to Session pk _member_pk = None # LabMember primary key. Mixin ambivalent table structure + def __init__(self, *args, **kwargs): + """Initialize SpyglassMixin. + + Checks that schema prefix is in SHARED_MODULES. + """ + if ( + self.database # Connected to a database + and not self.is_declared # New table + and self.database.split("_")[0] # Prefix + not in [ + *SHARED_MODULES, # Shared modules + dj.config["database.user"], # User schema + "temp", + "test", + ] + ): + logger.error( + f"Schema prefix not in SHARED_MODULES: {self.database}" + ) + # ------------------------------- fetch_nwb ------------------------------- @cached_property From 57f0d5b7ba9bd444d77e1d2fec43343dbcb8f16b Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Thu, 15 Feb 2024 16:11:55 -0500 Subject: [PATCH 12/19] Fix test for update of position tools (#835) Related to single LED halving the data bug --- tests/common/test_position.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/common/test_position.py b/tests/common/test_position.py index 8a7261c74..c0d32a11d 100644 --- a/tests/common/test_position.py +++ b/tests/common/test_position.py @@ -118,8 +118,8 @@ def test_fetch1_dataframe(interval_position_info, interval_pos_key): df_sums = {c: df[c].iloc[:5].sum() for c in df.columns} df_sums_exp = { "head_orientation": 4.4300073600180125, - "head_position_x": 111.25, - "head_position_y": 141.75, + "head_position_x": 222.5, + "head_position_y": 283.5, "head_speed": 0.6084872579024899, "head_velocity_x": -0.4329520555149495, "head_velocity_y": 0.42756198762527325, From 79a17e11d4285f9c357d9c8111dc29d70b30291e Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Fri, 16 Feb 2024 09:31:41 -0800 Subject: [PATCH 13/19] fix build error in mamba and restriction for dlc --- environment_dlc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment_dlc.yml b/environment_dlc.yml index e175e7408..45fd107c8 100644 --- a/environment_dlc.yml +++ b/environment_dlc.yml @@ -22,7 +22,6 @@ dependencies: - libgcc # dlc-only - matplotlib - non_local_detector - - numpy<1.24 - pip>=20.2.* - position_tools - pybind11 # req by mountainsort4 -> isosplit5 @@ -47,4 +46,5 @@ dependencies: - pynwb>=2.2.0,<3 - sortingview>=0.11 - spikeinterface>=0.98.2,<0.99 + - tensorflow<=2.12 # dlc-only - .[dlc] From bfc870c6673ca8ff87ba86cb06d93e26a73088a0 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Fri, 16 Feb 2024 09:33:23 -0800 Subject: [PATCH 14/19] flush stdout before converting mp4 --- src/spyglass/position/v1/dlc_utils.py | 29 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/spyglass/position/v1/dlc_utils.py b/src/spyglass/position/v1/dlc_utils.py index 714dd9eb4..e2dc1ff04 100644 --- a/src/spyglass/position/v1/dlc_utils.py +++ b/src/spyglass/position/v1/dlc_utils.py @@ -6,6 +6,7 @@ import pathlib import pwd import subprocess +import sys from collections import abc from contextlib import redirect_stdout from itertools import groupby @@ -538,17 +539,23 @@ def _convert_mp4( "copy", f"{dest_path.as_posix()}", ] - try: - convert_process = subprocess.Popen( - convert_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - except subprocess.CalledProcessError as err: - raise RuntimeError( - f"command {err.cmd} 
return with error (code {err.returncode}): {err.output}" - ) from err - out, _ = convert_process.communicate() - print(out.decode("utf-8")) - print(f"finished converting {filename}") + if dest_path.exists(): + print(f"{dest_path} already exists, skipping conversion") + else: + try: + sys.stdout.flush() + convert_process = subprocess.Popen( + convert_command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + except subprocess.CalledProcessError as err: + raise RuntimeError( + f"command {err.cmd} return with error (code {err.returncode}): {err.output}" + ) from err + out, _ = convert_process.communicate() + print(out.decode("utf-8")) + print(f"finished converting {filename}") print( f"Checking that number of packets match between {orig_filename} and {dest_filename}" ) From fa45fb62dac1b24ccd6bb1043f853b1f759e9a29 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Fri, 16 Feb 2024 15:56:52 -0500 Subject: [PATCH 15/19] Fix notebook name (#840) --- docs/mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 63e48af8d..5eb02d0b6 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -68,7 +68,7 @@ nav: - Extracting Waveforms: notebooks/40_Extracting_Clusterless_Waveform_Features.ipynb - Decoding Clusterless: notebooks/41_Decoding_Clusterless.ipynb - Decoding Sorted Spikes: notebooks/42_Decoding_SortedSpikes.ipynb - - MUA Detection: notebooks/51_MUA_Detection.ipynb + - MUA Detection: notebooks/50_MUA_Detection.ipynb - Miscellaneous: - Overview: misc/index.md - FigURL: misc/figurl_views.md From 4d3343822c4c356cec3c76630f49bee33642c6a2 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Fri, 16 Feb 2024 14:26:01 -0800 Subject: [PATCH 16/19] remove deprecated yaml.safe_load function --- src/spyglass/position/v1/position_dlc_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spyglass/position/v1/position_dlc_model.py b/src/spyglass/position/v1/position_dlc_model.py index 30bd3b1e8..5b1cf265b 100644 --- a/src/spyglass/position/v1/position_dlc_model.py +++ b/src/spyglass/position/v1/position_dlc_model.py @@ -196,7 +196,8 @@ def make(self, key): raise OSError(f"config_path {config_path} does not exist.") if config_path.suffix in (".yml", ".yaml"): with open(config_path, "rb") as f: - dlc_config = yaml.safe_load(f) + safe_yaml = yaml.YAML(typ="safe", pure=True) + dlc_config = safe_yaml.load(f) if isinstance(params["params"], dict): dlc_config.update(params["params"]) del params["params"] From e450392ff4cb7337e28042e1f9b68473aa65bd30 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Fri, 16 Feb 2024 19:52:52 -0500 Subject: [PATCH 17/19] Fix test --- tests/common/test_position.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/common/test_position.py b/tests/common/test_position.py index c0d32a11d..b10c0654b 100644 --- a/tests/common/test_position.py +++ b/tests/common/test_position.py @@ -120,9 +120,9 @@ def test_fetch1_dataframe(interval_position_info, interval_pos_key): "head_orientation": 4.4300073600180125, "head_position_x": 222.5, "head_position_y": 283.5, - "head_speed": 0.6084872579024899, - "head_velocity_x": -0.4329520555149495, - "head_velocity_y": 0.42756198762527325, + "head_speed": 1.2245733375331014, + "head_velocity_x": -0.865904111029899, + "head_velocity_y": 0.8551239752505465, } for k in df_sums: assert k in df_sums_exp, err_msg From 7ecc7d82e7dad1f6f0957212bee53c067e202751 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Mon, 19 Feb 2024 10:10:27 -0800 Subject: 
[PATCH 18/19] replace deprecated yaml.safe_load function --- src/spyglass/position/v1/dlc_reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spyglass/position/v1/dlc_reader.py b/src/spyglass/position/v1/dlc_reader.py index aed66ac4a..8d6c18c23 100644 --- a/src/spyglass/position/v1/dlc_reader.py +++ b/src/spyglass/position/v1/dlc_reader.py @@ -114,7 +114,8 @@ def pkl(self): def yml(self): if self._yml is None: with open(self.yml_path, "rb") as f: - self._yml = yaml.safe_load(f) + safe_yaml = yaml.YAML(typ="safe", pure=True) + self._yml = safe_yaml.load(f) return self._yml @property From 459d9cde327c0feeb0d8864f87f780ec00addc00 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Mon, 19 Feb 2024 10:11:51 -0800 Subject: [PATCH 19/19] only call no_transaction_make if video key not present --- src/spyglass/position/v1/dlc_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spyglass/position/v1/dlc_utils.py b/src/spyglass/position/v1/dlc_utils.py index e2dc1ff04..4d55bd844 100644 --- a/src/spyglass/position/v1/dlc_utils.py +++ b/src/spyglass/position/v1/dlc_utils.py @@ -422,7 +422,8 @@ def get_video_path(key): valid_fields = VideoFile.fetch().dtype.fields.keys() vf_key = {k: val for k, val in key.items() if k in valid_fields} - VideoFile()._no_transaction_make(vf_key, verbose=False) + if not VideoFile & vf_key: + VideoFile()._no_transaction_make(vf_key, verbose=False) video_query = VideoFile & vf_key if len(video_query) != 1:
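
Note on the revised Merge UUIDs from PATCH 01: the master UUID now hashes the
part-parent key together with the source (part-table) name, so the same parent
key inserted via two different sources yields two distinct merge IDs; this is
why the old mutual-exclusivity check could be dropped. A minimal sketch, with
hypothetical key and source names, using the same `dj.hash.key_hash` helper the
patch itself calls:

```python
import datajoint as dj

example_key = {"nwb_file_name": "example20240209_.nwb"}  # hypothetical key
for source in ("LFPV1", "ImportedLFPV1"):  # hypothetical part-table names
    # Same parent key, different source -> different UUID, so entries from
    # multiple sources can coexist under one Merge master without collision.
    uuid = dj.hash.key_hash({**example_key, "source": source})
    print(source, uuid)
```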