From b684336e3c3ea03857dd1c0befb2d904f6524e8b Mon Sep 17 00:00:00 2001 From: Chris Brozdowski Date: Tue, 21 Nov 2023 06:07:54 -0800 Subject: [PATCH] Limited scope docs PR (#690) * WIP: database management doc * #686 * Spelling * Cherry pick from cbroz/main * Update changelog, uncomment docs jupyter --- CHANGELOG.md | 6 + docs/build-docs.sh | 2 + docs/mkdocs.yml | 7 +- docs/src/api/index.md | 27 +++ docs/src/api/make_pages.py | 29 ++- docs/src/index.md | 44 ++-- docs/src/misc/database_management.md | 229 ++++++++++++++++++ docs/src/misc/index.md | 9 + docs/src/misc/merge_tables.md | 2 +- notebooks/00_Setup.ipynb | 10 +- notebooks/py_scripts/00_Setup.py | 13 +- pyproject.toml | 2 +- src/spyglass/common/common_filter.py | 8 +- .../position/v1/position_dlc_centroid.py | 3 +- src/spyglass/settings.py | 9 +- src/spyglass/utils/database_settings.py | 3 +- 16 files changed, 361 insertions(+), 42 deletions(-) create mode 100644 docs/src/api/index.md create mode 100644 docs/src/misc/database_management.md create mode 100644 docs/src/misc/index.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c682248d..437b61ac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log +## [0.4.4] (November 7, 2023) + +- Additional documentation. #686 + ## [0.4.3] (November 7, 2023) - Migrate `config` helper scripts to Spyglass codebase. 
#662 @@ -95,6 +99,8 @@ - Allow creation and linkage of device metadata from YAML #400 - Move helper functions to utils directory #386 +[0.4.4]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.4 +[0.4.3]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.3 [0.4.2]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.2 [0.4.1]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.1 [0.4.0]: https://github.com/LorenFrankLab/spyglass/releases/tag/0.4.0 diff --git a/docs/build-docs.sh b/docs/build-docs.sh index 6f4c3aeaa..d5b2e0fc7 100644 --- a/docs/build-docs.sh +++ b/docs/build-docs.sh @@ -9,6 +9,8 @@ cp ./CHANGELOG.md ./docs/src/ cp ./LICENSE ./docs/src/LICENSE.md mkdir -p ./docs/src/notebooks cp ./notebooks/*ipynb ./docs/src/notebooks/ +cp ./notebooks/*md ./docs/src/notebooks/ +cp ./docs/src/notebooks/README.md ./docs/src/notebooks/index.md cp -r ./notebook-images ./docs/src/notebooks/ cp -r ./notebook-images ./docs/src/ diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 587eeb78c..32f789bb9 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -47,16 +47,19 @@ nav: - Home: index.md - Installation: installation.md - Miscellaneous: + - Overview: misc/index.md - FigURL: misc/figurl_views.md - Session Groups: misc/session_groups.md - Insert Data: misc/insert_data.md - Merge Tables: misc/merge_tables.md + - Database Management: misc/database_management.md - Tutorials: - - Overview: notebooks/README.md + - Overview: notebooks/index.md - General: - Setup: notebooks/00_Setup.ipynb - Insert Data: notebooks/01_Insert_Data.ipynb - Data Sync: notebooks/02_Data_Sync.ipynb + - Merge Tables: notebooks/03_Merge_Tables.ipynb - Ephys: - Spike Sorting: notebooks/10_Spike_Sorting.ipynb - Curation: notebooks/11_Curation.ipynb @@ -101,8 +104,6 @@ plugins: group_by_category: false line_length: 80 docstring_style: numpy - # watch: - # - src/spyglass/ - literate-nav: nav_file: navigation.md - exclude-search: diff --git a/docs/src/api/index.md 
b/docs/src/api/index.md new file mode 100644 index 000000000..0f5bf479d --- /dev/null +++ b/docs/src/api/index.md @@ -0,0 +1,27 @@ +# API Docs + +The files in this directory are automatically generated from the docstrings in +the source code. They include descriptions of each of the DataJoint tables and +other classes/methods within Spyglass. + +These docs are updated any time a new release is made or a tag is +pushed to the repository. + + diff --git a/docs/src/api/make_pages.py b/docs/src/api/make_pages.py index 8dcd37c1d..942f6ae09 100644 --- a/docs/src/api/make_pages.py +++ b/docs/src/api/make_pages.py @@ -4,15 +4,36 @@ from pathlib import Path import mkdocs_gen_files +from mkdocs.utils import log + +ignored_stems = ["__init__", "_version"] + +added = 0 +add_limit = None nav = mkdocs_gen_files.Nav() -for path in sorted(Path("src").glob("**/*.py")): - if path.stem == "__init__" or "cython" in path.stem: +for path in sorted(Path("src/spyglass/").glob("**/*.py")): + if path.stem in ignored_stems or "cython" in path.stem: continue - with mkdocs_gen_files.open(f"api/{path.with_suffix('')}.md", "w") as f: + rel_path = path.relative_to("src/spyglass") + with mkdocs_gen_files.open(f"api/{rel_path.with_suffix('')}.md", "w") as f: module_path = ".".join([p for p in path.with_suffix("").parts]) print(f"::: {module_path}", file=f) - nav[path.parts] = f"{path.with_suffix('')}.md" + nav[rel_path.parts] = f"{rel_path.with_suffix('')}.md" + + if add_limit is not None: + if added < add_limit: + log.warning(f"Generated {rel_path.with_suffix('')}.md") + added += 1 + else: + break + +if add_limit is not None: + from IPython import embed + + embed() + with mkdocs_gen_files.open("api/navigation.md", "w") as nav_file: + nav_file.write("* [Overview](../api/index.md)\n") nav_file.writelines(nav.build_literate_nav()) diff --git a/docs/src/index.md b/docs/src/index.md index 21efc2c71..c5eaf0338 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,24 +1,38 @@ # Spyglass 
-**Spyglass** is a data analysis framework that facilitates the -storage, analysis, and sharing of neuroscience data to support -reproducible research. It is designed to be interoperable with the NWB -format and integrates open-source tools into a coherent framework. +**Spyglass** is a data analysis framework that facilitates the storage, +analysis, and sharing of neuroscience data to support reproducible research. It +is designed to be interoperable with the NWB format and integrates open-source +tools into a coherent framework. -## Installation +## Getting Started -To install to this project, see [Installation](./installation.md). +This site hosts both [installation instructions](./installation.md) and +[tutorials](./notebooks/index.md) to help you get started with Spyglass. We +recommend running the notebooks yourself. They can be downloaded from GitHub +[here](https://github.com/LorenFrankLab/spyglass). -## Contributing +## Diving Deeper -For contribution instructions see [How to Contribute](./contribute.md) +The [API Reference](./api/index.md) provides a detailed description of all the +tables and class functions in Spyglass via python docstrings. Potential +contributors should also read the [Developer Guide](./contribute.md). Those +interested in hosting a Spyglass instance for their own data should read the +[database management guide](./misc/database_management.md). + +We have a series of additional docs under the [misc](./misc/index.md) folder +that may be helpful. Our [changelog](./CHANGELOG.md) highlights the changes that +have been made to Spyglass over time and the [copyright](./LICENSE.md) page +contains license information. 
## Citing Spyglass -Kyu Hyun Lee, Eric Denovellis, Ryan Ly, Jeremy Magland, Jeff Soules, -Alison Comrie, Jennifer Guidera, Rhino Nevers, Daniel Gramling, Philip -Adenekan, Ji Hyun Bak, Emily Monroe, Andrew Tritt, Oliver Rübel, Thinh -Nguyen, Dimitri Yatsenko, Joshua Chu, Caleb Kemere, Samuel Garcia, -Alessio Buccino, Emily Aery Jones, Lisa Giocomo, and Loren Frank. -'Spyglass: A Data Analysis Framework for Reproducible and Shareable -Neuroscience Research.' (2022) Society for Neuroscience, San Diego, CA. +Kyu Hyun Lee, Eric Denovellis, Ryan Ly, Jeremy Magland, Jeff Soules, Alison +Comrie, Jennifer Guidera, Rhino Nevers, Daniel Gramling, Philip Adenekan, Ji +Hyun Bak, Emily Monroe, Andrew Tritt, Oliver Rübel, Thinh Nguyen, Dimitri +Yatsenko, Joshua Chu, Caleb Kemere, Samuel Garcia, Alessio Buccino, Emily Aery +Jones, Lisa Giocomo, and Loren Frank. 'Spyglass: A Data Analysis Framework for +Reproducible and Shareable Neuroscience Research.' (2022) Society for +Neuroscience, San Diego, CA. + + diff --git a/docs/src/misc/database_management.md b/docs/src/misc/database_management.md new file mode 100644 index 000000000..dfcfa7eeb --- /dev/null +++ b/docs/src/misc/database_management.md @@ -0,0 +1,229 @@ +# Database Management + +While Spyglass can help you organize your data, there are a number of things +you'll need to do to manage users, database backups, and file cleanup. + +Some of these tasks should be set to run regularly. [Cron jobs](https://www.hostinger.com/tutorials/cron-job) +can help with automation. + +## MySQL Version + +The Frank Lab's database is running MySQL 8.0 with a number of custom +configurations set by our system admin to reflect UCSF's IT security +requirements. + +DataJoint's default docker container for MySQL is version 5.7. As the Spyglass +team has hit select compatibility issues, we've worked with the DataJoint team +to update the open source package to support MySQL 8.0. 
+ +While the Spyglass team won't be able to support earlier versions, if you run +into any issues declaring Spyglass tables with an 8.0 instance, please let us +know. + +## User Management + +The [DatabaseSettings](../api/utils/database_settings.md) class provides a +number of methods to help you manage users. By default, it will write out a +temporary `.sql` file and execute it on the database. + +### Privileges + +DataJoint schemas correspond to MySQL databases. Privileges are managed by +schema/database prefix. + +- `SELECT` privileges allow users to read, write, and delete data. +- `ALL` privileges allow users to create, alter, or drop tables and schemas in + addition to the operations above. + +In practice, DataJoint only permits alterations of secondary keys on existing +tables, and more destructive operations would require using DataJoint to +execute MySQL commands. + +Shared schema prefixes are those defined in the Spyglass package (e.g., +`common`, `lfp`, etc.). A 'user schema' is any schema with the username as +prefix. User types differ in the privileges they are granted on +these prefixes. + +### User types + +- `collab_user`: `ALL` on user schema, `SELECT` on all other schemas. +- `dj_guest`: `SELECT` on all schemas. +- `dj_user`: `ALL` on shared and user schema, `SELECT` on all other schemas. + +### Setting Passwords + +New users are generated with the password `temppass`. In order to change this, +we recommend downloading DataJoint `0.14.2` (currently pre-release). + +```console +git clone https://github.com/datajoint/datajoint-python/ +pip install ./datajoint-python +``` + +Then, the user can reset their password within Python: + +```python +import datajoint as dj +dj.set_password() +``` + +## Database Backups + +The following code blocks are a series of files used to back up our database and +migrate the contents to another server. 
Some conventions to note: + +- `.host`: files used in the host's context +- `.container`: files used inside the database Docker container +- `.env`: files used to set environment variables used by the scripts for + database name, backup name, and backup credentials + +This backup process uses a dedicated backup user that an admin would need to +create with the relevant permissions. + +### mysql.env.host + +
+MySQL host environment variables + +Values may be adjusted as needed for different building images. + +```bash +ROOT_PATH=/usr/local/containers/mysql # path to this container's working area + +# variables for building image +SRC=ubuntu +VER=20.04 +DOCKERFILE=Dockerfile.base + +# variables for referencing image +IMAGE=mysql8 +TAG=u20 +# variables for running the container +CNAME=mysql-datajoint +MACADDR=4e:b0:3d:42:e0:70 +RPORT=3306 + +# variables for initializing/relaunching the container +# - where the mysql data and backups will live - these values +# are examples +DB_PATH=/data/db +DB_DATA=mysql +DB_BACKUP=/data/mysql-backups + +# backup info +BACK_USER=mysql-backup +BACK_PW={password} +BACK_DBNAME={database} +# mysql root password - make sure to remove this AFTER the container +# is initialized - and this file will be replicated inside the container +# on initialization, so remove it from there: /opt/bin/mysql.env +``` + +
+ +### backup-database.sh.host + +This script runs the mysql-backup container script (exec inside the container) +that dumps the database contents for each database as well as the entire +database. Use cron to set this to run on your desired schedule. + +
+MySQL host docker exec + +```bash +#!/bin/bash + +PRIOR_DIR=$(pwd) +cd /usr/local/containers/mysql || exit +. mysql.env +cd "$(dirname ${ROOT_PATH})" +# +docker exec ${CNAME} /opt/bin/mysql-backup.csh +# +cd "$(dirname ${DB_BACKUP})" +# +cd ${PRIOR_DIR} +``` + +
+ +### mysql-backup-xfer.csh.host + +This script transfers the backup to another server 'X' and is specific for us as +it uses passwordless ssh keys to a local unprivileged user on X that has the +mysql backup area on X as that user's home. + +
+MySQL host transfer script + +```bash +#!/bin/csh +set td=`date +"%Y%m%d"` +cd /data/mysql-backups +scp -P {port} -i ~/mysql-backup -r ${database}-${td} mysql-backup@${X}:~/ +/bin/rm -r lmf-db-${td} +``` + +
+ +### myenv.csh.container + +
+Docker container environment variables + +```bash +set db_backup=mysql-backups +set back_user=mysql-backup +set back_pw={password} +set back_dbname={database} +``` + +
+ +### mysql-backup.csh.container + +
+Generate backups from within container + +```bash +#!/bin/csh +source /opt/bin/myenv.csh +set td=`date +"%Y%m%d"` +cd /${db_backup} +mkdir ${back_dbname}-${td} + +set list=`echo "show databases;" | mysql --user=${back_user} --password=${back_pw}` +set cnt=0 + +foreach db ($list) + if ($cnt == 0) then + echo "dumping mysql databases on $td" + else + echo "dumping MySQL database : $db" + # Per-schema backups + mysqldump $db --max_allowed_packet=512M --user=${back_user} --password=${back_pw} > /${db_backup}/${back_dbname}-${td}/mysql.${db}.sql + endif +@ cnt = $cnt + 1 +end +# Full database backup +mysqldump --all-databases --max_allowed_packet=512M --user=${back_user} --password=${back_pw} > /${db_backup}/${back_dbname}-${td}/mysql-all.sql +``` + +
+ +## File Cleanup + +Spyglass is designed to hold metadata for analyses that reference NWB files on +disk. There are several tables that retain lists of files that have been generated +during analyses. If someone deletes analysis entries, files will still be on disk. + +To remove orphaned files, we run the following commands in our cron jobs: + +```python +from spyglass.common import AnalysisNwbfile +from spyglass.spikesorting import SpikeSorting + +def main(): + AnalysisNwbfile().nightly_cleanup() + SpikeSorting().nightly_cleanup() +``` diff --git a/docs/src/misc/index.md b/docs/src/misc/index.md new file mode 100644 index 000000000..9b3991cb6 --- /dev/null +++ b/docs/src/misc/index.md @@ -0,0 +1,9 @@ +# Misc Docs + +This folder contains miscellaneous supporting documentation. + +- [Database Management](./database_management.md) +- [FigURL Views](./figurl_views.md) +- [Insert Data](./insert_data.md) +- [Merge Tables](./merge_tables.md) +- [Session Groups](./session_groups.md) diff --git a/docs/src/misc/merge_tables.md b/docs/src/misc/merge_tables.md index dc10e2dfc..c5419d350 100644 --- a/docs/src/misc/merge_tables.md +++ b/docs/src/misc/merge_tables.md @@ -70,7 +70,7 @@ The Merge class in Spyglass's utils is a subclass of DataJoint's [Manual Table](https://datajoint.com/docs/core/design/tables/tiers/#data-entry-lookup-and-manual) and adds functions to make the awkwardness of part tables more manageable. These functions are described in the -[API section](../../api/src/spyglass/utils/dj_merge_tables.md), under +[API section](../api/utils/dj_merge_tables.md), under `utils.dj_merge_tables`. 
### Restricting diff --git a/notebooks/00_Setup.ipynb b/notebooks/00_Setup.ipynb index ecf012f27..2bb273bf1 100644 --- a/notebooks/00_Setup.ipynb +++ b/notebooks/00_Setup.ipynb @@ -219,12 +219,16 @@ "- Add yourself to the\n", " [`docker` group](https://docs.docker.com/engine/install/linux-postinstall/) so\n", " that you don't have to be sudo to run docker.\n", - "- Download the docker image for datajoint/mysql\n", + "- Download the docker image for `datajoint/mysql:8.0`.\n", "\n", " ```bash\n", - " docker pull datajoint/mysql\n", + " docker pull datajoint/mysql:8.0\n", " ```\n", "\n", + "_Note_: For this demo, MySQL version won't matter. Some\n", + " [database management](https://lorenfranklab.github.io/spyglass/latest/misc/database_management/#mysql-version)\n", + " features of Spyglass, however, expect MySQL >= 8.\n", + "\n", "- When run, this is referred to as a 'Docker container'\n", "- Next start the container with a couple additional pieces of info...\n", "\n", @@ -233,7 +237,7 @@ " - Port mapping. Here, we map 3306 across the local machine and container.\n", "\n", " ```bash\n", - " docker run --name spyglass-db -p 3306:3306 -e MYSQL_ROOT_PASSWORD=tutorial datajoint/mysql\n", + " docker run --name spyglass-db -p 3306:3306 -e MYSQL_ROOT_PASSWORD=tutorial datajoint/mysql:8.0\n", " ```\n", "\n", "- For data to persist after terminating the container,\n", diff --git a/notebooks/py_scripts/00_Setup.py b/notebooks/py_scripts/00_Setup.py index bdc46d164..21bb57083 100644 --- a/notebooks/py_scripts/00_Setup.py +++ b/notebooks/py_scripts/00_Setup.py @@ -5,7 +5,7 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.14.5 +# jupytext_version: 1.15.2 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -185,12 +185,16 @@ # - Add yourself to the # [`docker` group](https://docs.docker.com/engine/install/linux-postinstall/) so # that you don't have to be sudo to run docker. 
-# - Download the docker image for datajoint/mysql +# - Download the docker image for `datajoint/mysql:8.0`. # # ```bash -# docker pull datajoint/mysql +# docker pull datajoint/mysql:8.0 # ``` # +# _Note_: For this demo, MySQL version won't matter. Some +# [database management](https://lorenfranklab.github.io/spyglass/latest/misc/database_management/#mysql-version) +# features of Spyglass, however, expect MySQL >= 8. +# # - When run, this is referred to as a 'Docker container' # - Next start the container with a couple additional pieces of info... # @@ -199,7 +203,7 @@ # - Port mapping. Here, we map 3306 across the local machine and container. # # ```bash -# docker run --name spyglass-db -p 3306:3306 -e MYSQL_ROOT_PASSWORD=tutorial datajoint/mysql +# docker run --name spyglass-db -p 3306:3306 -e MYSQL_ROOT_PASSWORD=tutorial datajoint/mysql:8.0 # ``` # # - For data to persist after terminating the container, @@ -240,6 +244,7 @@ # + import os + import datajoint as dj if os.path.basename(os.getcwd()) == "notebooks": diff --git a/pyproject.toml b/pyproject.toml index ca4339aef..0b1511d98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,6 @@ exclude = [] line-length = 80 [tool.codespell] -skip = '.git,*.pdf,*.svg,*.ipynb,./docs/site/**' +skip = '.git,*.pdf,*.svg,*.ipynb,./docs/site/**,temp*' # Nevers - name in Citation ignore-words-list = 'nevers' diff --git a/src/spyglass/common/common_filter.py b/src/spyglass/common/common_filter.py index b137b197d..2f3e12fac 100644 --- a/src/spyglass/common/common_filter.py +++ b/src/spyglass/common/common_filter.py @@ -225,7 +225,7 @@ def filter_data_nwb( electrode_ids: list, decimation: int, description: str = "filtered data", - type: Union[None, str] = None, + data_type: Union[None, str] = None, ): """ Filter data from an NWB electrical series using the ghostipy package, @@ -256,6 +256,10 @@ def filter_data_nwb( The NWB object ID of the filtered data and a list containing the first and last timestamps. 
""" + # Note: type -> data_type to avoid conflict with builtin type + # All existing refs to this func use positional args, so no need to + # adjust elsewhere, but low probability of issues with custom scripts + MEM_USE_LIMIT = 0.9 # % of RAM use permitted gsp = _import_ghostipy() @@ -323,7 +327,7 @@ def filter_data_nwb( timestamps=np.empty(output_shape_list[time_axis]), description=description, ) - if type == "LFP": + if data_type == "LFP": ecephys_module = nwbf.create_processing_module( name="ecephys", description=description ) diff --git a/src/spyglass/position/v1/position_dlc_centroid.py b/src/spyglass/position/v1/position_dlc_centroid.py index 62650a9e9..351f577aa 100644 --- a/src/spyglass/position/v1/position_dlc_centroid.py +++ b/src/spyglass/position/v1/position_dlc_centroid.py @@ -467,8 +467,7 @@ def fetch1_dataframe(self): def four_led_centroid(pos_df: pd.DataFrame, **params): - """ - Determines the centroid of 4 LEDS on an implant LED ring. + """Determines the centroid of 4 LEDS on an implant LED ring. Assumed to be the Green LED, and 3 red LEDs called: redLED_C, redLED_L, redLED_R By default, uses (greenled + redLED_C) / 2 to calculate centroid If Green LED is NaN, but red center LED is not, diff --git a/src/spyglass/settings.py b/src/spyglass/settings.py index 03eda414e..9ae32639b 100644 --- a/src/spyglass/settings.py +++ b/src/spyglass/settings.py @@ -18,17 +18,14 @@ class SpyglassConfig: facilitate testing. """ - def __init__(self, base_dir=None, **kwargs): + def __init__(self, base_dir: str = None, **kwargs): """ Initializes a new instance of the class. Parameters ---------- - base_dir (str): The base directory. - - Returns - ------- - None + base_dir (str) + The base directory. 
""" self.supplied_base_dir = base_dir self._config = dict() diff --git a/src/spyglass/utils/database_settings.py b/src/spyglass/utils/database_settings.py index 3a29b3834..8d4f299bd 100644 --- a/src/spyglass/utils/database_settings.py +++ b/src/spyglass/utils/database_settings.py @@ -65,9 +65,10 @@ def add_collab_user(self): @property def _add_dj_guest_sql(self): + # Note: changing to temppass for uniformity return [ # Create the user (if not already created) and set the password - f"{CREATE_USR}'{self.user}'@'%' IDENTIFIED BY 'Data_$haring';\n", + f"{CREATE_USR}'{self.user}'@'%'{TEMP_PASS}\n", # Grant privileges f"{GRANT_SEL}`%`.* TO '{self.user}'@'%';\n", ]