From 6f6d5e9a25d8d7109d87ccccb10da4006cfe78df Mon Sep 17 00:00:00 2001 From: Valerie Becker Date: Mon, 6 Jan 2025 13:40:07 -0700 Subject: [PATCH] Add schema migration and deployment process including edits to other docs --- alembic-autogenerate.py | 15 ++- doc/.gitignore | 2 + doc/contributor-guide/adding-columns.rst | 2 +- .../consdbclient-summit-utils.rst | 2 +- doc/index.rst | 4 +- doc/operator-guide/deployment.rst | 86 ++++++++++++- doc/operator-guide/monitoring.rst | 32 ++++- doc/operator-guide/runbook.rst | 113 +++++++++++++++++- .../schema-migration-process.rst | 82 ++++++++++++- .../consdb-client-library-in-summit-utils.rst | 2 +- doc/user-guide/description-and-goals.rst | 2 +- doc/user-guide/schemas.rst | 58 +++++++-- doc/user-guide/sql-clients.rst | 15 ++- doc/user-guide/tap-clients.rst | 15 ++- 14 files changed, 392 insertions(+), 38 deletions(-) diff --git a/alembic-autogenerate.py b/alembic-autogenerate.py index 92e1ad7f..252b0196 100755 --- a/alembic-autogenerate.py +++ b/alembic-autogenerate.py @@ -2,14 +2,19 @@ # # How to use this script: -# 1. Load the LSST environment and setup sdm_schemas and felis. -# source loadLSST.bash -# setup felis -# setup -r /path/to/sdm_schemas +# 1. Install required packages and sdm_schemas, set environment variables: +# pip install lsst-felis testing.postgresql alembic sqlalchemy pyyaml \ +# black psycopg2-binary +# git clone https://github.com/lsst/sdm_schemas +# cd sdm_schemas +# export SDM_SCHEMAS_DIR=`pwd` # 2. From the root of the consdb git repo, invoke the script. Supply a # revision message as the command line argument: -# python alembic-autogenerate.py DM-12345 +# python alembic-autogenerate.py this is my revision message "\n" \ +# the message can span multiple lines "\n" \ +# if desired # 3. Revise your auto-generated code as needed. +# 4. Remove the autogenerated creation of sql views (visit1, ccdvisit1).
# import os diff --git a/doc/.gitignore b/doc/.gitignore index ad2c2bbf..d213c868 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -8,3 +8,5 @@ doxygen.conf # Sphinx products _build py-api + +*.DS_Store diff --git a/doc/contributor-guide/adding-columns.rst b/doc/contributor-guide/adding-columns.rst index a52fb7fe..bc8f410f 100644 --- a/doc/contributor-guide/adding-columns.rst +++ b/doc/contributor-guide/adding-columns.rst @@ -7,7 +7,7 @@ Structure - ConsDB content must relate to exposures or visits or observations structured like exposures. General time series should go in the Engineering and Facilities Database (EFD). - ConsDB content should generally be scalar values. Large amounts of data, especially arrays or images or cubes, should generally go into the Large File Annex (LFA). -- Avoid arrays expressed as individual columns (e.g. ``something0``, ``something1``, ``something2``) where possible, as this increases the number of columns drastically (and there is `a limit `_), makes it hard to query (``SELECT`` clauses need to list all of these individually, and ``WHERE`` clauses may need to include large ``OR`` or ``AND`` conditions), and potentially requires a lot of database storage space. +- Avoid arrays expressed as individual columns (e.g. ``something0``, ``something1``, ``something2``) where possible, as this increases the number of columns drastically (and there is `a limit `__), makes it hard to query (``SELECT`` clauses need to list all of these individually, and ``WHERE`` clauses may need to include large ``OR`` or ``AND`` conditions), and potentially requires a lot of database storage space. - Columns should be named in all lowercase with underscore (``_``) separators, also known as "snake_case". 
Data sources diff --git a/doc/developer-guide/consdbclient-summit-utils.rst b/doc/developer-guide/consdbclient-summit-utils.rst index 5a56da13..5eb4efc5 100644 --- a/doc/developer-guide/consdbclient-summit-utils.rst +++ b/doc/developer-guide/consdbclient-summit-utils.rst @@ -2,4 +2,4 @@ ConsDbClient in summit_utils ############################ -How to write and test code in summit_utils for ConsDbClient \ No newline at end of file +How to write and test code in summit_utils for ConsDbClient diff --git a/doc/index.rst b/doc/index.rst index eae2c8ac..bb642a45 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,8 +3,8 @@ ConsDB ====== -``lsst.consdb`` is developed at https://github.com/lsst-dm/consdb. -You can find Jira issues for this module under the `consdb `_ component. +``lsst.consdb`` is developed at `https://github.com/lsst-dm/consdb `__. +You can find Jira issues for this module under the `ConsDB `__ component. ############# Documentation diff --git a/doc/operator-guide/deployment.rst b/doc/operator-guide/deployment.rst index 57d530ba..4cfce8f3 100644 --- a/doc/operator-guide/deployment.rst +++ b/doc/operator-guide/deployment.rst @@ -2,5 +2,87 @@ Deployment ########### -* Database -* REST API Server +Database +======== + +Deployments of the Consolidated Database are currently located at + +- Summit +- USDF (+ dev, use the same underlying database, a replication of Summit) +- Base Test Stand (BTS) +- Tucson Test Stand (TTS) + +Updates to these deployments may be needed when there are edits to the schema for any of the cdb_* tables defined in sdm_schemas. 
+ +Tools: +------ + +- Argo-CD +- LOVE +- Felis + +Repositories: +------------- + +- `phalanx `__ +- `sdm_schemas `__ +- `consdb `__ + +Access needed: +-------------- + +- NOIRLab VPN +- Summit VPN +- USDF + +Process: +-------- + + +Deploy code to populate db at Summit and/or USDF +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Follow the testing steps above for testing alembic migration and code at TTS/BTS, before you consider deploying at the summit. + +The steps to deploy at the summit mirror the steps to test on a test stand with coordination and permission from the observers and site teams. +Access to argo-cd deployments is available via the Summit OpenVPN. +To coordinate your deployment update on the summit, you must attend the Coordination Activities Planning (CAP) meeting on Tuesday mornings and announce your request. + +Add your migration intentions to the CAP SITCOM confluence agenda `here `__ + +The CAP members may tell you a time frame that is acceptable for you to perform these changes. + +They may also tell you specific people to coordinate with to help you take images to test LATISS and LSSTCOMCAMSIM tables. There will be more tables to test eventually. + +Channels to note: #rubinobs-test-planning; #summit-announce; #summit-auxtel, and `channel usage guide `__. + +When you get your final approval and designated time to perform the changes to ConsDB, announce on #summit-announce, and follow similar steps as test stand procedure above. + +USDF Deployment Steps +^^^^^^^^^^^^^^^^^^^^^ + +These steps must happen in synchrony with a Summit migration. + +1. Disable (pause) SUBSCRIPTION at USDF. +2. Perform the migration at the summit with the steps below. +3. Connect to the USDF database via psql and perform the alembic migration. +4. Check or test as agreed upon with the ConsDB team. +5. Enable and Refresh Subscription at USDF. + +If there is no impact or coordination with Summit needed: Run alembic migration at USDF, and test as appropriate. 
+ +Summit Deployment Steps +^^^^^^^^^^^^^^^^^^^^^^^ + +1. Use a branch in ``phalanx`` to point to the ConsDB tag for deployment. +2. Set the Argo-CD application ``consdb``'s target revision to your ``phalanx`` branch. +3. Refresh the ConsDB application and review pod logs. +4. Connect to the summit database via psql and perform the alembic migration. +5. Have an image taken with the observing team, then verify database entries with a SQL query or Jupyter notebook. +6. Check your new entries in the database using a Jupyter notebook or SQL query in RSP showing your new image has been inserted into the database as expected. + +Once deployment succeeds, set the ``Target Revision`` in Argo-CD back to ``main`` and complete the ``phalanx`` PR for the tested ConsDB tag. + + +REST API Server +=============== \ No newline at end of file diff --git a/doc/operator-guide/monitoring.rst b/doc/operator-guide/monitoring.rst index a7d80755..b04a0e0e 100644 --- a/doc/operator-guide/monitoring.rst +++ b/doc/operator-guide/monitoring.rst @@ -2,5 +2,33 @@ Monitoring ########### -* Database -* REST API Server +Reporting channels +================== + +Users of ConsDB, ConsDBClient (``pqserver``) should report issues via #consolidated-database in rubin-obs.slack.com. + +ConsDB operators monitor this channel and #ops-usdf, #ops-usdf-alerts for issues and outages reported, as well as escalate verified database issues. + +Database +======== + +The ConsDB team is responsible for verifying whether or not the database is up when issues are reported. + +They can check the method reported by the users, check using ``psql``/ ``pgcli``, and check in the #ops-usdf slack channel for currently reported issues. + +Once the ConsDB team has confirmed there is an issue with the database, they should notify #ops-usdf slack channel and USDF DBAs should be responsible for fixing/restarting. 
+ +REST API Server +=============== + +If we suspect the API server died, the ConsDB team should be responsible for checking and restarting it. + +Use the appropriate argo-cd deployment graph to check deployment logs, and potentially restart the service. + + +Other issues +------------ + +If the K8s infrastructure died then the ConsDB team can verify the problem, but there are likely to be wider issues seen. + +USDF or Summit K8s/IT support should be responsible for fixing. \ No newline at end of file diff --git a/doc/operator-guide/runbook.rst b/doc/operator-guide/runbook.rst index c3f4a73e..2aeebb9e 100644 --- a/doc/operator-guide/runbook.rst +++ b/doc/operator-guide/runbook.rst @@ -2,4 +2,115 @@ RunBook ######## -Maybe from ConsDb Usage Confluence page? +Confluence runbook initial incomplete version `here `__ + +Overview +======== + +This application does ... + +Its design and architecture are documented at ... + +Usage +===== + +Most users +---------- + +Administration +-------------- + +Architecture +============ + +Kubernetes vclusters used + +Relevant policies + +S3DF Dependencies +----------------- + +Kubernetes +Weka storage for Kubernetes +... 
+ +Systems +------- + +Components, Kubernetes namespaces, deployments + +Backups +------- + +Associated Systems +------------------ + +IAM +=== + +Requesting Access +----------------- + +Key Roles +--------- + +Service Accounts +---------------- + +Network +======= + +External endpoints, IP and port, encryption, authentication, clients, API + +SLAC-internal endpoints, IP and port, encryption, authentication, clients, API + +Configuration +============= + +GitHub repos with deployments + +Monitoring +========== + +Grafana or other links + +Maintenance +=========== + +Periodic tasks + +Documentation and Training +========================== + +Links to documentation and training resources + +LSST io page at `consdb.lsst.io `__ + +Support +======= + +#consolidated-database + +Overall complaints: +------------------- + +Kian-Tat Lim + +ConsDB services (hinfo, pqserver): +-------------------------------------- + +Brian Brondel , Valerie Becker + +Transformed EFD component: +-------------------------- + +Rodrigo Boufleur , Glauber Costa Vila Verde + +``consdb`` component in Jira. + + +Known Issues +============ + +Standard Procedures +=================== diff --git a/doc/operator-guide/schema-migration-process.rst b/doc/operator-guide/schema-migration-process.rst index 25750dbe..1b85bc78 100644 --- a/doc/operator-guide/schema-migration-process.rst +++ b/doc/operator-guide/schema-migration-process.rst @@ -2,8 +2,80 @@ Schema Migration Process ######################## -* Add columns to sdm_schemas -* Create alembic migration -* Test migration and code to populate the new columns/tables at TTS/BTS if Summit schema is changing -* Deploy migration in synchrony at Summit (if necessary), USDF, and Prompt Release (if necessary) -* Deploy code to populate at Summit and/or USDF +Add columns to sdm_schemas +========================== + +First, add the requested database additions, justifications, and where they are generated to our `confluence entry table `__. 
+Then, create a ticket and edit the `repository `__ to apply your schema changes to any of the ``cdb_*.yml`` schemas. + +If your sdm_schemas PR has issues, check that the schema conforms to Felis's data model and valid SQL tables can be created with `felis validate/create `__ +Alembic migrations should be automatically created by a git workflow after your sdm_schemas pull request completes. + + +Create an Alembic Migration (manually) +====================================== + +`Alembic `__ keeps track of versioning by autogenerated migrations to sync the test stands and summit databases. +Versioning our database schema changes allows us to apply edits and move the database’s state forward or backward as needed. + +1. Create an Alembic migration on your ConsDB ticket branch. +2. Use the script ``consdb/alembic-autogenerate.py`` to generate Alembic migrations. +3. Follow the directions in the header of the script, then run ``python alembic-autogenerate.py`` to create version files in respective database-named directories in ``consdb/alembic/``. +4. Manually edit the generated files in ``consdb/alembic//`` to: + + - Remove the ``visit1`` and ``ccdvisit1`` views. + - Ensure constraints and renamed columns are correct. + +Test alembic migration +====================== +Before merging your ConsDB migration PR or applying this migration to the Summit, you must test applying the migration in a test environment. +Test both applying the migration and any code that populates the new columns/tables at TTS/BTS if Summit schema is changing. + + +1. Update the deployment on the test stand: +------------------------------------------- + +1. Choose the appropriate test stand (TTS, BTS) +2. Create a branch in ``phalanx`` and edit the corresponding test stand environment file ``phalanx/applications/consdb/values-.yaml`` to point to your branch's built docker image (tickets-DM-###). +3. 
Coordinate and announce in the appropriate slack channel that you will begin testing your migrations. +4. Update the ConsDB deployment in ``/argo-cd`` to use your ``phalanx`` branch in the ``Target Revision``. Refresh and check pod logs. +5. Verify the tables that you will be upgrading exist using ``psql`` +6. From the ``consdb/`` directory, (where ``alembic.ini`` file is) use the alembic commands to upgrade the existing database tables: ``alembic upgrade head -n <database>`` +7. Deploy new ConsDB software (``hinfo``, ``pqserver``) and check the initial logs. + +2. Test with LATISS imaging in ATQueue: +--------------------------------------- + +See `TTS Start Guide `__ for guidelines on using the test stands. + +Access LOVE via ``/love`` and use the 1Password admin information to sign in, or your SLAC username and password. +Navigate to the ATQueue or Auxiliary Telescope (AuxTel) Script Queue. + +- Before editing these scripts, note their starting configurations, as we will return the scripts to this configuration when we are done. + +Take a test/simulated picture with LATISS through the ATQueue using these three scripts: + +1. ``set_summary_state.py`` Change the configuration to set ATHeaderService and ATCamera to ENABLED. +2. ``enable_latiss.py`` Remove any existing configuration. +3. ``take_image_latiss.py`` Update the configuration to remove anything that is not 'nimages' (1) and 'image_type' (BIAS or DARK or FLAT) + +Once you have put these three scripts in the queue, click ``run``. +Watch for errors in both the Script Queue and the Argo-CD ConsDB pod logs and ``hinfo-latiss`` deployment. +Address any errors and retest. + +Check the database by using ``psql`` commands like ``\dt`` to display the table names and maybe even ``SELECT * from cdb_latiss.exposure where day_obs = <day_obs>;`` to view the most recent data. + +Run set_summary_state to set ATHeaderService and ATCamera back to STANDBY, and return LATISS back to STANDBY. 
+Then return these three scripts to their original configurations. + +If you have encountered errors in this process, do not proceed to the summit, but address those errors and retest them with your ``phalanx`` branch pointing to your ConsDB branch with the fix to these errors. + +If tests are successful, create a pull request for the Alembic migration in ConsDB. Tag the release according to ``standards-practices`` guidelines. +Update your existing ``phalanx`` branch to point the environment based deployments to this ConsDB tag. + +You are able to retest on the test stand at this point, hopefully there were no changes to your ConsDB pull request so this step is trivial. + +Deploy migration in synchrony at Summit (if necessary), USDF, and Prompt Release (if necessary) +----------------------------------------------------------------------------------------------- + +See deployment page for specific environment deployment steps \ No newline at end of file diff --git a/doc/user-guide/consdb-client-library-in-summit-utils.rst b/doc/user-guide/consdb-client-library-in-summit-utils.rst index 9c28aa86..0d632eb3 100644 --- a/doc/user-guide/consdb-client-library-in-summit-utils.rst +++ b/doc/user-guide/consdb-client-library-in-summit-utils.rst @@ -2,4 +2,4 @@ ConsDB Client Library in summit_utils ###################################### -Querying using ConsDbClient \ No newline at end of file +Querying using ConsDbClient diff --git a/doc/user-guide/description-and-goals.rst b/doc/user-guide/description-and-goals.rst index f31bb04d..63ae5e24 100644 --- a/doc/user-guide/description-and-goals.rst +++ b/doc/user-guide/description-and-goals.rst @@ -2,4 +2,4 @@ Description and Goals ###################### -https://dmtn-227.lsst.io/ +`Our data management technote (227) `__ diff --git a/doc/user-guide/schemas.rst b/doc/user-guide/schemas.rst index bc179a4e..e3de744f 100644 --- a/doc/user-guide/schemas.rst +++ b/doc/user-guide/schemas.rst @@ -2,12 +2,52 @@ Schemas ######## -* Types of 
schemas - * Summit for observers and Summit systems - * Smallest, contains primary key information from HeaderService and additional information from other Summit systems, including experimental and engineering data - * USDF for staff and analytical uses - * Largest, contains a full replica of the Summit plus additional information from USDF systems including Prompt Processing and Data Release Production, possibly Calibration Products Production, and human annotations from processing campaigns - * Release for science users - * Near-real-time "prompt" ConsDB replicates a subset of the USDF version - * Data Release ConsDB is a snapshot of a subset of the USDF version with data pertaining to the exposures/visits in the DR -* Schema browser +Types of schemas +================ +Summit for observers and Summit systems +--------------------------------------- +Summit schemas are the smallest. They should contain primary key information from HeaderService and additional information from other Summit systems, including experimental and engineering data. + +USDF for staff and analytical uses +---------------------------------- +USDF schemas are the largest. They contain a full replica of the Summit schemas plus additional information from USDF systems. +These other systems include Prompt Processing and Data Release Production, possibly Calibration Products Production, and human annotations from processing campaigns. 
+ +Release for science users +------------------------- +* Near-real-time "prompt" ConsDB replicates a subset of the USDF version +* Data Release (DR) ConsDB is a snapshot of a subset of the USDF version with data pertaining to the exposures/visits in the DR + +Schema browser +============== + +Versioning +========== + +(https://rubin-obs.slack.com/archives/C07QJMQ7L4A/p1730482605167509) + +- Schemas are using semantic versioning +- Should be consistent across all schemas, not just ConsDB + +major: backward incompatible changes to the database objects (adding a table, deleting a column) +- except adding a table is not backwards incompatible + +minor: backward compatible changes to the database objects (adding a column) +patch: updates or additions to semantics/metadata (units, UCDs, etc.) +- changing units can create incompatibilities + +And we should say what should happen in the case of changes to primary/foreign keys. +- Semantic neutrality: becoming non-primary is unique and anything becoming primary was already unique + + - or there can be ones that are not neutral. + +Think about the utility of these versions in terms of interaction with the ConsDB APIs, migrations, etc. + +Do sdm_schemas versions appear in the db? + +Currently the schemas are tagged and versioned as a set, at least w.r.t. the Science Platform. +So once ConsDB is available in TAP, it should be part of that set. + +What do users see, how does TAP play into this, do the schemas need this type of micro versioning? 
+ +- Services/consdb repo versioning strategy - services of monthly YY.0M.DD diff --git a/doc/user-guide/sql-clients.rst b/doc/user-guide/sql-clients.rst index 13f8ea0d..156166d1 100644 --- a/doc/user-guide/sql-clients.rst +++ b/doc/user-guide/sql-clients.rst @@ -2,7 +2,14 @@ SQL Clients (not recommended) ############################## -* Connection information - * Summit - * USDF - * Release +Connection information +====================== + +Summit +------ + +USDF +---- + +Release +------- diff --git a/doc/user-guide/tap-clients.rst b/doc/user-guide/tap-clients.rst index 9345715b..b49142ff 100644 --- a/doc/user-guide/tap-clients.rst +++ b/doc/user-guide/tap-clients.rst @@ -2,7 +2,14 @@ TAP Clients ############ -* Connection information - * Summit - * USDF - * Release +Connection information +====================== + +Summit +------ + +USDF +---- + +Release +-------