diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..63361f2 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @Sage-Bionetworks/sagebio-it @Sage-Bionetworks/Agora-Admin diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..db640e0 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,35 @@ +name: main + +on: + pull_request: + branches: ['**'] + push: + branches: ['develop', 'staging', 'prod' ] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: pre-commit/action@v3.0.0 + + deploy: + if: ${{ github.event_name == 'push' }} + needs: ["tests"] + # self-hosted runner labels are set up in GitHub to match the branch names + runs-on: [self-hosted, "${{ github.ref_name }}"] + # environments (and their secrets) are set up in GitHub to match the branch names + environment: + name: ${{ github.ref_name }} + + steps: + # use older checkout version due to https://github.com/dawidd6/action-download-artifact/issues/261 + - uses: actions/checkout@v2 + - name: Import Synapse Data + run: ./import-data.sh "$BRANCH" "$SYNAPSE_PASSWORD" "$DB_HOST" "$DB_USER" "$DB_PASS" + env: + BRANCH: ${{ github.ref_name }} + SYNAPSE_PASSWORD: ${{ secrets.SYNAPSE_PASSWORD }} + DB_HOST: ${{ secrets.DB_HOST }} + DB_USER: ${{ secrets.DB_USER }} + DB_PASS: ${{ secrets.DB_PASS }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d74508a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/adrienverge/yamllint + rev: v1.33.0 + hooks: + - id: yamllint + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.4 + hooks: + - id: remove-tabs + - repo: https://github.com/sirosen/check-jsonschema + rev: 0.27.0 + hooks: + - id: check-github-workflows + - id: 
check-github-actions diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 78db586..0000000 --- a/.travis.yml +++ /dev/null @@ -1,21 +0,0 @@ -language: python -python: 3.8 -cache: pip -fast_finish: true - -branches: - only: - - develop - - staging - - prod - -before_install: - - openssl aes-256-cbc -K $encrypted_3307e78034e0_key -iv $encrypted_3307e78034e0_iv -in agora-ci-develop.pem.enc -out ~/.ssh/agora-ci-develop.pem -d - - openssl aes-256-cbc -K $encrypted_76f307832d54_key -iv $encrypted_76f307832d54_iv -in agora-ci-prod.pem.enc -out ~/.ssh/agora-ci-staging.pem -d - - openssl aes-256-cbc -K $encrypted_76f307832d54_key -iv $encrypted_76f307832d54_iv -in agora-ci-prod.pem.enc -out ~/.ssh/agora-ci-prod.pem -d - - eval "$(ssh-agent -s)" - - chmod 600 ~/.ssh/*.pem - - ssh-add ~/.ssh/agora-ci-$TRAVIS_BRANCH.pem - -script: - - ./updatedb.sh || travis_terminate 1 diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..496d561 --- /dev/null +++ b/.yamllint @@ -0,0 +1,29 @@ +--- + +extends: default + +rules: + braces: + level: warning + max-spaces-inside: 1 + brackets: + level: warning + max-spaces-inside: 1 + colons: + level: warning + commas: + level: warning + comments: disable + comments-indentation: disable + document-start: disable + empty-lines: + level: warning + hyphens: + level: warning + indentation: + level: warning + indent-sequences: consistent + line-length: disable + truthy: disable + new-line-at-end-of-file: + level: warning diff --git a/README.md b/README.md index 2d33c73..0821942 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,125 @@ # Overview -Agora Data Manager is a tool that loads the JSON files into Agora's document database instances in our AWS environments. +Agora Data Manager is a tool that loads the JSON files into Agora's document database +instances in our AWS environments. # Purpose This project allows Agora maintainers to update the Agora database with new versions of gene data from Synapse. 
This is a manually triggered, -self-service update. +self-service update. # Execution ![alt text][db_update] -# Worflow +# Workflow To deploy an updated data version to the Agora development database 1. Increment `data-version` in `data-manifest.json` on the `develop` branch. 2. Commit the change -3. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the dev DB +3. The GitHub Actions CI system automatically updates the dev DB To deploy an updated data version to the Agora staging database: 1. Merge the data-version update from the dev branch to the staging branch. -2. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the staging DB +2. The GitHub Actions CI system automatically updates the staging DB To deploy an updated data version to the Agora production database: 1. Merge the data-version update from the staging branch to the production branch. -2. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the production DB +2. 
The GitHub Actions CI system automatically updates the production DB # Setup -The following environment variables need to be setup for the scripts to deploy database updates: +## Secrets -| Variable | Description | Example | -|----------------------|-----------------------------------|---------------------------------------------------------------------------| -| BASTIAN_HOST_develop | The bastian host | ec2-10-11-12-13.compute-1.amazonaws.com | -| DB_HOST_develop | The database host | dbcluster-mr0a782pfjnk.cluster-ctcayu3de2lt.us-east-1.docdb.amazonaws.com | -| DB_USER_develop | The database user | dbuser | -| DB_PASS_develop | The database password | supersecret | -| SYNAPSE_USERNAME | The Synapse service user | syn-service-user | -| SYNAPSE_PASSWORD | The Synapse service user password | supersecret | +The following secrets need to be set up in GitHub for the scripts to deploy database updates: -__Note__: The variables containing `_develop` postfix corresponds to the branch. -To deploy to a prod environment a prod branch is require along with a variable -containing a `_prod` prefix (i.e. 
BASTIAN_HOST_prod) +Global secrets: +| Variable | Description | Example | +|----------------------|----------------------------------|----------------------------------| +| SYNAPSE_PASSWORD | Synapse service user token (PAT) | glY4283tLQHZ...0eXAiOi...JKV1QiL | -[db_update]: diagram1.png "update diagram" + +Context specific secrets for each environment that corresponds to a git branch (develop/staging/prod): + +| Variable | Description | Example | +|-----------|-----------------------------|---------------------------------------------------------------------------| +| DB_HOST | The database host | dbcluster-mr0a782pfjnk.cluster-ctcayu3de2lt.us-east-1.docdb.amazonaws.com | +| DB_USER | The database user | dbuser | +| DB_PASS | The database password | supersecret | + + +![alt text][github_secrets] + + +## Self hosted runners + +[agora2-infra] repository deploys a bastian host in AWS for each environment which has access to +the databases. We manually configure a [GitHub self-hosted runner](https://docs.github.com/en/actions/hosting-your-own-runners) +for each bastian host; a label is applied to each runner to match the corresponding git branch name (develop/staging/prod). +Each runner corresponds to an environment which corresponds to a git branch. The update is +executed from these runners. When a push happens on a branch (e.g. develop), the update +is executed on the `agora-bastian-develop` runner which in turn updates the development database. + + +![alt text][self_hosted_runners] + + +### Setup self hosted runners + +GitHub self hosted runners are deployed with a [Sceptre template config file](https://github.com/Sage-Bionetworks/agora2-infra/blob/main/config/agoradev/develop/agora-bastian.yaml). + +Self Hosted Runner setup: +* Deploy the template to the Agora AWS account. +* Log in to the AWS console and go to `EC2 -> select the deployed instance -> Connect -> Session Manager -> Connect` to gain ssh access to the instance. 
+* Follow the instructions to install the [Github self hosted runner](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/adding-self-hosted-runners#adding-a-self-hosted-runner-to-a-repository). We installed it to the `/home/ssm-user/actions-runner` folder. +* Run the `config.sh` script to configure the runner. !! Important !! Make sure to set the runner `name` and `label` corresponding to the desired deployment environment (develop/staging/prod). +```text +sh-4.2$ pwd +/home/ssm-user/actions-runner + +sh-4.2$ ./config.sh --url https://github.com/Sage-Bionetworks/agora-data-manager --token XXXXXXXXXXXXXXXXX6VLI + +-------------------------------------------------------------------------------- +| ____ _ _ _ _ _ _ _ _ | +| / ___(_) |_| | | |_ _| |__ / \ ___| |_(_) ___ _ __ ___ | +| | | _| | __| |_| | | | | '_ \ / _ \ / __| __| |/ _ \| '_ \/ __| | +| | |_| | | |_| _ | |_| | |_) | / ___ \ (__| |_| | (_) | | | \__ \ | +| \____|_|\__|_| |_|\__,_|_.__/ /_/ \_\___|\__|_|\___/|_| |_|___/ | +| | +| Self-hosted runner registration | +| | +-------------------------------------------------------------------------------- + +# Authentication + + +√ Connected to GitHub + +# Runner Registration + +Enter the name of the runner group to add this runner to: [press Enter for Default] + +Enter the name of runner: [press Enter for ip-10-XXX-XXX-XXX] agora-bastian-prod + +This runner will have the following labels: 'self-hosted', 'Linux', 'X64' +Enter any additional labels (ex. label-1,label-2): [press Enter to skip] prod + +√ Runner successfully added +√ Runner connection is good + +# Runner settings + +Enter name of work folder: [press Enter for _work] + +√ Settings Saved. 
+``` +* Setup the [GH runner agent to run as a service](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/configuring-the-self-hosted-runner-application-as-a-service) +* Run the agent and then check the [GH Runners page](https://github.com/Sage-Bionetworks/agora-data-manager/settings/actions/runners) to make sure that the runner is in `Idle` status. + +[db_update]: agora-db-update.drawio.png "update diagram" +[github_secrets]: github_secrets.png "github secrets screen" +[self_hosted_runners]: self-hosted-runners.png "self hosted runners" +[agora2-infra]: https://github.com/Sage-Bionetworks/agora2-infra "agora2-infra repository" +[Github self-hosted runners]: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#about-self-hosted-runners diff --git a/agora-ci-develop.pem.enc b/agora-ci-develop.pem.enc deleted file mode 100644 index 92d6134..0000000 Binary files a/agora-ci-develop.pem.enc and /dev/null differ diff --git a/agora-ci-prod.pem.enc b/agora-ci-prod.pem.enc deleted file mode 100644 index cae6810..0000000 Binary files a/agora-ci-prod.pem.enc and /dev/null differ diff --git a/agora-db-update.drawio.png b/agora-db-update.drawio.png new file mode 100644 index 0000000..88d8819 Binary files /dev/null and b/agora-db-update.drawio.png differ diff --git a/data-manifest.json b/data-manifest.json index 0760186..312e106 100644 --- a/data-manifest.json +++ b/data-manifest.json @@ -1,5 +1,5 @@ { - "data-version": "66", + "data-version": "71", "data-manifest-id": "syn13363290", "team-images-id": "syn12861877" } diff --git a/diagram1.png b/diagram1.png deleted file mode 100644 index 0053688..0000000 Binary files a/diagram1.png and /dev/null differ diff --git a/github_secrets.png b/github_secrets.png new file mode 100644 index 0000000..0144203 Binary files /dev/null and b/github_secrets.png differ diff --git a/import-data.sh b/import-data.sh index c30b310..7a88cba 100755 --- 
a/import-data.sh +++ b/import-data.sh @@ -5,28 +5,27 @@ #!/bin/bash set -e -TRAVIS_BRANCH=$1 -SYNAPSE_USERNAME=$2 -SYNAPSE_PASSWORD=$3 -DB_HOST=$4 -DB_USER=$5 -DB_PASS=$6 +BRANCH=$1 +SYNAPSE_PASSWORD=$2 +DB_HOST=$3 +DB_USER=$4 +DB_PASS=$5 CURRENT_DIR=$(pwd) -PARENT_DIR="$(dirname "$CURRENT_DIR")" -TMP_DIR=/tmp -WORKING_DIR=$TMP_DIR/work +WORKING_DIR=$CURRENT_DIR DATA_DIR=$WORKING_DIR/data TEAM_IMAGES_DIR=$DATA_DIR/team_images +mkdir -p "$TEAM_IMAGES_DIR" + # Version key/value should be on his own line DATA_VERSION=$(cat $WORKING_DIR/data-manifest.json | grep data-version | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') DATA_MANIFEST_ID=$(cat $WORKING_DIR/data-manifest.json | grep data-manifest-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') TEAM_IMAGES_ID=$(cat $WORKING_DIR/data-manifest.json | grep team-images-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') -echo "$TRAVIS_BRANCH branch, DATA_VERSION = $DATA_VERSION, manifest id = $DATA_MANIFEST_ID" +echo "$BRANCH branch, DATA_VERSION = $DATA_VERSION, manifest id = $DATA_MANIFEST_ID" # Download the manifest file from synapse -synapse -u $SYNAPSE_USERNAME -p $SYNAPSE_PASSWORD get --downloadLocation $DATA_DIR -v $DATA_VERSION $DATA_MANIFEST_ID +synapse -p "$SYNAPSE_PASSWORD" get --downloadLocation "$DATA_DIR" -v "$DATA_VERSION" "$DATA_MANIFEST_ID" # Ensure there's a newline at the end of the manifest file; otherwise the last listed file will not be downloaded # echo >> $DATA_DIR/data_manifest.csv @@ -34,11 +33,11 @@ synapse -u $SYNAPSE_USERNAME -p $SYNAPSE_PASSWORD get --downloadLocation $DATA_D # Download all files referenced in the manifest from synapse cat $DATA_DIR/data_manifest.csv | tail -n +2 | while IFS=, read -r id version; do echo Downloading $id,$version - synapse -u $SYNAPSE_USERNAME -p $SYNAPSE_PASSWORD get --downloadLocation $DATA_DIR -v $version $id ; + synapse -p "$SYNAPSE_PASSWORD" get --downloadLocation "$DATA_DIR" -v 
"$version" "$id" ; done # Download team images -synapse -u $SYNAPSE_USERNAME -p $SYNAPSE_PASSWORD get -r --downloadLocation $TEAM_IMAGES_DIR/ $TEAM_IMAGES_ID +synapse -p "$SYNAPSE_PASSWORD" get -r --downloadLocation "$TEAM_IMAGES_DIR/" "$TEAM_IMAGES_ID" echo "Data Files: " ls -al $WORKING_DIR @@ -65,7 +64,7 @@ mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabas mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection genesbiodomains --jsonArray --drop --file $DATA_DIR/genes_biodomains.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection biodomaininfo --jsonArray --drop --file $DATA_DIR/biodomain_info.json -mongo --host $DB_HOST -u $DB_USER -p $DB_PASS --authenticationDatabase admin $WORKING_DIR/create-indexes.js +mongosh --host "$DB_HOST" -u "$DB_USER" -p "$DB_PASS" --authenticationDatabase admin "$WORKING_DIR/create-indexes.js" pushd $TEAM_IMAGES_DIR ls -1r *.{jpg,jpeg,png} | while read x; do mongofiles -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase $DB_USER -v put $x; echo $x; done diff --git a/self-hosted-runners.png b/self-hosted-runners.png new file mode 100644 index 0000000..8b0b147 Binary files /dev/null and b/self-hosted-runners.png differ diff --git a/updatedb.sh b/updatedb.sh deleted file mode 100755 index 0ba3578..0000000 --- a/updatedb.sh +++ /dev/null @@ -1,36 +0,0 @@ -# Update agora db from a build machine by running an import script -# on a bastian host -#!/bin/bash -set -e - -# double interpolate vars from travis -eval export "BASTIAN_HOST=\$BASTIAN_HOST_$TRAVIS_BRANCH" -eval export "DB_HOST=\$DB_HOST_$TRAVIS_BRANCH" -eval export "DB_USER=\$DB_USER_$TRAVIS_BRANCH" -eval export "DB_PASS=\$DB_PASS_$TRAVIS_BRANCH" - -# Escape chars in env vars -q_mid=\'\\\'\' -SYNAPSE_USERNAME_ESC="'${SYNAPSE_USERNAME//\'/$q_mid}'" -SYNAPSE_PASSWORD_ESC="'${SYNAPSE_PASSWORD//\'/$q_mid}'" -DB_USER_ESC="'${DB_USER//\'/$q_mid}'" 
-DB_PASS_ESC="'${DB_PASS//\'/$q_mid}'" - -# bastian configs -BASTIAN_USER="ec2-user" -BASTIAN_SSH_KEY="~/.ssh/agora-ci-$TRAVIS_BRANCH.pem" - -# set SSH configuration -echo -e "Host $BASTIAN_HOST\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config - -# clean up from previous builds -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "rm -rf /tmp/work" - -# create directories for data -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "mkdir -p /tmp/work/data/team_images" - -# setup script on bastian -scp -i $BASTIAN_SSH_KEY import-data.sh data-manifest.json create-indexes.js $BASTIAN_USER@$BASTIAN_HOST:/tmp/work/. - -# run import on bastian -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "bash /tmp/work/import-data.sh $TRAVIS_BRANCH $SYNAPSE_USERNAME_ESC $SYNAPSE_PASSWORD_ESC $DB_HOST $DB_USER_ESC $DB_PASS_ESC"