diff --git a/.github/workflows/scicat-to-pss.yml b/.github/workflows/scicat-to-pss.yml new file mode 100644 index 0000000..72921fa --- /dev/null +++ b/.github/workflows/scicat-to-pss.yml @@ -0,0 +1,49 @@ +name: scicat-to-pss + +on: + workflow_dispatch: + inputs: + commit: + description: 'Commit of the CI repo to deploy' + required: false + pull_request: + branches: [ main ] + push: + branches: [ main ] + release: + types: [ published ] + +jobs: + + set_env: + uses: ./.github/workflows/reusable.environment.yml + with: + commit: ${{ github.event.inputs.commit }} + + check_changed: + needs: set_env + uses: ./.github/workflows/reusable.changes.yml + with: + files: | + .github/workflows/scicat-to-pss.yml + helm_configs/scicat-to-pss/${{ needs.set_env.outputs.environment }}/** + helm_configs/scicat-to-pss/values.yaml + scicat-to-pss/** + commit: ${{ needs.set_env.outputs.commit }} + + build_deploy: + if: (needs.check_changed.outputs.changed == 'true' && !needs.set_env.outputs.component) || needs.set_env.outputs.component == 'sp' + needs: + - check_changed + - set_env + uses: ./.github/workflows/reusable.build-deploy.yml + with: + context: scicat-to-pss/. + image_name: ${{ github.repository }}/scicat-to-pss + release_name: scicat-to-pss + tag: ${{ needs.set_env.outputs.tag }} + environment: ${{ needs.set_env.outputs.environment }} + helm_chart: cron_chart + commit: ${{ needs.set_env.outputs.commit }} + secrets: + KUBECONFIG: ${{ secrets.KUBECONFIG }} diff --git a/cron_chart/.helmignore b/cron_chart/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/cron_chart/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/cron_chart/Chart.yaml b/cron_chart/Chart.yaml new file mode 100644 index 0000000..b12b310 --- /dev/null +++ b/cron_chart/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: cron-chart +description: A Helm chart for scheduling cron jobs + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 1.0.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" diff --git a/cron_chart/README.md b/cron_chart/README.md new file mode 100644 index 0000000..07d4b5a --- /dev/null +++ b/cron_chart/README.md @@ -0,0 +1,51 @@ +# Cron-chart + +A simple chart to deploy a cronJob runner mounting secrets + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```bash +$ helm install my-release cron_chart +``` + +The command deploys a cron chart on the Kubernetes cluster in the default configuration. The [Parameters](#parameters) section lists the parameters that can be configured during installation. + +> **Tip**: List all releases using `helm list` + +## Uninstalling the Chart + +To uninstall/delete the `my-release` deployment: + +```bash +$ helm delete my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Parameters + +The following table lists the configurable parameters of the chart and their default values. + +### Common parameters + +| Parameter | Description | Default | +|---------------------|----------------------------------------------------------------------|--------------------------------| +| `nameOverride` | String to partially override fullname | `nil` | +| `fullnameOverride` | String to fully override fullname | `nil` | + +### cron-chart parameters + +| Parameter | Description | Default | +|------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------| +| `image.repository` | Image name | `busybox` | +| `image.tag` | Image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `Always` | +| `cronjob.restartPolicy` | Set the cronjob restart policy | `OnFailure` | +| `cronjob.schedule` | Set the schedule of the cronjob in the usual cron format command | `0 7 * * 1` | +| `cronjob.secret` | Name of the secret used by the 
cronjob to fetch env vars | `nil` | +| `secrets` | Object of objects which create secrets, in the form: { secretName:{ type:Opaque,data:{ key1:value1,key2:value2,key3:value3 } } } + | `nil` | +| `volumes` | Object of arrays with volumes to mount, in the form: https://kubernetes.io/docs/concepts/storage/volumes/#background | `nil` | +| `volumeMounts` | Object of arrays with volumes to mount and where, in the form: https://kubernetes.io/docs/concepts/storage/volumes/#background | `nil` | diff --git a/cron_chart/templates/NOTES.txt b/cron_chart/templates/NOTES.txt new file mode 100644 index 0000000..e5e9d6f --- /dev/null +++ b/cron_chart/templates/NOTES.txt @@ -0,0 +1,6 @@ +1. Get the application resources by running: + $ kubectl get all -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "helm_chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" +2. Get the application configmaps by running: + $ kubectl get configmaps -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "helm_chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" +3. Get the application secrets by running: + $ kubectl get secrets -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "helm_chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" diff --git a/cron_chart/templates/_helpers.tpl b/cron_chart/templates/_helpers.tpl new file mode 100644 index 0000000..13939b6 --- /dev/null +++ b/cron_chart/templates/_helpers.tpl @@ -0,0 +1,74 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "helm_chart.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "helm_chart.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- .Release.Name }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "helm_chart.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "helm_chart.labels" -}} +helm.sh/chart: {{ include "helm_chart.chart" . }} +{{ include "helm_chart.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "helm_chart.selectorLabels" -}} +app.kubernetes.io/name: {{ include "helm_chart.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "helm_chart.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "helm_chart.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Validate the secret, checking if base64 encoded +*/}} +{{- define "validateSecret" -}} +{{ $secret := regexReplaceAllLiteral "\u0026#x3D;" (regexReplaceAllLiteral "\u0026#x2F;" . "/") "=" }} +{{- if (b64dec $secret | hasPrefix "illegal base64") -}} +{{ fail "Please b64 encode your secrets!" 
}} +{{- else }} +{{- $secret }} +{{- end }} +{{- end }} diff --git a/cron_chart/templates/cronjob.yaml b/cron_chart/templates/cronjob.yaml new file mode 100644 index 0000000..b69e890 --- /dev/null +++ b/cron_chart/templates/cronjob.yaml @@ -0,0 +1,34 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "helm_chart.fullname" . }} + labels: + {{- include "helm_chart.labels" $ | nindent 4 }} +spec: + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 1 + schedule: {{ .Values.cronjob.schedule | quote }} + jobTemplate: + spec: + template: + metadata: + labels: + {{- include "helm_chart.selectorLabels" . | nindent 12 }} + spec: + restartPolicy: {{ .Values.cronjob.restartPolicy }} + containers: + - name: {{ include "helm_chart.fullname" . }} + image: {{ tpl .Values.image.repository $ }}:{{ tpl .Values.image.tag $ }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.env }} + env: + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with .Values.volumes}} + volumes: + {{- tpl (toYaml .) $ | nindent 10 }} + {{- end }} diff --git a/cron_chart/templates/secrets.yaml b/cron_chart/templates/secrets.yaml new file mode 100644 index 0000000..e62d84c --- /dev/null +++ b/cron_chart/templates/secrets.yaml @@ -0,0 +1,14 @@ +{{- range $name, $values := $.Values.secrets }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ tpl $name $ }} + labels: + {{- include "helm_chart.labels" $ | nindent 4 }} +type: {{ $values.type }} +data: + {{- range $k, $v := $values.data }} + {{ $k }}: + {{- tpl (printf "%s" $v) $ | include "validateSecret" | indent 4 }} + {{- end }} +{{- end }} diff --git a/cron_chart/values.yaml b/cron_chart/values.yaml new file mode 100644 index 0000000..e88fda9 --- /dev/null +++ b/cron_chart/values.yaml @@ -0,0 +1,18 @@ +# Default values for search-api. +# This is a YAML-formatted file. 
+# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: "busybox" + pullPolicy: Always + # Image tag to deploy; the chart uses this value as-is (no fallback to the chart appVersion). + tag: latest + +nameOverride: "" +fullnameOverride: "" + +cronjob: + restartPolicy: OnFailure + schedule: 0 7 * * 1 diff --git a/helm_configs/scicat-to-pss/development/values.yaml b/helm_configs/scicat-to-pss/development/values.yaml new file mode 100644 index 0000000..660ddb3 --- /dev/null +++ b/helm_configs/scicat-to-pss/development/values.yaml @@ -0,0 +1,6 @@ +scicatBaseUrl: http://backend.scicat-development/api/v3 +pssBaseUrl: http://pss.scicat-development + +cronjob: + restartPolicy: OnFailure + schedule: 0 5 * * 6 diff --git a/helm_configs/scicat-to-pss/production/values.yaml b/helm_configs/scicat-to-pss/production/values.yaml new file mode 100644 index 0000000..b8b4fb0 --- /dev/null +++ b/helm_configs/scicat-to-pss/production/values.yaml @@ -0,0 +1,6 @@ +scicatBaseUrl: http://backend.scicat-production/api/v3 +pssBaseUrl: http://pss.scicat-production + +cronjob: + restartPolicy: OnFailure + schedule: 30 5 * * 6 diff --git a/helm_configs/scicat-to-pss/qa/values.yaml b/helm_configs/scicat-to-pss/qa/values.yaml new file mode 100644 index 0000000..9d68cd1 --- /dev/null +++ b/helm_configs/scicat-to-pss/qa/values.yaml @@ -0,0 +1,7 @@ +scicatBaseUrl: http://backend.scicat-qa/api/v3 +pssBaseUrl: http://pss.scicat-qa + +cronjob: + restartPolicy: OnFailure + schedule: 15 5 * * 6 + diff --git a/helm_configs/scicat-to-pss/values.yaml b/helm_configs/scicat-to-pss/values.yaml new file mode 100644 index 0000000..92ed74c --- /dev/null +++ b/helm_configs/scicat-to-pss/values.yaml @@ -0,0 +1,12 @@ +replicaCount: 1 + +image: + repository: "{{ .Values.ciRepository }}" + pullPolicy: Always + tag: "{{ .Values.ciTag }}" + +env: + - name: SCICAT_BASE_URL + value: "{{ .Values.scicatBaseUrl }}" + - name: PSS_BASE_URL + value: "{{ .Values.pssBaseUrl }}" diff --git a/scicat-to-pss/Dockerfile 
"""Copy SciCat datasets into the PSS scoring service.

Runs as a one-shot script (``python copy_public_ds.py``): it reads the
datasets exposed by the SciCat API, replaces the items stored in PSS with
them and then asks PSS to recompute its weights.  The two base URLs are read
from the ``SCICAT_BASE_URL`` and ``PSS_BASE_URL`` environment variables.
"""
import logging
from os import environ

logger = logging.getLogger(__name__)

# Seconds to wait on a silent server.  requests applies no timeout by
# default, so without one a stalled connection would keep the job running
# forever.
REQUEST_TIMEOUT = 60

# For each PSS group: {field name sent to PSS: attribute read from the
# SciCat record}.
meaningful_fields = {
    "datasets": {
        "title": "datasetName",
        "keywords": "keywords",
        "metadata": "scientificMetadata",
        "description": "description",
    },
}


def _send(method, url, **kwargs):
    """Send one HTTP request with ``REQUEST_TIMEOUT`` and return the response.

    The caller decides whether an error status is fatal.
    """
    # Imported here rather than at module level so the pure formatting
    # helpers stay importable without the HTTP client installed.
    from requests import request

    return request(method, url, timeout=REQUEST_TIMEOUT, **kwargs)


def prepFields(item, group):
    """Return the PSS fields of *item* for *group*.

    Attributes missing from *item* are sent as an empty string.
    Raises ``KeyError`` if *group* is not in ``meaningful_fields``.
    """
    return {
        pss_field: item.get(source_attr, "")
        for pss_field, source_attr in meaningful_fields[group].items()
    }


def format_dataset_for_scoring(raw_datasets):
    """Convert SciCat dataset records into the item payload posted to PSS.

    Every record must carry a ``pid``; it becomes the PSS item ``id``.
    """
    return [
        {
            "id": item["pid"],
            "group": "datasets",
            "fields": prepFields(item, "datasets"),
        }
        for item in raw_datasets
    ]


def delete_all_scored(pss_items_url):
    """Delete every item currently listed at *pss_items_url*.

    Returns the HTTP status codes of the individual DELETE calls, in listing
    order.  Raises ``requests.HTTPError`` if the listing itself fails, so an
    error body is never mistaken for a list of items.
    """
    listing = _send("GET", pss_items_url)
    listing.raise_for_status()
    return [
        _send("DELETE", f"{pss_items_url}/{item['id']}").status_code
        for item in listing.json() or []
    ]


def post_datasets_to_scoring(scoring_datasets, pss_items_url):
    """POST *scoring_datasets* as JSON to PSS and return the response.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer.
    """
    response = _send("POST", pss_items_url, json=scoring_datasets)
    response.raise_for_status()
    return response


def compute_weights(pss_compute_url):
    """Trigger the PSS compute endpoint and return the response.

    Raises ``requests.HTTPError`` on a 4xx/5xx answer.
    """
    response = _send("POST", pss_compute_url)
    response.raise_for_status()
    return response


def get_public_datasets(sc_datasets_url):
    """Return the decoded JSON body of *sc_datasets_url*.

    The request carries no credentials; presumably SciCat then returns only
    public datasets -- confirm against the SciCat API.
    Raises ``requests.HTTPError`` on a 4xx/5xx answer.
    """
    response = _send("GET", sc_datasets_url)
    response.raise_for_status()
    return response.json()


def main(scicat_base_url, pss_base_url):
    """Replace the items stored in PSS with the datasets read from SciCat."""
    logger.info("SciCat base URL: %s", scicat_base_url)
    logger.info("PSS base URL: %s", pss_base_url)
    pss_items_url = f"{pss_base_url}/items"

    # Fetch and convert before deleting anything: if SciCat is unreachable
    # the job fails here and the items already stored in PSS stay in place.
    public_datasets = get_public_datasets(f"{scicat_base_url}/datasets")
    logger.info("Datasets fetched from SciCat: %d", len(public_datasets))
    scoring_datasets = format_dataset_for_scoring(public_datasets)
    logger.info("Datasets formatted for scoring: %d", len(scoring_datasets))

    delete_status_codes = delete_all_scored(pss_items_url)
    logger.info("Delete status codes: %s", delete_status_codes)
    if any(code >= 400 for code in delete_status_codes):
        logger.warning("Some PSS items could not be deleted")

    to_scoring = post_datasets_to_scoring(scoring_datasets, pss_items_url)
    logger.info(to_scoring.json())
    scores = compute_weights(f"{pss_base_url}/compute")
    logger.info(scores.json())


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main(environ["SCICAT_BASE_URL"], environ["PSS_BASE_URL"])