Skip to content

Commit

Permalink
Merge 2.0 to master (#157)
Browse files Browse the repository at this point in the history
  • Loading branch information
celinepelletier authored Jun 20, 2024
1 parent 9654590 commit aca210e
Show file tree
Hide file tree
Showing 63 changed files with 10,266 additions and 2,882 deletions.
4 changes: 0 additions & 4 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ SEND_UPDATE_TO_SQS=
SQS_QUEUE_URL=
MAX_SET_CONTENT_SIZE=

# Python configuration (used for survival endpoint)
SURVIVAL_PY_FILE=
PYTHON_PATH=

# Riff
RIFF_URL=

Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/check_pull_request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# CI quality gate: runs the project's test suite on every pull request.
name: Check Pull Request Quality

on:
  pull_request:

jobs:
  tests:
    name: Run Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source Code
        uses: actions/checkout@v3
      - name: Setup node
        uses: actions/setup-node@v3
        with:
          node-version: 20
      # Cache node_modules keyed on the lockfile so unchanged dependency
      # sets are restored instead of re-downloaded on repeated runs.
      - name: Use Dependencies Cache
        uses: actions/cache@v3
        with:
          path: '**/node_modules'
          key: ${{ runner.os }}-modules-${{ hashFiles('**/package-lock.json') }}
      - name: Install Dependencies
        run: npm ci
      - name: Run tests
        run: npm run test
21 changes: 21 additions & 0 deletions .github/workflows/scan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# CI security gate: builds the Docker image for the commit under review and
# scans it with Trivy on every pull request. The job fails (exit-code 1)
# when vulnerabilities of severity CRITICAL or HIGH are found.
name: build
on:
  pull_request:

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Build an image from Dockerfile
        run: |
          docker build -t ${{ github.sha }} .
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: '${{ github.sha }}'
          format: 'table'
          exit-code: '1'
          severity: 'CRITICAL,HIGH'
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ node_modules
/docker/esdata/
/.idea/
dev/es_data/*
*.env-dev
*.env-dev
venv
17 changes: 5 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
# First image to compile typescript to javascript
FROM node:16.13-alpine AS build-image
FROM node:20-alpine3.18 AS build
WORKDIR /app
COPY . .
RUN npm ci
RUN npm run clean
RUN npm run build
RUN npm ci && npm run cleanAndBuild

# Second image, that creates an image for production
FROM nikolaik/python-nodejs:python3.9-nodejs16-alpine AS prod-image
FROM node:20-alpine3.18 AS prod-image
WORKDIR /app
COPY --from=build-image ./app/dist ./dist
COPY --from=build ./app/dist ./dist
COPY package* ./
COPY ./resource ./resource
RUN npm ci --production
RUN pip3 install -r resource/py/requirements.txt

RUN apk update && apk upgrade --no-cache libcrypto3 libssl3 && npm ci --production
CMD [ "node", "./dist/src/index.js" ]
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ Arranger server is an application that wraps Elasticsearch and provides a GraphQ

## Development

* Execute: `npm run build` then `npm run start`
* Execute: `npm run cbs`

Note: You can execute this project in a docker container if you prefer: `docker run -u node -it --rm --network host -v ${PWD}:/app --workdir /app node:20-alpine3.18 sh`

### General

Expand Down
4 changes: 2 additions & 2 deletions admin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ The Law of the Land is: 1 arranger project per environment (qa, staging, prod).
npm run admin-project
or
# run the script with docker (PWD = root of the project)
docker run -it --network host --rm -v ${PWD}:/code --workdir /code node:16.13-alpine sh -c "npm install && npm run build && npm run admin-project"
docker run -it --network host --rm -v ${PWD}:/code --workdir /code node:20-alpine3.18 sh -c "npm install && npm run build && npm run admin-project"
# run the script with docker (PWD = root of the project) and local elastic search (from /dev)
docker run -it --rm --network es-net -v ${PWD}:/code --workdir /code node:16.13-alpine sh -c "npm install && npm run build && npm run admin-project"
docker run -it --rm --network es-net -v ${PWD}:/code --workdir /code node:20-alpine3.18 sh -c "npm install && npm run build && npm run admin-project"
```

150 changes: 150 additions & 0 deletions admin/addFieldsToStudies.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Ad-hoc admin script (INCLUDE): enriches existing `study_centric` documents in
// Elasticsearch with extra fields taken from the local mocks, matching on `study_code`.
//
// You must be connected to the correct ES HOST
// You can do:
// docker run --rm -it -v ${PWD}:/app -u node --network=host --workdir /app node:20-alpine3.18 sh
// node admin/addFieldsToStudies.mjs
import assert from 'node:assert/strict';
import EsInstance from '../dist/src/ElasticSearchClientInstance.js';
import readline from 'readline';

import { mockStudies, validateStudies } from './mockStudies.mjs';

const userReadline = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
});

// This script mutates documents, so require an explicit confirmation first.
const yesOrNo = await new Promise(resolve => {
    userReadline.question(`This script is intended to be for INCLUDE. Do you want to proceed y/n? > `, answer =>
        resolve(answer === 'y'),
    );
});
userReadline.close();
if (!yesOrNo) {
    console.info('Terminating Script');
    process.exit(0);
}

const { keys, values } = Object;

const ms = [...mockStudies];

// Abort early if any mocked study fails validation.
const vr = validateStudies(ms);
const invalidStudies = vr.filter(v => !v[1]);
if (invalidStudies.length > 0) {
    invalidStudies.forEach(v => {
        const [code, , errors] = v;
        console.log(`study=${code} is invalid`);
        console.log(errors);
    });
    process.exit(0);
}

const sCodes = [...new Set(ms.map(x => x.study_code))];
const nOfStudiesToEnhance = ms.length;
assert(sCodes.length === nOfStudiesToEnhance, 'Duplicated study_codes in mocks');

const client = await EsInstance.default.getInstance();

// Quick validation: every top-level key of each mock must exist in the index mapping.
const rM = await client.indices.getMapping({ index: 'study_centric' });
assert(rM.statusCode === 200);
const m = values(rM.body)[0]?.mappings?.properties;
assert(!!m);
// !Notice Warning: mappings are multivalued (one for each study). Only the first found
// is used, so validation may, in certain instances, be incomplete.
const mappedKeys = ms.map(s => {
    const sTopLevelKeys = keys(s);
    const allKeysExistInMapping = sTopLevelKeys.every(sk => !!m[sk]);
    return [allKeysExistInMapping, sTopLevelKeys.filter(sk => !m[sk])];
});

const firstLevelNestedOK = ['dataset', 'data_types', 'contacts', 'experimental_strategies'].every(
    k => m[k]?.type === 'nested',
);
const mappingSeemsValid = firstLevelNestedOK && mappedKeys.every(x => !!x[0]);
if (!mappingSeemsValid) {
    console.error('It seems like not all values are mapped correctly.');
    // Only list unmapped keys when the nested check passed (the failure is then key-related).
    if (m.dataset.type === 'nested') {
        console.error('Problematic keys: ', [
            ...new Set(
                mappedKeys
                    .filter(x => !x[0])
                    .map(x => x[1])
                    .flat(),
            ),
        ]);
    }
    process.exit(0);
}

// Processing: fetch the current documents for the mocked study codes.
const r = await client.search({
    index: 'study_centric',
    size: sCodes.length,
    body: {
        query: {
            bool: {
                must: [
                    {
                        terms: {
                            study_code: sCodes,
                        },
                    },
                ],
            },
        },
    },
});
assert(r.statusCode === 200);
const hits = r.body.hits;
assert(
    hits.total.value <= nOfStudiesToEnhance &&
        hits?.hits &&
        hits.hits.every(h => sCodes.includes(h._source.study_code)),
);

// Build the bulk payload: merge each mock on top of its existing document.
// Studies with no matching document are skipped (filtered out below).
const operations = ms
    .flatMap(doc => {
        const oDoc = hits.hits.find(h => h._source.study_code === doc.study_code);
        if (!oDoc) {
            return undefined;
        }
        return [
            { update: { _index: oDoc._index, _id: oDoc._id } },
            {
                doc: {
                    ...oDoc._source,
                    ...doc,
                },
            },
        ];
    })
    .filter(x => !!x);

assert(operations.length >= 1);
const br = await client.bulk({ refresh: true, body: operations });
// Fix: require BOTH a 2xx status AND no per-item errors. The previous `||` let a 200
// response with item-level errors pass (ES bulk returns 200 even when individual
// operations fail; failures are reported via `body.errors`).
assert(br.statusCode === 200 && !br.body?.errors, br);

// Post-validation
const uItems = br.body.items;
// Not a perfect check theoretically, but it should be largely sufficient.
// Besides, identity ( f(x)=x ) transform is considered as an update
const allUpdated = uItems.length === ms.length;
const updatedDocsIds = uItems.map(x => x.update._id);
const updatedCodes = updatedDocsIds.reduce((xs, x) => {
    const code = hits.hits.find(h => h._id === x)?._source.study_code;
    return code ? [...xs, code] : xs;
}, []);
console.log('Codes updated');
console.log(updatedCodes);
const notUpdatedCodes = sCodes.filter(c => !updatedCodes.includes(c));
if (notUpdatedCodes.length > 0) {
    console.log('Codes NOT updated (no studies found with these codes in study_centric)');
    console.log(notUpdatedCodes);
}
console.log(
    allUpdated
        ? 'All items were updated'
        : `Updated ${br.body.items.length} docs (ids=${uItems
              .map(item => item.update._id)
              .sort()
              .join(',')})`,
);

process.exit(0);
15 changes: 8 additions & 7 deletions admin/arrangerApi.mjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { updateFieldExtendedMapping } from '@arranger/admin/dist/schemas/ExtendedMapping/utils';
import { createNewIndex, getProjectMetadataEsLocation } from '@arranger/admin/dist/schemas/IndexSchema/utils';
import { addArrangerProject } from '@arranger/admin/dist/schemas/ProjectSchema/utils';
import { constants } from '@arranger/admin/dist/services/constants';
import { updateFieldExtendedMapping } from '@arranger/admin/dist/schemas/ExtendedMapping/utils.js';
import { createNewIndex, getProjectMetadataEsLocation } from '@arranger/admin/dist/schemas/IndexSchema/utils.js';
import { addArrangerProject } from '@arranger/admin/dist/schemas/ProjectSchema/utils.js';
import { constants } from '@arranger/admin/dist/services/constants.js';

const createNewIndices = async (esClient, confIndices) => {
const createNewIndexWithClient = createNewIndex(esClient);
Expand All @@ -10,11 +10,12 @@ const createNewIndices = async (esClient, confIndices) => {
}
};

const fixExtendedMapping = async (esClient, confExtendedMappingMutations) => {
const fixExtendedMapping = async (esClient, mutations) => {
const updateFieldExtendedMappingWithClient = updateFieldExtendedMapping(esClient);
for (const confExtendedMappingMutation of confExtendedMappingMutations) {
for (const [index, mutation] of mutations.entries()) {
console.debug('updating field = ', mutation?.field, ` ${index + 1} of ${mutations.length}`);
await updateFieldExtendedMappingWithClient({
...confExtendedMappingMutation,
...mutation,
});
}
};
Expand Down
42 changes: 42 additions & 0 deletions admin/checkAliasWithRelease.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Ad-hoc admin script: inspects the clinical aliases (file, biospecimen,
// participant, study) and reports which release(s) each alias points to.
// Warns when the set of releases found looks inconsistent.
import { Client } from '@elastic/elasticsearch';
import { esHost } from '../dist/src/env.js';
import assert from 'node:assert/strict';

// Keep only aliases whose underlying index belongs to a clinical entity.
const cbKeepClinicalIndicesOnly = x =>
    ['file', 'biospecimen', 'participant', 'study'].some(stem => x.index.includes(stem));

const client = new Client({ node: esHost });

const rAllAliases = await client.cat.aliases({
    h: 'alias,index',
    format: 'json',
});

assert(rAllAliases.statusCode === 200);

const allAliases = rAllAliases.body;
// When `next_` aliases exist, restrict the check to them.
const hasNext = allAliases.some(x => x.alias.includes('next_'));
const clinicalAliases = allAliases
    .filter(cbKeepClinicalIndicesOnly)
    .filter(x => (hasNext ? x.alias.includes('next_') : x));

// Map each alias to the (deduplicated) list of releases its indices belong to;
// `all` accumulates the releases seen across every alias.
const aliasToReleases = clinicalAliases.reduce((xs, x) => {
    const r = 're' + x.index.split('_re_')[1];
    const v = [...new Set(xs[x.alias] ? [...xs[x.alias], r] : [r])];
    return {
        ...xs,
        [x.alias]: v,
        // Fix: previously `all: v` overwrote the accumulator with the releases of the
        // LAST alias only; union the current release with those gathered so far instead.
        all: [...new Set([...(xs.all ?? []), r])],
    };
}, {});

// Default `all` to [] so an empty alias list doesn't crash the length checks below.
const { all = [], ...entities } = aliasToReleases;
console.log(`\n`);

//not the best test but it should suffice
const ok = hasNext ? all.length === 1 : all.length <= 2;
if (!ok) {
    console.warn('Check if the clinical aliases are ok - There might be a problem');
}
console.log(`Release(s) found: ${all}`);
console.log(entities);
13 changes: 13 additions & 0 deletions admin/checkConf.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Compares the `extendedMappingMutations` of the KF and INCLUDE arranger
// configurations and prints, for each side, the (field, graphqlField) pairs
// that exist only in that configuration.
import includeConf from './confInclude.json' assert { type: "json" };
import kfConf from './confKfNext.json' assert { type: "json" };

// Reduce a configuration to comparable [field, graphqlField] pairs.
const toPairs = conf => conf.extendedMappingMutations.map(({ field, graphqlField }) => [field, graphqlField]);

const kfs = toPairs(kfConf);
const incs = toPairs(includeConf);

// Pairs present in `left` but absent from `right`, reshaped for console.table.
const onlyIn = (left, right) =>
    left
        .filter(([f, g]) => !right.some(([rf, rg]) => rf === f && rg === g))
        .map(([field, entity]) => ({ field, entity }));

console.info('mutation in Kf only');
const diffKfOnly = onlyIn(kfs, incs);
if (diffKfOnly.length === 0) {
    console.log('No diff');
} else {
    console.table(diffKfOnly);
}

console.info('mutation in Include only');
const diffIncOnly = onlyIn(incs, kfs);
if (diffIncOnly.length === 0) {
    console.log('No diff');
} else {
    console.table(diffIncOnly);
}
Loading

0 comments on commit aca210e

Please sign in to comment.