From cca042917173967494889f80e0ab3e9a8b741f50 Mon Sep 17 00:00:00 2001 From: ThanKarab Date: Fri, 30 Oct 2020 13:20:39 +0200 Subject: [PATCH 1/4] Flag to disable csv conversion. --- Exareme-Docker/files/root/exareme/bootstrap.sh | 10 ++++++++++ .../Compose-Files/docker-compose-master.yml | 1 + .../Compose-Files/docker-compose-worker.yml | 1 + Local-Deployment/docker-compose.yml | 1 + 4 files changed, 13 insertions(+) diff --git a/Exareme-Docker/files/root/exareme/bootstrap.sh b/Exareme-Docker/files/root/exareme/bootstrap.sh index d1da00f3d..df2f00ef9 100755 --- a/Exareme-Docker/files/root/exareme/bootstrap.sh +++ b/Exareme-Docker/files/root/exareme/bootstrap.sh @@ -32,6 +32,10 @@ if [[ -z ${ENVIRONMENT_TYPE} ]]; then echo "ENVIRONMENT_TYPE is unset. Check docker-compose file." exit fi +if [[ -z ${CONVERT_CSVS} ]]; then + echo "CONVERT_CSVS is unset. Check docker-compose file." + exit +fi timestamp() { date +%F' '%T @@ -78,6 +82,12 @@ getMasterIPFromConsul() { # Convert CSVs to DB convertCSVsToDB() { + + # Skip convertion if flag is false + if [[ ${CONVERT_CSVS} == "FALSE" ]]; then + return 0 + fi + # Both Master and Worker should transform the csvs to sqlite db files NODE_TYPE=${1} diff --git a/Federated-Deployment/Compose-Files/docker-compose-master.yml b/Federated-Deployment/Compose-Files/docker-compose-master.yml index ab4f064bc..1b38b05b9 100644 --- a/Federated-Deployment/Compose-Files/docker-compose-master.yml +++ b/Federated-Deployment/Compose-Files/docker-compose-master.yml @@ -51,6 +51,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE depends_on: - exareme-keystore deploy: diff --git a/Federated-Deployment/Compose-Files/docker-compose-worker.yml b/Federated-Deployment/Compose-Files/docker-compose-worker.yml index 9e68a793b..9ba97043f 100644 --- 
a/Federated-Deployment/Compose-Files/docker-compose-worker.yml +++ b/Federated-Deployment/Compose-Files/docker-compose-worker.yml @@ -34,6 +34,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE deploy: placement: constraints: diff --git a/Local-Deployment/docker-compose.yml b/Local-Deployment/docker-compose.yml index 9843a9e95..5d14439de 100644 --- a/Local-Deployment/docker-compose.yml +++ b/Local-Deployment/docker-compose.yml @@ -20,6 +20,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE depends_on: - exareme_keystore ports: From b2391be4b698d5dc787cc12f05b7400b3c55f670 Mon Sep 17 00:00:00 2001 From: ThanKarab Date: Fri, 30 Oct 2020 14:54:41 +0200 Subject: [PATCH 2/4] Added message when conversion is off. --- Exareme-Docker/files/root/exareme/bootstrap.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/Exareme-Docker/files/root/exareme/bootstrap.sh b/Exareme-Docker/files/root/exareme/bootstrap.sh index df2f00ef9..c55ddde3d 100755 --- a/Exareme-Docker/files/root/exareme/bootstrap.sh +++ b/Exareme-Docker/files/root/exareme/bootstrap.sh @@ -85,6 +85,7 @@ convertCSVsToDB() { # Skip convertion if flag is false if [[ ${CONVERT_CSVS} == "FALSE" ]]; then + echo "$(timestamp) CSV convertion turned off. " return 0 fi From 80e6c15d33b77bda1a160c9ed22fd14aa0df11b6 Mon Sep 17 00:00:00 2001 From: ThanKarab Date: Tue, 3 Nov 2020 10:53:10 +0200 Subject: [PATCH 3/4] Converter refactoring. Moved to python3. Removed nodeType that was not used. Added pathology specific conversion. 
--- Exareme-Docker/files/root/exareme/bootstrap.sh | 9 +++------ .../root/exareme/convert-csv-dataset-to-db.py | 17 +++++++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Exareme-Docker/files/root/exareme/bootstrap.sh b/Exareme-Docker/files/root/exareme/bootstrap.sh index c55ddde3d..61335f194 100755 --- a/Exareme-Docker/files/root/exareme/bootstrap.sh +++ b/Exareme-Docker/files/root/exareme/bootstrap.sh @@ -89,15 +89,12 @@ convertCSVsToDB() { return 0 fi - # Both Master and Worker should transform the csvs to sqlite db files - NODE_TYPE=${1} - # Removing all previous .db files from the DOCKER_DATA_FOLDER echo "$(timestamp) Deleting previous db files. " rm -rf ${DOCKER_DATA_FOLDER}/**/*.db echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. " - python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${NODE_TYPE} + python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} #Get the status code from previous command py_script=$? 
#If status code != 0 an error has occurred @@ -239,7 +236,7 @@ if [[ "${FEDERATION_ROLE}" == "master" ]]; then periodicExaremeNodesHealthCheck & # Prepare datasets from CSVs to SQLite db files - convertCSVsToDB "master" + convertCSVsToDB else ##### Running bootstrap on a worker node ##### @@ -263,7 +260,7 @@ else ##### Running bootstrap on a worker node ##### periodicExaremeNodesHealthCheck & # Prepare datasets from CSVs to SQLite db files - convertCSVsToDB "worker" + convertCSVsToDB fi diff --git a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py index 343644105..d5e2d1e21 100755 --- a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py +++ b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py @@ -17,7 +17,7 @@ # This metadata dictionary contains only code and sqltype so that processing will be faster # It also includes the subjectcode def createMetadataDictionary(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataDictionary = {} @@ -44,7 +44,7 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary): # This metadata list is used to create the metadata table. It contains all the known information for each variable. 
def createMetadataList(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataList = [] @@ -80,7 +80,7 @@ def addGroupVariablesToList(groupMetadata, metadataList): if 'enumerations' in variable: enumerations = [] for enumeration in variable['enumerations']: - enumerations.append(unicode(enumeration['code'])) + enumerations.append(str(enumeration['code'])) variableDictionary['enumerations'] = ','.join(enumerations) else: variableDictionary['enumerations'] = None @@ -178,7 +178,7 @@ def createDataTable(metadataDictionary, cur): def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur): # Open the csv - csvFile = open(csvFilePath, 'r') + csvFile = open(csvFilePath, "r", encoding="utf-8") csvReader = csv.reader(csvFile) # Create the csv INSERT statement @@ -267,14 +267,19 @@ def main(): parser = ArgumentParser() parser.add_argument('-f', '--pathologiesFolderPath', required=True, help='The folder with the pathologies data.') - parser.add_argument('-t', '--nodeType', required=True, - help='Is this a master or a worker node?' + parser.add_argument('-p', '--pathologies', required=False, + help='Specific pathologies to parse. (Example: "dementia,tbi"' ) args = parser.parse_args() pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath) # Get all pathologies pathologiesList = next(os.walk(pathologiesFolderPath))[1] + + if args.pathologies != None: + pathologiesToConvert = args.pathologies.split(",") + pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert)) + print (pathologiesList) # Create the datasets db for each pathology for pathologyName in pathologiesList: From cb778ee6616b4dd7d4da1902be410538a86ac104 Mon Sep 17 00:00:00 2001 From: ThanKarab Date: Tue, 3 Nov 2020 10:56:41 +0200 Subject: [PATCH 4/4] Message when converting csvs of pathologies. 
--- Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py index d5e2d1e21..9bffa3fb6 100755 --- a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py +++ b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py @@ -279,7 +279,7 @@ def main(): if args.pathologies != None: pathologiesToConvert = args.pathologies.split(",") pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert)) - print (pathologiesList) + print ("Converting csvs for pathologies: " + ",".join(pathologiesList)) # Create the datasets db for each pathology for pathologyName in pathologiesList: