diff --git a/Exareme-Docker/files/root/exareme/bootstrap.sh b/Exareme-Docker/files/root/exareme/bootstrap.sh index d1da00f3d..61335f194 100755 --- a/Exareme-Docker/files/root/exareme/bootstrap.sh +++ b/Exareme-Docker/files/root/exareme/bootstrap.sh @@ -32,6 +32,10 @@ if [[ -z ${ENVIRONMENT_TYPE} ]]; then echo "ENVIRONMENT_TYPE is unset. Check docker-compose file." exit fi +if [[ -z ${CONVERT_CSVS} ]]; then + echo "CONVERT_CSVS is unset. Check docker-compose file." + exit +fi timestamp() { date +%F' '%T @@ -78,15 +82,19 @@ getMasterIPFromConsul() { # Convert CSVs to DB convertCSVsToDB() { - # Both Master and Worker should transform the csvs to sqlite db files - NODE_TYPE=${1} + + # Skip conversion if flag is false + if [[ ${CONVERT_CSVS} == "FALSE" ]]; then + echo "$(timestamp) CSV conversion turned off. " + return 0 + fi # Removing all previous .db files from the DOCKER_DATA_FOLDER echo "$(timestamp) Deleting previous db files. " rm -rf ${DOCKER_DATA_FOLDER}/**/*.db echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. " - python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${NODE_TYPE} + python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} #Get the status code from previous command py_script=$? 
#If status code != 0 an error has occurred @@ -228,7 +236,7 @@ if [[ "${FEDERATION_ROLE}" == "master" ]]; then periodicExaremeNodesHealthCheck & # Prepare datasets from CSVs to SQLite db files - convertCSVsToDB "master" + convertCSVsToDB else ##### Running bootstrap on a worker node ##### @@ -252,7 +260,7 @@ else ##### Running bootstrap on a worker node ##### periodicExaremeNodesHealthCheck & # Prepare datasets from CSVs to SQLite db files - convertCSVsToDB "worker" + convertCSVsToDB fi diff --git a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py index 343644105..9bffa3fb6 100755 --- a/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py +++ b/Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py @@ -17,7 +17,7 @@ # This metadata dictionary contains only code and sqltype so that processing will be faster # It also includes the subjectcode def createMetadataDictionary(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataDictionary = {} @@ -44,7 +44,7 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary): # This metadata list is used to create the metadata table. It contains all the known information for each variable. 
def createMetadataList(CDEsMetadataPath): - CDEsMetadata = open(CDEsMetadataPath) + CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8") metadataJSON = json.load(CDEsMetadata) metadataList = [] @@ -80,7 +80,7 @@ def addGroupVariablesToList(groupMetadata, metadataList): if 'enumerations' in variable: enumerations = [] for enumeration in variable['enumerations']: - enumerations.append(unicode(enumeration['code'])) + enumerations.append(str(enumeration['code'])) variableDictionary['enumerations'] = ','.join(enumerations) else: variableDictionary['enumerations'] = None @@ -178,7 +178,7 @@ def createDataTable(metadataDictionary, cur): def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur): # Open the csv - csvFile = open(csvFilePath, 'r') + csvFile = open(csvFilePath, "r", encoding="utf-8") csvReader = csv.reader(csvFile) # Create the csv INSERT statement @@ -267,14 +267,19 @@ def main(): parser = ArgumentParser() parser.add_argument('-f', '--pathologiesFolderPath', required=True, help='The folder with the pathologies data.') - parser.add_argument('-t', '--nodeType', required=True, - help='Is this a master or a worker node?' + parser.add_argument('-p', '--pathologies', required=False, + help='Specific pathologies to parse. 
(Example: "dementia,tbi")' ) args = parser.parse_args() pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath) # Get all pathologies pathologiesList = next(os.walk(pathologiesFolderPath))[1] + + if args.pathologies != None: + pathologiesToConvert = args.pathologies.split(",") + pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert)) + print ("Converting csvs for pathologies: " + ",".join(pathologiesList)) # Create the datasets db for each pathology for pathologyName in pathologiesList: diff --git a/Federated-Deployment/Compose-Files/docker-compose-master.yml b/Federated-Deployment/Compose-Files/docker-compose-master.yml index ab4f064bc..1b38b05b9 100644 --- a/Federated-Deployment/Compose-Files/docker-compose-master.yml +++ b/Federated-Deployment/Compose-Files/docker-compose-master.yml @@ -51,6 +51,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE depends_on: - exareme-keystore deploy: diff --git a/Federated-Deployment/Compose-Files/docker-compose-worker.yml b/Federated-Deployment/Compose-Files/docker-compose-worker.yml index 9e68a793b..9ba97043f 100644 --- a/Federated-Deployment/Compose-Files/docker-compose-worker.yml +++ b/Federated-Deployment/Compose-Files/docker-compose-worker.yml @@ -34,6 +34,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE deploy: placement: constraints: diff --git a/Local-Deployment/docker-compose.yml b/Local-Deployment/docker-compose.yml index 9843a9e95..5d14439de 100644 --- a/Local-Deployment/docker-compose.yml +++ b/Local-Deployment/docker-compose.yml @@ -20,6 +20,7 @@ services: - NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS 
PASSED - ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD - LOG_LEVEL=INFO # INFO / DEBUG + - CONVERT_CSVS=TRUE # TRUE / FALSE depends_on: - exareme_keystore ports: