Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/313 converter improvements #293

Merged
merged 4 commits into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions Exareme-Docker/files/root/exareme/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ if [[ -z ${ENVIRONMENT_TYPE} ]]; then
echo "ENVIRONMENT_TYPE is unset. Check docker-compose file."
exit
fi
if [[ -z ${CONVERT_CSVS} ]]; then
echo "CONVERT_CSVS is unset. Check docker-compose file."
exit
fi

timestamp() {
date +%F' '%T
Expand Down Expand Up @@ -78,15 +82,19 @@ getMasterIPFromConsul() {

# Convert CSVs to DB
convertCSVsToDB() {
# Both Master and Worker should transform the csvs to sqlite db files
NODE_TYPE=${1}

    # Skip conversion if flag is false
if [[ ${CONVERT_CSVS} == "FALSE" ]]; then
echo "$(timestamp) CSV convertion turned off. "
return 0
fi

# Removing all previous .db files from the DOCKER_DATA_FOLDER
echo "$(timestamp) Deleting previous db files. "
rm -rf ${DOCKER_DATA_FOLDER}/**/*.db

echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. "
python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${NODE_TYPE}
python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER}
#Get the status code from previous command
py_script=$?
#If status code != 0 an error has occurred
Expand Down Expand Up @@ -228,7 +236,7 @@ if [[ "${FEDERATION_ROLE}" == "master" ]]; then
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "master"
convertCSVsToDB

else ##### Running bootstrap on a worker node #####

Expand All @@ -252,7 +260,7 @@ else ##### Running bootstrap on a worker node #####
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "worker"
convertCSVsToDB

fi

Expand Down
17 changes: 11 additions & 6 deletions Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# This metadata dictionary contains only code and sqltype so that processing will be faster
# It also includes the subjectcode
def createMetadataDictionary(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataDictionary = {}
Expand All @@ -44,7 +44,7 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary):

# This metadata list is used to create the metadata table. It contains all the known information for each variable.
def createMetadataList(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataList = []
Expand Down Expand Up @@ -80,7 +80,7 @@ def addGroupVariablesToList(groupMetadata, metadataList):
if 'enumerations' in variable:
enumerations = []
for enumeration in variable['enumerations']:
enumerations.append(unicode(enumeration['code']))
enumerations.append(str(enumeration['code']))
variableDictionary['enumerations'] = ','.join(enumerations)
else:
variableDictionary['enumerations'] = None
Expand Down Expand Up @@ -178,7 +178,7 @@ def createDataTable(metadataDictionary, cur):

def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur):
# Open the csv
csvFile = open(csvFilePath, 'r')
csvFile = open(csvFilePath, "r", encoding="utf-8")
csvReader = csv.reader(csvFile)

# Create the csv INSERT statement
Expand Down Expand Up @@ -267,14 +267,19 @@ def main():
parser = ArgumentParser()
parser.add_argument('-f', '--pathologiesFolderPath', required=True,
help='The folder with the pathologies data.')
parser.add_argument('-t', '--nodeType', required=True,
help='Is this a master or a worker node?'
parser.add_argument('-p', '--pathologies', required=False,
help='Specific pathologies to parse. (Example: "dementia,tbi"'
)
args = parser.parse_args()
pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath)

# Get all pathologies
pathologiesList = next(os.walk(pathologiesFolderPath))[1]

if args.pathologies != None:
pathologiesToConvert = args.pathologies.split(",")
pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert))
print ("Converting csvs for pathologies: " + ",".join(pathologiesList))

# Create the datasets db for each pathology
for pathologyName in pathologiesList:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme-keystore
deploy:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
deploy:
placement:
constraints:
Expand Down
1 change: 1 addition & 0 deletions Local-Deployment/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme_keystore
ports:
Expand Down