Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/313 converter improvements #293

Merged
merged 4 commits into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions Exareme-Docker/files/root/exareme/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ if [[ -z ${ENVIRONMENT_TYPE} ]]; then
echo "ENVIRONMENT_TYPE is unset. Check docker-compose file."
exit
fi
if [[ -z ${CONVERT_CSVS} ]]; then
echo "CONVERT_CSVS is unset. Check docker-compose file."
exit
fi

timestamp() {
date +%F' '%T
Expand Down Expand Up @@ -78,15 +82,19 @@ getMasterIPFromConsul() {

# Convert CSVs to DB
convertCSVsToDB() {
# Both Master and Worker should transform the csvs to sqlite db files
NODE_TYPE=${1}

    # Skip conversion if flag is false
if [[ ${CONVERT_CSVS} == "FALSE" ]]; then
echo "$(timestamp) CSV convertion turned off. "
return 0
fi

# Removing all previous .db files from the DOCKER_DATA_FOLDER
echo "$(timestamp) Deleting previous db files. "
rm -rf ${DOCKER_DATA_FOLDER}/**/*.db

echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. "
python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${NODE_TYPE}
python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER}
#Get the status code from previous command
py_script=$?
#If status code != 0 an error has occurred
Expand Down Expand Up @@ -228,7 +236,7 @@ if [[ "${FEDERATION_ROLE}" == "master" ]]; then
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "master"
convertCSVsToDB

else ##### Running bootstrap on a worker node #####

Expand All @@ -252,7 +260,7 @@ else ##### Running bootstrap on a worker node #####
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "worker"
convertCSVsToDB

fi

Expand Down
17 changes: 11 additions & 6 deletions Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# This metadata dictionary contains only code and sqltype so that processing will be faster
# It also includes the subjectcode
def createMetadataDictionary(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataDictionary = {}
Expand All @@ -44,7 +44,7 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary):

# This metadata list is used to create the metadata table. It contains all the known information for each variable.
def createMetadataList(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataList = []
Expand Down Expand Up @@ -80,7 +80,7 @@ def addGroupVariablesToList(groupMetadata, metadataList):
if 'enumerations' in variable:
enumerations = []
for enumeration in variable['enumerations']:
enumerations.append(unicode(enumeration['code']))
enumerations.append(str(enumeration['code']))
variableDictionary['enumerations'] = ','.join(enumerations)
else:
variableDictionary['enumerations'] = None
Expand Down Expand Up @@ -178,7 +178,7 @@ def createDataTable(metadataDictionary, cur):

def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur):
# Open the csv
csvFile = open(csvFilePath, 'r')
csvFile = open(csvFilePath, "r", encoding="utf-8")
csvReader = csv.reader(csvFile)

# Create the csv INSERT statement
Expand Down Expand Up @@ -267,14 +267,19 @@ def main():
parser = ArgumentParser()
parser.add_argument('-f', '--pathologiesFolderPath', required=True,
help='The folder with the pathologies data.')
parser.add_argument('-t', '--nodeType', required=True,
help='Is this a master or a worker node?'
parser.add_argument('-p', '--pathologies', required=False,
help='Specific pathologies to parse. (Example: "dementia,tbi"'
)
args = parser.parse_args()
pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath)

# Get all pathologies
pathologiesList = next(os.walk(pathologiesFolderPath))[1]

if args.pathologies != None:
pathologiesToConvert = args.pathologies.split(",")
pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert))
print ("Converting csvs for pathologies: " + ",".join(pathologiesList))

# Create the datasets db for each pathology
for pathologyName in pathologiesList:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme-keystore
deploy:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
deploy:
placement:
constraints:
Expand Down
1 change: 1 addition & 0 deletions Local-Deployment/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILLIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme_keystore
ports:
Expand Down