diff --git a/DESCRIPTION b/DESCRIPTION
index 48500f1..5e9c268 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: dsUpload
 Title: Upload Functions for DataSHIELD Backends
-Version: 4.6.0
+Version: 4.6.1
 Authors@R:
     c(person(given = "Mariska",
              family = "Slofstra",
diff --git a/R/reshape.R b/R/reshape.R
index b1ca8c9..0f9ced0 100755
--- a/R/reshape.R
+++ b/R/reshape.R
@@ -32,19 +32,19 @@ du.reshape <- function(upload = TRUE, project, data_version, input_format, dict_
   nonrep_data <- du.reshape.generate.non.repeated(data, dict_kind)
-  if (exists("nonrep_data")) {
+  if (!is.null(nonrep_data)) {
     write_csv(nonrep_data, paste0(getwd(), "/", file_name_nonrep, ".csv"), na = "")
   }
   yearlyrep_data <- du.reshape.generate.yearly.repeated(data, dict_kind)
-  if (exists("yearlyrep_data")) {
+  if (!is.null(yearlyrep_data)) {
     write_csv(yearlyrep_data, paste0(getwd(), "/", file_name_yearly, ".csv"), na = "")
   }
   monthlyrep_data <- du.reshape.generate.monthly.repeated(data, dict_kind)
-  if (exists('monthlyrep_data')) {
+  if (!is.null(monthlyrep_data)) {
     write_csv(monthlyrep_data, paste0(getwd(), "/", file_name_monthly, ".csv"), na = "")
   }
@@ -53,7 +53,7 @@ du.reshape <- function(upload = TRUE, project, data_version, input_format, dict_
   weeklyrep_data <- du.reshape.generate.weekly.repeated(data, dict_kind)
-  if (exists("weeklyrep_data")) {
+  if (!is.null(weeklyrep_data)) {
     write_csv(weeklyrep_data, paste0(getwd(), "/", file_name_weekly, ".csv"), na = "")
     weeklyrep_metadata <- du.retrieve.full.dict(du.enum.table.types()$WEEKLY, dict_kind)
     weeklyrep_data <- du.add.metadata(weeklyrep_data, weeklyrep_metadata)
@@ -75,7 +75,7 @@ du.reshape <- function(upload = TRUE, project, data_version, input_format, dict_
   trimester_data <- du.reshape.generate.trimesterly.repeated(data, dict_kind)
-  if (exists("trimester_data")) {
+  if (!is.null(trimester_data)) {
     write_csv(trimester_data, paste0(getwd(), "/", file_name_trimester, ".csv"), na = "")
     trimester_metadata <- du.retrieve.full.dict(du.enum.table.types()$TRIMESTER, dict_kind)
     trimester_data <- du.add.metadata(trimester_data, trimester_metadata)
@@ -94,9 +94,9 @@ du.reshape <- function(upload = TRUE, project, data_version, input_format, dict_
   if (upload) {
     if (ds_upload.globals$login_data$driver == du.enum.backends()$OPAL) {
-      if (exists("nonrep_data")) du.opal.upload(dict_kind, file_name_nonrep)
-      if (exists("yearlyrep_data")) du.opal.upload(dict_kind, file_name_yearly)
-      if (exists("monthlyrep_data")) { du.opal.upload(dict_kind, file_name_monthly) }
+      if (!is.null(nonrep_data)) du.opal.upload(dict_kind, file_name_nonrep)
+      if (!is.null(yearlyrep_data)) du.opal.upload(dict_kind, file_name_yearly)
+      if (!is.null(monthlyrep_data)) { du.opal.upload(dict_kind, file_name_monthly) }
     }
     if (ds_upload.globals$login_data$driver == du.enum.backends()$ARMADILLO) {
       if (!is.null(nonrep_data)) {
diff --git a/R/reshape_helpers.R b/R/reshape_helpers.R
index a45e56c..20a4d47 100644
--- a/R/reshape_helpers.R
+++ b/R/reshape_helpers.R
@@ -46,13 +46,17 @@ du.read.source.file <- function(input_path, input_format) {
 #'
 #' @noRd
 du.data.frame.remove.all.na.rows <- function(dataframe) {
-  df <- dataframe[-c(1)]
+  if (ncol(dataframe) >= 1) {
+    df <- dataframe[-c(1)]

-  naLines <- df %>%
-    is.na() %>%
-    apply(MARGIN = 1, FUN = all)
+    naLines <- df %>%
+      is.na() %>%
+      apply(MARGIN = 1, FUN = all)

-  return(df[!naLines, ])
+    return(df[!naLines, ])
+  } else {
+    return(list(0, 0))
+  }
 }
 #'
 #' Matched the columns in the source data.
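Context for the exists() to is.null() change above: once nonrep_data <- du.reshape.generate.non.repeated(...) has run, the name is always bound, even when the generator returns NULL because no matching measures were found, so exists("nonrep_data") was always TRUE and the write could be attempted on a NULL value. A minimal sketch of the difference, using a hypothetical stand-in for the generator:

generate <- function(found) if (found) data.frame(x = 1) else NULL  # hypothetical stand-in
result <- generate(found = FALSE)
exists("result")  #> TRUE  - the name is bound even though the value is NULL
!is.null(result)  #> FALSE - correctly signals there is nothing to write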
@@ -204,7 +208,7 @@ du.reshape.generate.yearly.repeated <- function(data, dict_kind) {
   matched_columns <- du.match.columns(colnames(data), variables_yearly_repeated_dict$name)
   yearly_repeated_measures <- data[matched_columns]
-  if (nrow(du.data.frame.remove.all.na.rows(yearly_repeated_measures)) <= 0) {
+  if (ncol(yearly_repeated_measures) <= 0 || nrow(du.data.frame.remove.all.na.rows(yearly_repeated_measures)) <= 0) {
     message("[WARNING] No yearly-repeated measures found in this set")
     return()
   }
@@ -276,7 +280,7 @@ du.reshape.generate.monthly.repeated <- function(data, dict_kind) {
   matched_columns <- du.match.columns(colnames(data), variables_monthly_repeated_dict$name)
   monthly_repeated_measures <- data[, matched_columns]
-  if (nrow(du.data.frame.remove.all.na.rows(monthly_repeated_measures)) <= 0) {
+  if (ncol(monthly_repeated_measures) <= 0 || nrow(du.data.frame.remove.all.na.rows(monthly_repeated_measures)) <= 0) {
     message("[WARNING] No monthly-repeated measures found in this set")
     return()
   }
@@ -350,7 +354,7 @@ du.reshape.generate.weekly.repeated <- function(data, dict_kind) {
   matched_columns <- du.match.columns(colnames(data), variables_weekly_repeated_dict$name)
   weekly_repeated_measures <- data[, matched_columns]
-  if (nrow(du.data.frame.remove.all.na.rows(weekly_repeated_measures)) <= 0) {
+  if (ncol(weekly_repeated_measures) <= 0 || nrow(du.data.frame.remove.all.na.rows(weekly_repeated_measures)) <= 0) {
     message("[WARNING] No weekly-repeated measures found in this set")
     return()
   }
@@ -429,7 +433,7 @@ du.reshape.generate.trimesterly.repeated <- function(data, dict_kind) {
   matched_columns <- du.match.columns(colnames(data), variables_trimesterly_repeated_dict$name)
   trimesterly_repeated_measures <- data[, matched_columns]
-  if (nrow(du.data.frame.remove.all.na.rows(trimesterly_repeated_measures)) <= 0) {
+  if (ncol(trimesterly_repeated_measures) <= 0 || nrow(du.data.frame.remove.all.na.rows(trimesterly_repeated_measures)) <= 0) {
     message("[WARNING] No trimesterly-repeated measures found in this set")
     return()
   }
diff --git a/docs/404.html b/docs/404.html
index df8f70c..955bbf5 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -32,7 +32,7 @@
 Reference
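The added ncol() guard above matters because du.match.columns() can match zero columns; selecting them yields a zero-column data frame, which the nrow() check alone did not reliably catch. A minimal sketch with hypothetical data:

measures <- data.frame(child_id = 1:3, weight_01 = c(NA, 10, 12))  # hypothetical source data
matched <- character(0)                        # no dictionary columns matched
selection <- measures[, matched, drop = FALSE] # 3 rows, 0 columns
ncol(selection) <= 0                           #> TRUE - the table is skipped up front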
@@ -45,7 +45,7 @@
Go to Start -> Type ‘R’ within the Run field. Click on R x.x.x to get to the command-line interface.
-install.packages("installr")
-library(installr)
-
-updateR()
install.packages("installr")
+library(installr)
+
+updateR()
You will get a wizard; please choose all the defaults and proceed with the installation.
You can use updateR. Install it via the following commands:
-install.packages('devtools') #assuming it is not already installed
-library(devtools)
-install_github('andreacirilloac/updateR')
-library(updateR)
-updateR(admin_password = 'Admin user password')
+install.packages('devtools') #assuming it is not already installed
+library(devtools)
+install_github('andreacirilloac/updateR')
+library(updateR)
+updateR(admin_password = 'Admin user password')
@@ -152,9 +152,9 @@
reference: for more information check: http://www.andreacirillo.com/2018/03/10/updater-package-update-r-version-with-a-function-on-mac-osx/
When this message occurs it means that the types of the vectors concerning the yearly or monthly repeated measures are different. The reshape function will resolve this, but it is wise to check your dataset for these differences.
For instance:
-weight_01 == integer
-...
-wieght_04 == decimal
+weight_01 == integer
+...
+weight_04 == decimal
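If you want to spot such type mismatches before reshaping, you can compare the classes of the repeated-measure columns yourself; a small sketch with hypothetical column names:

df <- data.frame(weight_01 = 1L, weight_02 = 2L, weight_04 = 3.5)  # hypothetical data
types <- sapply(df[grep("^weight_", names(df))], class)
types[types != types[1]]  #> weight_04 is "numeric" while the others are "integer"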
When you do not specify the protocol in the URL.
Incorrect:
-builder <- newDSLoginBuilder()
-builder.append(server = "opal.cohort-example.org")
+builder <- newDSLoginBuilder()
+builder.append(server = "opal.cohort-example.org")
Results in:
Error in curl::curl_fetch_memory(url, handle = handle) :
Protocol "" not supported or disabled in libcurl
Correct:
-builder <- newDSLoginBuilder()
-builder.append(server = "https://opal.cohort-example.org")
+builder <- newDSLoginBuilder()
+builder.append(server = "https://opal.cohort-example.org")
When you receive the error below, you need to specify the database_name in the du.upload() method.
Error: Client error: (404) Not Found; NoSuchDatabase: opal_data
Example:
-du.upload(database_name = "example_database_name", .....)
+du.upload(database_name = "example_database_name", .....)
You can check the database name in the “Administration”-tab and then “Databases”. The second database has a name that you need to specify.
Error in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
function 'Rcpp_precious_remove' not provided by package 'Rcpp'
Reinstalling can be done using the following code:
-install.packages("Rcpp")
-library(Rcpp)
+install.packages("Rcpp")
+library(Rcpp)
-The beta dictionaries are used to be able to initiate creation of dictonaries in the harmonisation process of new variables in a very early stage. As a PI you can manage and create new dictionaries which can be uploaded and tested by involved cohorts in a pilot phase of the project. Dictionaries can be changed and updated at any time and the network will be unaffected. After testing and approving the final beta dictionary the variables will be included in the main dictionaries and released when in the 6 months cycle.
+The beta dictionaries are used to initiate the creation of dictionaries for newly harmonised variables at a very early stage. As a PI you can manage and create new dictionaries, which can be uploaded and tested by the involved cohorts in a pilot phase of the project. Dictionaries can be changed and updated at any time without affecting the network. After the final beta dictionary has been tested and approved, the variables will be included in the main dictionaries and released in the six-month cycle.
-You need to ‘clone’ the repository on your own workingstation to be able to create new dictionaries. This means you get a copy of the directory on your own system. You can do this by executing the following command:
+You need to ‘clone’ the repository on your own workstation to be able to create new dictionaries. This means you get a copy of the directory on your own system. You can do this by executing the following command:
git clone https://github.com/lifecycle-project/ds-beta-dictionaries
Now you have your own copy to work in.
-Within the dictionaries-directory of the repository you need to create a dictionary for your study. This will result in a structure like:
+Within the dictionaries-directory of the repository you need to create a dictionary for your study. This will result in a structure like:
-When you finished writing the dictionary you need to commit and push them to the repository on Github. Please check the git-workflow document to get the dictionaries exposed.
+Then you need to create a dictionary based upon the variables you want to harmonise. To illustrate how you do this you can use the example as a reference.
+When you have finished writing the dictionary you need to commit and push it to the repository on GitHub. Please check the git-workflow document to get the dictionaries exposed.
-When a first version of a beta dictionary is available, you can upload it to the Armadillo with the following code:
+When a first version of a beta dictionary is available, you can upload it to the Armadillo with the following code:
-login_data <- data.frame(
- server = "https://armadillo.test.molgenis.org",
- storage = "https://armadillo-minio.test.molgenis.org",
- driver = "ArmadilloDriver"
-)
-
-du.login(login_data)
-#> Login to: "https://armadillo.test.molgenis.org"
-#> [1] "We're opening a browser so you can log in with code 4G79WW"
-#> Logged on to: "https://armadillo.test.molgenis.org"
-
-du.upload.beta(
- dict_name = "example-dictionary",
- data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv",
- data_version = "1_0")
-#> Error in du.upload.beta(dict_name = "example-dictionary", data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv", : unused argument (data_version = "1_0")
+login_data <- data.frame(
+ server = "https://armadillo.test.molgenis.org",
+ storage = "https://armadillo-minio.test.molgenis.org",
+ driver = "ArmadilloDriver"
+)
+
+du.login(login_data)
+#> Login to: "https://armadillo.test.molgenis.org"
+#> [1] "We're opening a browser so you can log in with code 4G79WW"
+#> Logged on to: "https://armadillo.test.molgenis.org"
+
+du.upload.beta(
+ dict_name = "example-dictionary",
+ data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv",
+ data_version = "1_0")
+#> Error in du.upload.beta(dict_name = "example-dictionary", data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv", : unused argument (data_version = "1_0")
You will be able to check the data in the Armadillo immediately.
-When a first version of a beta dictionary is available, you can upload it to Opal with the following code:
+When a first version of a beta dictionary is available, you can upload it to Opal with the following code:
-login_data <- data.frame(
- server = "https://opal.edge.molgenis.org",
- password = "ouf0uPh6",
- driver = "OpalDriver"
-)
-
-du.login(login_data)
-#> Login to: "https://opal.edge.molgenis.org"
-#> Logged on to: "https://opal.edge.molgenis.org"
-
-du.upload.beta(
- dict_name = "example-dictionary",
- data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv",
- data_version = "1_0")
-#> Error in du.upload.beta(dict_name = "example-dictionary", data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv", : unused argument (data_version = "1_0")
-After this you need to manually upload the data just like you would in a released data upload.
+login_data <- data.frame(
+ server = "https://opal.edge.molgenis.org",
+ password = "ouf0uPh6",
+ driver = "OpalDriver"
+)
+
+du.login(login_data)
+#> Login to: "https://opal.edge.molgenis.org"
+#> Logged on to: "https://opal.edge.molgenis.org"
+
+du.upload.beta(
+ dict_name = "example-dictionary",
+ data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv",
+ data_version = "1_0")
+#> Error in du.upload.beta(dict_name = "example-dictionary", data_input_path = "/Users/sido/VisualStudioCodeProjects/ds-beta-dictionaries/data/example-dictionary/beta_dict_generated_data.csv", : unused argument (data_version = "1_0")
+After this you need to manually upload the data just like you would in a released data upload.
@@ -219,12 +239,12 @@
-Be advised: the data is now placed into directories on the target system, in this case a “beta” directory
+Be advised: the data is now placed into directories on the target system, in this case a “beta” directory
-Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
-We suggest to version the data that you upload. This is needed because of the reproducibility of the research that is going to be performed on the datasets. We are suggesting to do this based upon (semantic versioning). A better explanation on using semantic versioning in data can be found here: semantic versioning for data products.
+We suggest versioning the data that you upload. This is needed for the reproducibility of the research that will be performed on the datasets. We suggest doing this based upon semantic versioning. A better explanation of using semantic versioning with data can be found here: semantic versioning for data products.
-The table scheme we use is composed of 2 components, the data version and the table name.
+The table scheme we use is composed of two components: the data version and the table name.
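As an illustration, the table identifiers visible in the upload logs, such as 1_0_non_rep, combine exactly these two parts:

data_version <- "1_0"    # semantic data version
table_name <- "non_rep"  # table name
paste(data_version, table_name, sep = "_")  #> "1_0_non_rep"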
-Semantic versioning is just one of the strategies you can use. Date-based versioning is also a good way to deal with this examples can be:
+Semantic versioning is just one of the strategies you can use. Date-based versioning is also a good way to deal with this; examples can be:
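For instance, a date-based scheme could derive the version label from the release date (the values below are purely illustrative):

format(as.Date("2021-01-29"), "%Y_%m_%d")  #> "2021_01_29" as a data version label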
-To keep track of all the changed within the different versions of the data you uploaded you can keep track of a changelog.
-To view an example, please check out the changelogs of the dictionaries. You can use the same format, but for the data. When you archived the project it becomes less relevant.
+To keep track of all the changes within the different versions of the data you uploaded you can keep a changelog.
+To view an example, please check out the changelogs of the dictionaries. You can use the same format, but for the data. When you have archived the project it becomes less relevant.
-Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
-You can add new content and release the dictionaries for all consortia.
+You can add new content and release the dictionaries for all consortia.
-When you need to add new variables you need to perform 2 steps: - Adding the new variables to the dictionaries - Reshaping your data to DataSHIELD backend format
+When you need to add new variables you need to perform 2 steps: adding the new variables to the dictionaries, and reshaping your data to the DataSHIELD backend format.
-When you add new dictionaries you need to place them in /dictionaries/core/x_x or /dictionaries/outcome/x_x.
+When you add new dictionaries you need to place them in /dictionaries/core/x_x or /dictionaries/outcome/x_x.
For WP1 and 3 these 3 tables are namespaced this way:
-You need to place them into R/data/dictionaries/x_x as well.
+You need to place them into R/data/dictionaries/x_x as well.
Finally you need to amend the changelogs; check amend changelogs.
-Please use the pull-request flow. For more information check GIT workflow
+Please use the pull-request flow. For more information check GIT workflow
-We are using an implementation of semantic versioning (semantic versioning). A better explanation on using semantic versioning in data can be found here: semantic versioning for data products.
-We now can distinguish 4 tables for the core variables and 4 tables for the outcome variables. They will be released in 2 sets. One for the core variables and one for the outcome variables.
+We are using an implementation of semantic versioning (semantic versioning). A better explanation of using semantic versioning with data can be found here: semantic versioning for data products.
+We can now distinguish 4 tables for the core variables and 4 tables for the outcome variables. They will be released in 2 sets: one for the core variables and one for the outcome variables.
Core variables
-We are using semantic versioning in the data dictionary in LifeCycle. The implementation we now use is:
+We are using semantic versioning in the data dictionary in LifeCycle. The implementation we now use is:
-To keep track of all the changed within the different versions of the dictionaries and data releases we need to have changelogs. This way we can trace back what has happened in which release.
+To keep track of all the changes within the different versions of the dictionaries and data releases we need to have changelogs. This way we can trace back what happened in which release.
-Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
-The aim of the EU Child Cohort Network is to bring together data from existing child cohorts into one open and sustainable, multi-disciplinary network. To ensure sure that the EU Child Cohort Network is both open and sustainable, the consortia are using the data-sharing platform DataSHIELD.
-Participating cohorts must harmonise data, following the data harmonisation manuals. Secondly, perform quality-control checks on their harmonised data. Thirdly, upload descriptions of harmonisation to the cohort catalogue. The last step is uploading the data into the DataSHIELD backends. The guide below guides you through the process.
+The aim of the EU Child Cohort Network is to bring together data from existing child cohorts into one open, sustainable, multi-disciplinary network. To ensure that the EU Child Cohort Network is both open and sustainable, the consortia are using the data-sharing platform DataSHIELD.
+Participating cohorts must first harmonise their data, following the data harmonisation manuals. Secondly, they perform quality-control checks on the harmonised data. Thirdly, they upload descriptions of the harmonisation to the cohort catalogue. The last step is uploading the data into the DataSHIELD backends. The guide below walks you through the process.
-You can install the package by executing the following command:
+You can install the package by executing the following commands:
-install.packages("dsUpload", repos=c('https://registry.molgenis.org/repository/R/', 'https://cran.datashield.org'), dependencies = TRUE)
-When you want to use it you need to load it.
-# load the package
-library(dsUpload)
-#> Loading required package: DSI
-#> Loading required package: progress
-#> Loading required package: R6
+Step 1: install devtools
+install.packages("devtools")
+Step 2: load devtools and install ds-upload
+library(devtools)
+devtools::install_github("lifecycle-project/ds-upload")
+When you want to use it you need to load it.
+
+# load the package
+library(dsUpload)
+#> Loading required package: DSI
+#> Loading required package: progress
+#> Loading required package: R6
-To simplify the upload and importing of data dictionaries this package is written to import and upload the data dictionaries and data in one run. When running the package, you need to specify the data dictionary version and the data input file. When you use data formats other than CSV use need to specify the data format as well
+To simplify the upload and importing of data dictionaries, this package is written to import and upload the data dictionaries and data in one run. When running the package, you need to specify the data dictionary version and the data input file. When you use data formats other than CSV you need to specify the data format as well.
Prerequisites
Upload core variables
-Merge all the variables that are obtained in the dictionary of the core variables. So in general that means merge the data of WP1 and WP3 into one set.
+Merge all the variables that are obtained in the dictionary of the core variables. So in general that means merge the data of WP1 and WP3 into one set.
Upload outcome variables
-Merge all the variables that are obtained in the dictionary of the outcome variables. So in general this means merge the data of WP4, WP5 and WP6 into one set.
+Merge all the variables that are obtained in the dictionary of the outcome variables. So in general this means merge the data of WP4, WP5 and WP6 into one set.
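A sketch of such a merge, assuming hypothetical work-package extracts keyed on a shared identifier (the real key column depends on your cohort's export):

wp4 <- data.frame(child_id = 1:2, outcome_a = c(0, 1))  # hypothetical extracts
wp5 <- data.frame(child_id = 1:2, outcome_b = c(1, 1))
wp6 <- data.frame(child_id = 1:2, outcome_c = c(0, 0))
outcome <- Reduce(function(x, y) merge(x, y, by = "child_id", all = TRUE), list(wp4, wp5, wp6))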
-Please following the instruction below to upload the core and outcome variables in the Aramdillo.
-
-login_data <- data.frame(
- server = "https://armadillo.test.molgenis.org",
- storage = "https://armadillo-minio.test.molgenis.org",
- driver = "ArmadilloDriver")
+Please follow the instructions below to upload the core and outcome variables in the Armadillo.
-# login to the Armadillo server
-du.login(login_data = login_data)
-#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> Login to: "https://armadillo.test.molgenis.org"
-#> [1] "We're opening a browser so you can log in with code GFL6Q6"
-#> Logged on to: "https://armadillo.test.molgenis.org"
# upload the data into the DataSHIELD backend
-# these are the core variables
-# be advised the default input format is 'CSV'
-# you can use STATA, SPSS, SAS, CSV's or R as source files
-du.upload(
-cohort_id = 'gecko',
- dict_version = '2_1',
- dict_kind = 'core',
- data_version = '1_0',
- data_input_format = 'CSV',
- data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP1/data/all_measurements_v1_2.csv?raw=true',
- run_mode = "non_interactive"
-
- )#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> ######################################################
-#> Start upload data into DataSHIELD backend
-#> ------------------------------------------------------
-#> * Create temporary workdir
-#> ######################################################
-#> Start download dictionaries
-#> ------------------------------------------------------
-#> * Download: [ 2_1_monthly_rep.xlsx ]
-#> * Download: [ 2_1_non_rep.xlsx ]
-#> * Download: [ 2_1_trimester_rep.xlsx ]
-#> * Download: [ 2_1_yearly_rep.xlsx ]
-#> Successfully downloaded dictionaries
-#> ######################################################
-#> Start importing data dictionaries
-#> ######################################################
-#> * Check released dictionaries
-#> * Project : gecko already exists
-#> ######################################################
-#> Start converting and uploading data
-#> ######################################################
-#> * Setup: load data and set output directory
-#> ------------------------------------------------------
-#> [WARNING] This is an unmatched column, it will be dropped : [ art ].
-#> * Generating: non-repeated measures
-#> * Generating: yearly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Generating: monthly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Generating: trimesterly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Start importing: 2_1_core_1_0 into project: gecko
-#> Compressing...
-#>   |========================================================================| 100%
- #> Uploaded 2_1_core_1_0/trimester
-#> * Import finished successfully
-#> * Start importing: 2_1_core_1_0 into project: gecko
-#> Compressing...
-#>   |========================================================================| 100%
- #> Uploaded 2_1_core_1_0/non_rep
-#> * Import finished successfully
-#> * Start importing: 2_1_core_1_0 into project: gecko
-#> Compressing...
-#>   |========================================================================| 100%
- #> Uploaded 2_1_core_1_0/yearly_rep
-#> * Import finished successfully
-#> * Start importing: 2_1_core_1_0 into project: gecko
-#> Compressing...
-#>   |========================================================================| 100%
- #> Uploaded 2_1_core_1_0/monthly_rep
-#> * Import finished successfully
-#> ######################################################
-#> Converting and import successfully finished
-#> ######################################################
-#> * Reinstate default working directory
-#> * Cleanup temporary directory
-# upload the outcome variables
-du.upload(
-cohort_id = 'gecko',
- dict_version = '1_1',
- dict_kind = 'outcome',
- data_version = '1_0',
- data_input_format = 'CSV',
- data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP6/nd_data_wp6.csv?raw=true',
- run_mode = "non_interactive"
-
- )#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> ######################################################
-#> Start upload data into DataSHIELD backend
-#> ------------------------------------------------------
-#> * Create temporary workdir
-#> ######################################################
-#> Start download dictionaries
-#> ------------------------------------------------------
-#> * Download: [ 1_1_monthly_rep.xlsx ]
-#> * Download: [ 1_1_non_rep.xlsx ]
-#> * Download: [ 1_1_weekly_rep.xlsx ]
-#> * Download: [ 1_1_yearly_rep.xlsx ]
-#> Successfully downloaded dictionaries
-#> ######################################################
-#> Start importing data dictionaries
-#> ######################################################
-#> * Check released dictionaries
-#> * Project : gecko already exists
+login_data <- data.frame(
+ server = "https://armadillo.test.molgenis.org",
+ storage = "https://armadillo-minio.test.molgenis.org",
+ driver = "ArmadilloDriver")
+# login to the Armadillo server
+du.login(login_data = login_data)
+#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> Login to: "https://armadillo.test.molgenis.org"
+#> [1] "We're opening a browser so you can log in with code GFL6Q6"
+#> Logged on to: "https://armadillo.test.molgenis.org"
+# upload the data into the DataSHIELD backend
+# these are the core variables
+# be advised the default input format is 'CSV'
+# you can use STATA, SPSS, SAS, CSV's or R as source files
+du.upload(
+cohort_id = 'gecko',
+ dict_version = '2_1',
+ dict_kind = 'core',
+ data_version = '1_0',
+ data_input_format = 'CSV',
+ data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP1/data/all_measurements_v1_2.csv?raw=true',
+ run_mode = "non_interactive"
+
+ )#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> ######################################################
+#> Start upload data into DataSHIELD backend
+#> ------------------------------------------------------
+#> * Create temporary workdir
+#> ######################################################
+#> Start download dictionaries
+#> ------------------------------------------------------
+#> * Download: [ 2_1_monthly_rep.xlsx ]
+#> * Download: [ 2_1_non_rep.xlsx ]
+#> * Download: [ 2_1_trimester_rep.xlsx ]
+#> * Download: [ 2_1_yearly_rep.xlsx ]
+#> Successfully downloaded dictionaries
+#> ######################################################
+#> Start importing data dictionaries
#> ######################################################
-#> Start converting and uploading data
-#> ######################################################
-#> * Setup: load data and set output directory
-#> ------------------------------------------------------
-#> * Generating: non-repeated measures
-#> * Generating: yearly-repeated measures
-#> * Generating: monthly-repeated measures
-#> * Generating: weekly-repeated measures
-#> * Start importing: 1_1_outcome_1_0 into project: gecko
-#> Compressing...
-#>   |========================================================================| 100%
- #> Uploaded 1_1_outcome_1_0/weekly_rep
-#> * Import finished successfully
-#> * Start importing: 1_1_outcome_1_0 into project: gecko
-#> Compressing...
-#>
+#> * Check released dictionaries
+#> * Project : gecko already exists
+#> ######################################################
+#> Start converting and uploading data
+#> ######################################################
+#> * Setup: load data and set output directory
+#> ------------------------------------------------------
+#> [WARNING] This is an unmatched column, it will be dropped : [ art ].
+#> * Generating: non-repeated measures
+#> * Generating: yearly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Generating: monthly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Generating: trimesterly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Start importing: 2_1_core_1_0 into project: gecko
+#> Compressing...
+#>
+|
+ | | 0%
|
- | | 0%
- |
- |========================================================================| 100%
- #> Uploaded 1_1_outcome_1_0/non_rep
-#> * Import finished successfully
-#> * Start importing: 1_1_outcome_1_0 into project: gecko
-#> Compressing...
-#>
+|========================================================================| 100%
+ #> Uploaded 2_1_core_1_0/trimester
+#> * Import finished successfully
+#> * Start importing: 2_1_core_1_0 into project: gecko
+#> Compressing...
+#>
+|
+ | | 0%
|
- | | 0%
+ |========== | 15%
|
- |====== | 8%
+ |===================== | 29%
|
- |=========== | 16%
+ |=============================== | 44%
|
- |================= | 24%
+ |========================================== | 58%
|
- |======================= | 31%
+ |==================================================== | 73%
|
- |============================ | 39%
+ |=============================================================== | 87%
|
- |================================== | 47%
- |
- |======================================== | 55%
- |
- |============================================= | 63%
- |
- |=================================================== | 71%
- |
- |========================================================= | 79%
- |
- |============================================================== | 86%
- |
- |==================================================================== | 94%
- |
- |========================================================================| 100%
- #> Uploaded 1_1_outcome_1_0/yearly_rep
-#> * Import finished successfully
-#> * Start importing: 1_1_outcome_1_0 into project: gecko
-#> Compressing...
-#>
-|
- | | 0%
- |
- |========================================================================| 100%
- #> Uploaded 1_1_outcome_1_0/monthly_rep
-#> * Import finished successfully
-#> ######################################################
-#> Converting and import successfully finished
-#> ######################################################
-#> * Reinstate default working directory
-#> * Cleanup temporary directory
+# upload the outcome variables
+du.upload(
+cohort_id = 'gecko',
+ dict_version = '1_1',
+ dict_kind = 'outcome',
+ data_version = '1_0',
+ data_input_format = 'CSV',
+ data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP6/nd_data_wp6.csv?raw=true',
+ run_mode = "non_interactive"
+
+ )#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> ######################################################
+#> Start upload data into DataSHIELD backend
+#> ------------------------------------------------------
+#> * Create temporary workdir
+#> ######################################################
+#> Start download dictionaries
+#> ------------------------------------------------------
+#> * Download: [ 1_1_monthly_rep.xlsx ]
+#> * Download: [ 1_1_non_rep.xlsx ]
+#> * Download: [ 1_1_weekly_rep.xlsx ]
+#> * Download: [ 1_1_yearly_rep.xlsx ]
+#> Successfully downloaded dictionaries
+#> ######################################################
+#> Start importing data dictionaries
+#> ######################################################
+#> * Check released dictionaries
+#> * Project : gecko already exists
+#> ######################################################
+#> Start converting and uploading data
+#> ######################################################
+#> * Setup: load data and set output directory
+#> ------------------------------------------------------
+#> * Generating: non-repeated measures
+#> * Generating: yearly-repeated measures
+#> * Generating: monthly-repeated measures
+#> * Generating: weekly-repeated measures
+#> * Start importing: 1_1_outcome_1_0 into project: gecko
+#> Compressing...
+#>   |========================================================================| 100%
+ #> Uploaded 1_1_outcome_1_0/weekly_rep
+#> * Import finished successfully
+#> * Start importing: 1_1_outcome_1_0 into project: gecko
+#> Compressing...
+#>   |========================================================================| 100%
+ #> Uploaded 1_1_outcome_1_0/non_rep
+#> * Import finished successfully
+#> * Start importing: 1_1_outcome_1_0 into project: gecko
+#> Compressing...
+#>   |========================================================================| 100%
+ #> Uploaded 1_1_outcome_1_0/yearly_rep
+#> * Import finished successfully
+#> * Start importing: 1_1_outcome_1_0 into project: gecko
+#> Compressing...
+#>   |========================================================================| 100%
+ #> Uploaded 1_1_outcome_1_0/monthly_rep
+#> * Import finished successfully
+#> ######################################################
+#> Converting and import successfully finished
+#> ######################################################
+#> * Reinstate default working directory
+#> * Cleanup temporary directory
A video guiding you through the process can be found here:
-Check youtube channel: upload data dictionaries and data into Opal
+Check youtube channel: upload data dictionaries and data into Opal
Alternatively, execute these commands in your R-console:
-
-login_data <- data.frame(
- server = "https://opal.edge.molgenis.org",
- user = "administrator",
- password = "ouf0uPh6",
- driver = "OpalDriver")
-# login to the DataSHIELD backend
-du.login(login_data = login_data)
-#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> Login to: "https://opal.edge.molgenis.org"
-#> Logged on to: "https://opal.edge.molgenis.org"
+login_data <- data.frame(
+ server = "https://opal.edge.molgenis.org",
+ user = "administrator",
+ password = "ouf0uPh6",
+ driver = "OpalDriver")
-# upload the data into the DataSHIELD backend
-# these are the core variables
-# be advised the default input format is 'CSV'
-# you can use STATA, SPSS, SAS and CSV's as source files
-du.upload(
- cohort_id = 'gecko',
- dict_version = '2_1',
- dict_kind = 'core',
- data_version = '1_0',
- data_input_format = 'CSV',
- data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP1/data/all_measurements_v1_2.csv?raw=true',
- run_mode = "non_interactive"
-)
-#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> ######################################################
-#> Start upload data into DataSHIELD backend
-#> ------------------------------------------------------
-#> * Create temporary workdir
-#> ######################################################
-#> Start download dictionaries
-#> ------------------------------------------------------
-#> * Download: [ 2_1_monthly_rep.xlsx ]
-#> * Download: [ 2_1_non_rep.xlsx ]
-#> * Download: [ 2_1_trimester_rep.xlsx ]
-#> * Download: [ 2_1_yearly_rep.xlsx ]
-#> Successfully downloaded dictionaries
-#> ######################################################
-#> Start importing data dictionaries
-#> ######################################################
-#> * Check released dictionaries
-#> ------------------------------------------------------
-#> Start creating project: [ lc_gecko_core_2_1 ]
-#> * Project: [ lc_gecko_core_2_1 ] already exists
-#> ------------------------------------------------------
-#> Start importing dictionaries
-#> * Table: [ 1_0_monthly_rep ] already exists
-#> * Import variables into: [ 1_0_monthly_rep ]
-#> * Table: [ 1_0_non_rep ] already exists
-#> * Matched categories for table: [ 1_0_non_rep ]
-#> * Import variables into: [ 1_0_non_rep ]
-#> * Table: [ 1_0_trimester_rep ] already exists
-#> * Matched categories for table: [ 1_0_trimester_rep ]
-#> * Import variables into: [ 1_0_trimester_rep ]
-#> * Table: [ 1_0_yearly_rep ] already exists
-#> * Matched categories for table: [ 1_0_yearly_rep ]
-#> * Import variables into: [ 1_0_yearly_rep ]
-#> All dictionaries are populated correctly
-#> ######################################################
-#> Start converting and uploading data
-#> ######################################################
-#> * Setup: load data and set output directory
-#> ------------------------------------------------------
-#> [WARNING] This is an unmatched column, it will be dropped : [ art ].
-#> * Generating: non-repeated measures
-#> * Generating: yearly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Generating: monthly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Generating: trimesterly-repeated measures
-#> Aggregate function missing, defaulting to 'length'
-#> * Upload: [ 2021-01-29_11-40-58_1_0_trimester_repeated_measures.csv ] to directory [ core ]
-#> * Upload: [ 2021-01-29_11-40-58_1_0_non_repeated_measures.csv ] to directory [ core ]
-#> * Upload: [ 2021-01-29_11-40-58_1_0_yearly_repeated_measures.csv ] to directory [ core ]
-#> * Upload: [ 2021-01-29_11-40-58_1_0_monthly_repeated_measures.csv ] to directory [ core ]
-#> ######################################################
-#> Converting and import successfully finished
-#> ######################################################
-#> * Reinstate default working directory
-#> * Cleanup temporary directory
+# login to the DataSHIELD backend
+du.login(login_data = login_data)
+#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> Login to: "https://opal.edge.molgenis.org"
+#> Logged on to: "https://opal.edge.molgenis.org"
-# upload the outcome variables
-du.upload(
- cohort_id = 'gecko',
- dict_version = '1_1',
- dict_kind = 'outcome',
- data_version = '1_0',
- data_input_format = 'CSV',
- data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP6/nd_data_wp6.csv?raw=true',
- run_mode = "non_interactive"
-)
-#> ***********************************************************************************
-#> [WARNING] You are not running the latest version of the dsUpload-package.
-#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
-#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
-#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
-#> ***********************************************************************************
-#> ######################################################
-#> Start upload data into DataSHIELD backend
-#> ------------------------------------------------------
-#> * Create temporary workdir
-#> ######################################################
-#> Start download dictionaries
-#> ------------------------------------------------------
-#> * Download: [ 1_1_monthly_rep.xlsx ]
-#> * Download: [ 1_1_non_rep.xlsx ]
-#> * Download: [ 1_1_weekly_rep.xlsx ]
-#> * Download: [ 1_1_yearly_rep.xlsx ]
-#> Successfully downloaded dictionaries
-#> ######################################################
-#> Start importing data dictionaries
-#> ######################################################
-#> * Check released dictionaries
-#> ------------------------------------------------------
-#> Start creating project: [ lc_gecko_outcome_1_1 ]
-#> * Project: [ lc_gecko_outcome_1_1 ] already exists
-#> ------------------------------------------------------
-#> Start importing dictionaries
-#> * Table: [ 1_0_monthly_rep ] already exists
-#> * Matched categories for table: [ 1_0_monthly_rep ]
-#> * Import variables into: [ 1_0_monthly_rep ]
-#> * Table: [ 1_0_non_rep ] already exists
-#> * Matched categories for table: [ 1_0_non_rep ]
-#> * Import variables into: [ 1_0_non_rep ]
-#> * Table: [ 1_0_weekly_rep ] already exists
-#> * Import variables into: [ 1_0_weekly_rep ]
-#> * Table: [ 1_0_yearly_rep ] already exists
-#> * Matched categories for table: [ 1_0_yearly_rep ]
-#> * Import variables into: [ 1_0_yearly_rep ]
-#> All dictionaries are populated correctly
-#> ######################################################
-#> Start converting and uploading data
-#> ######################################################
-#> * Setup: load data and set output directory
-#> ------------------------------------------------------
-#> * Generating: non-repeated measures
-#> * Generating: yearly-repeated measures
-#> * Generating: monthly-repeated measures
-#> * Generating: weekly-repeated measures
-#> * Upload: [ 2021-01-29_11-41-21_1_0_weekly_repeated_measures.csv ] to directory [ outcome ]
-#> * Upload: [ 2021-01-29_11-41-21_1_0_non_repeated_measures.csv ] to directory [ outcome ]
-#> * Upload: [ 2021-01-29_11-41-21_1_0_yearly_repeated_measures.csv ] to directory [ outcome ]
-#> * Upload: [ 2021-01-29_11-41-21_1_0_monthly_repeated_measures.csv ] to directory [ outcome ]
-#> ######################################################
-#> Converting and import successfully finished
-#> ######################################################
-#> * Reinstate default working directory
-#> * Cleanup temporary directory
+# upload the data into the DataSHIELD backend
+# these are the core variables
+# be advised the default input format is 'CSV'
+# you can use STATA, SPSS, SAS and CSV's as source files
+du.upload(
+ cohort_id = 'gecko',
+ dict_version = '2_1',
+ dict_kind = 'core',
+ data_version = '1_0',
+ data_input_format = 'CSV',
+ data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP1/data/all_measurements_v1_2.csv?raw=true',
+ run_mode = "non_interactive"
+)
+#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> ######################################################
+#> Start upload data into DataSHIELD backend
+#> ------------------------------------------------------
+#> * Create temporary workdir
+#> ######################################################
+#> Start download dictionaries
+#> ------------------------------------------------------
+#> * Download: [ 2_1_monthly_rep.xlsx ]
+#> * Download: [ 2_1_non_rep.xlsx ]
+#> * Download: [ 2_1_trimester_rep.xlsx ]
+#> * Download: [ 2_1_yearly_rep.xlsx ]
+#> Successfully downloaded dictionaries
+#> ######################################################
+#> Start importing data dictionaries
+#> ######################################################
+#> * Check released dictionaries
+#> ------------------------------------------------------
+#> Start creating project: [ lc_gecko_core_2_1 ]
+#> * Project: [ lc_gecko_core_2_1 ] already exists
+#> ------------------------------------------------------
+#> Start importing dictionaries
+#> * Table: [ 1_0_monthly_rep ] already exists
+#> * Import variables into: [ 1_0_monthly_rep ]
+#> * Table: [ 1_0_non_rep ] already exists
+#> * Matched categories for table: [ 1_0_non_rep ]
+#> * Import variables into: [ 1_0_non_rep ]
+#> * Table: [ 1_0_trimester_rep ] already exists
+#> * Matched categories for table: [ 1_0_trimester_rep ]
+#> * Import variables into: [ 1_0_trimester_rep ]
+#> * Table: [ 1_0_yearly_rep ] already exists
+#> * Matched categories for table: [ 1_0_yearly_rep ]
+#> * Import variables into: [ 1_0_yearly_rep ]
+#> All dictionaries are populated correctly
+#> ######################################################
+#> Start converting and uploading data
+#> ######################################################
+#> * Setup: load data and set output directory
+#> ------------------------------------------------------
+#> [WARNING] This is an unmatched column, it will be dropped : [ art ].
+#> * Generating: non-repeated measures
+#> * Generating: yearly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Generating: monthly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Generating: trimesterly-repeated measures
+#> Aggregate function missing, defaulting to 'length'
+#> * Upload: [ 2021-01-29_11-40-58_1_0_trimester_repeated_measures.csv ] to directory [ core ]
+#> * Upload: [ 2021-01-29_11-40-58_1_0_non_repeated_measures.csv ] to directory [ core ]
+#> * Upload: [ 2021-01-29_11-40-58_1_0_yearly_repeated_measures.csv ] to directory [ core ]
+#> * Upload: [ 2021-01-29_11-40-58_1_0_monthly_repeated_measures.csv ] to directory [ core ]
+#> ######################################################
+#> Converting and import successfully finished
+#> ######################################################
+#> * Reinstate default working directory
+#> * Cleanup temporary directory
+
+# upload the outcome variables
+du.upload(
+ cohort_id = 'gecko',
+ dict_version = '1_1',
+ dict_kind = 'outcome',
+ data_version = '1_0',
+ data_input_format = 'CSV',
+ data_input_path = 'https://github.com/lifecycle-project/ds-upload/blob/master/inst/examples/data/WP6/nd_data_wp6.csv?raw=true',
+ run_mode = "non_interactive"
+)
+#> ***********************************************************************************
+#> [WARNING] You are not running the latest version of the dsUpload-package.
+#> [WARNING] If you want to upgrade to newest version : [ 4.0.6 ],
+#> [WARNING] Please run 'install.packages("dsUpload", repos = "https://registry.molgenis.org/repository/R/")'
+#> [WARNING] Check the release notes here: https://github.com/lifecycle-project/analysis-protocols/releases/tag/4.0.6
+#> ***********************************************************************************
+#> ######################################################
+#> Start upload data into DataSHIELD backend
+#> ------------------------------------------------------
+#> * Create temporary workdir
+#> ######################################################
+#> Start download dictionaries
+#> ------------------------------------------------------
+#> * Download: [ 1_1_monthly_rep.xlsx ]
+#> * Download: [ 1_1_non_rep.xlsx ]
+#> * Download: [ 1_1_weekly_rep.xlsx ]
+#> * Download: [ 1_1_yearly_rep.xlsx ]
+#> Successfully downloaded dictionaries
+#> ######################################################
+#> Start importing data dictionaries
+#> ######################################################
+#> * Check released dictionaries
+#> ------------------------------------------------------
+#> Start creating project: [ lc_gecko_outcome_1_1 ]
+#> * Project: [ lc_gecko_outcome_1_1 ] already exists
+#> ------------------------------------------------------
+#> Start importing dictionaries
+#> * Table: [ 1_0_monthly_rep ] already exists
+#> * Matched categories for table: [ 1_0_monthly_rep ]
+#> * Import variables into: [ 1_0_monthly_rep ]
+#> * Table: [ 1_0_non_rep ] already exists
+#> * Matched categories for table: [ 1_0_non_rep ]
+#> * Import variables into: [ 1_0_non_rep ]
+#> * Table: [ 1_0_weekly_rep ] already exists
+#> * Import variables into: [ 1_0_weekly_rep ]
+#> * Table: [ 1_0_yearly_rep ] already exists
+#> * Matched categories for table: [ 1_0_yearly_rep ]
+#> * Import variables into: [ 1_0_yearly_rep ]
+#> All dictionaries are populated correctly
+#> ######################################################
+#> Start converting and uploading data
+#> ######################################################
+#> * Setup: load data and set output directory
+#> ------------------------------------------------------
+#> * Generating: non-repeated measures
+#> * Generating: yearly-repeated measures
+#> * Generating: monthly-repeated measures
+#> * Generating: weekly-repeated measures
+#> * Upload: [ 2021-01-29_11-41-21_1_0_weekly_repeated_measures.csv ] to directory [ outcome ]
+#> * Upload: [ 2021-01-29_11-41-21_1_0_non_repeated_measures.csv ] to directory [ outcome ]
+#> * Upload: [ 2021-01-29_11-41-21_1_0_yearly_repeated_measures.csv ] to directory [ outcome ]
+#> * Upload: [ 2021-01-29_11-41-21_1_0_monthly_repeated_measures.csv ] to directory [ outcome ]
+#> ######################################################
+#> Converting and import successfully finished
+#> ######################################################
+#> * Reinstate default working directory
+#> * Cleanup temporary directory
-IMPORTANT: You can run this package for the core variables and for the outcome variables. Each of them requires changing some parameters in the function call. So dict_kind specific ‘core’ or ‘outcome’ variables and dict_version specifies the data dictionary version (check the changelogs here: https://github.com/lifecycle-project/ds-dictionaries/tree/master/changelogs).
+IMPORTANT: You can run this package for the core
+variables and for the outcome variables. Each of them requires changing
+some parameters in the function call. So dict_kind specifies ‘core’ or
+‘outcome’ variables and dict_version specifies the data dictionary
+version (check the changelogs here: https://github.com/lifecycle-project/ds-dictionaries/tree/master/changelogs).
-IMPORTANT You can specify your upload format! So you do not have to export to CSV first. Supported upload formats are: ‘SPSS’, ‘SAS’, ‘STATA’ and ‘CSV’.
+IMPORTANT: You can specify your upload format! So you
+do not have to export to CSV first. Supported upload formats are:
+‘SPSS’, ‘SAS’, ‘STATA’ and ‘CSV’.
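For example, a call along these lines uploads outcome variables directly from a Stata export, with no CSV step. A minimal sketch; the dictionary version, file path and cohort id below are placeholders:

library(dsUpload)

# Sketch: upload outcome variables straight from a Stata (.dta) export.
# dict_version, data_input_path and cohort_id are placeholder values.
du.upload(
  dict_kind = "outcome",
  dict_version = "1_1",
  data_version = "1_0",
  data_input_format = "STATA",
  data_input_path = "~/path-to-file/outcome_measurements.dta",
  cohort_id = "gecko"
)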
If you run these commands, your data will be uploaded to the DataSHIELD backend. If you use Opal, you can now import these data into the tables manually.
+If you run these commands, your data will be uploaded to the
+DataSHIELD backend. If you use Opal, you can now import these data into
+the tables manually.
A video guiding you through the process can be found here: import data into Opal
Alternatively, execute these actions for Opal:
--IMPORTANT: make sure no NEW variables are introduce 11. Click on “Finish” 12. Check the “Task logs” (on the left side of the screen, in the icon bar)
+IMPORTANT: make sure no NEW variables are introduced
+11. Click on “Finish” 12. Check the “Task logs” (on the left side of the
+screen, in the icon bar)
It will match your data dictionary and determine which variables are matched or not. You can re-upload the source files as often as needed.
+It will match your data dictionary and determine which variables are
+matched or not. You can re-upload the source files as often as
+needed.
Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
Local quality control is conducted by each cohort after harmonisation, following the quality control instructions and scripts provided by each work package lead. These check:
+Local quality control is conducted by each cohort after
+harmonisation, following the quality control instructions and scripts
+provided by each work package lead. These check:
Any inconsistencies identified are investigated on a cases-by-case basis to establish which values are legitimate and which are errors, also in light of the other data available.
+Any inconsistencies identified are investigated on a case-by-case
+basis to establish which values are legitimate and which are errors,
+also in light of the other data available.
Quality of harmonised data is also assessed at the central level (central quality control), by creating summary statistics for each variable in R/DataSHIELD, and comparing these across cohorts. As in the local quality control, this is to identify outliers and improbable values and inconsistencies in data, but also to identify large inconsistencies between cohorts. Where large inconsistencies are found, these are investigated further in order to establish to what extent these differences are real vs. an artefact of differing methodology. They could, for example, be due to different sampling and recruitment methods or differences in the instruments used to collect data; they could also be due to differences in the harmonisation process itself.
+Quality of harmonised data is also assessed at the central level
+(central quality control), by creating summary statistics for each
+variable in R/DataSHIELD, and comparing these across cohorts. As in the
+local quality control, this is to identify outliers and improbable
+values and inconsistencies in data, but also to identify large
+inconsistencies between cohorts. Where large inconsistencies are found,
+these are investigated further in order to establish to what extent
+these differences are real vs. an artefact of differing methodology.
+They could, for example, be due to different sampling and recruitment
+methods or differences in the instruments used to collect data; they
+could also be due to differences in the harmonisation process
+itself.
The central quality control checks are carried out when data are uploaded and imported into a DataSHIELD backend (for example an institute’s DataSHIELD server). These four tables are now supported.
+The central quality control checks are carried out when data are
+uploaded and imported into a DataSHIELD backend (for example an
+institute’s DataSHIELD server). These four tables are now supported.
For each of the four tables in the network, means and standard deviations are computed for continuous variables, and frequencies and percentages for categorical variables; these are tabulated over year for yearly- and monthly-repeated variables, and over trimester for trimester repeated variables.
-Once computed, the summary statistics are pushed to a central server which creates a PDF of the combined results. It is possible to view the output from the function by specifying verbose = TRUE in the method signature.
+For each of the four tables in the network, means and standard
+deviations are computed for continuous variables, and frequencies and
+percentages for categorical variables; these are tabulated over year for
+yearly- and monthly-repeated variables, and over trimester for
+trimester-repeated variables.
+Once computed, the summary statistics are pushed to a central server
+which creates a PDF of the combined results. It is possible to view the
+output from the function by specifying verbose = TRUE in
+the method signature.
The quality control will be executed in the pipeline after uploading and importing the data into a DataSHIELD backend. However you can execute the flow seperatly as well.
+The quality control will be executed in the pipeline after uploading
+and importing the data into a DataSHIELD backend. However, you can
+execute the flow separately as well.
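To execute it separately, a call like the following should suffice, using the du.quality.control() signature documented in the reference below. A sketch; the project name is illustrative:

library(dsUpload)

# Run the quality-control flow on its own and print the underlying
# function output (project name is illustrative).
du.quality.control(
  project = "lc_gecko_core_2_1",
  verbose = TRUE
)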
-All of the measures are quality control based upon generic algorithems and with the help of the dsHelper package.
+All of the quality-control measures are based upon generic
+algorithms, with the help of the dsHelper
+package.
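For instance, the per-variable summary statistics can be reproduced with dsHelper directly. A minimal sketch, assuming a logged-in DataSHIELD session and dsHelper's dh.getStats() interface; the table and variable names below are placeholders, so check the dsHelper documentation for the exact signature:

library(dsHelper)

# Summary statistics for a couple of harmonised variables.
# "core_non_rep", "sex" and "agebirth_m_y" are placeholder names.
stats <- dh.getStats(
  df = "core_non_rep",
  vars = c("sex", "agebirth_m_y")
)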
Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
-Haakma S, Pinot de Moira A, Cornet M, Rauschert S (2022).
+Slofstra M, Haakma S, Pinot de Moira A, Cornet M, Rauschert S (2022).
 dsUpload: Upload Functions for DataSHIELD Backends.
-R package version 4.5.1, https://github.com/lifecycle-project/ds-upload.
+R package version 4.6.1, https://github.com/lifecycle-project/ds-upload.
 @Manual{,
   title = {dsUpload: Upload Functions for DataSHIELD Backends},
-  author = {Sido Haakma and Angela {Pinot de Moira} and Maxime Cornet and Sebastian Rauschert},
+  author = {Mariska Slofstra and Sido Haakma and Angela {Pinot de Moira} and Maxime Cornet and Sebastian Rauschert},
   year = {2022},
-  note = {R package version 4.5.1},
+  note = {R package version 4.6.1},
   url = {https://github.com/lifecycle-project/ds-upload},
 }
@@ -119,11 +123,11 @@
Please check uploading to DataSHIELD guide
+Please check the [uploading to DataSHIELD guide](https://lifecycle-project.github.io/ds-upload)
For detailed function descriptions, please check: references and above.
du.login(login_data)
du.login(login_data)
login data frame containing the server url
if (FALSE) {
-
-login_data <- data.frame(
- server = "https://armadillo.dev.molgenis.org",
- storage = "https://armadillo-minio.dev.molgenis.org",
- username = "admin",
- password = "admin",
- insecure = FALSE,
- options = NULL
-)
-
-du.login(login_data)
-}
+ if (FALSE) {
+
+login_data <- data.frame(
+ server = "https://armadillo.dev.molgenis.org",
+ storage = "https://armadillo-minio.dev.molgenis.org",
+ username = "admin",
+ password = "admin",
+ insecure = FALSE,
+ options = NULL
+)
+
+du.login(login_data)
+}
du.quality.control(
- project,
- tableType = du.enum.table.types()$ALL,
- data_version = NULL,
- verbose = FALSE
-)
du.quality.control(
+ project,
+ tableType = du.enum.table.types()$ALL,
+ data_version = NULL,
+ verbose = FALSE
+)
specify project you want to perform quality control on
limit the tables to run (can be non_rep, yearly_rep, monthly_rep, weekly_rep or trimester)
data version of the table
output the function's output when set to TRUE
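Putting these arguments together, a restricted run might look like this. A sketch; the project name and data version are illustrative:

# Quality control for the weekly-repeated table only.
# Project name and data version are illustrative.
du.quality.control(
  project = "lc_gecko_core_2_1",
  tableType = du.enum.table.types()$WEEKLY,
  data_version = "1_0",
  verbose = FALSE
)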
du.upload.beta(
- upload = TRUE,
- dict_name = "",
- action = du.enum.action()$ALL,
- data_input_path = "",
- data_input_format = du.enum.input.format()$CSV,
- database_name = "opal_data",
- run_mode = du.enum.run.mode()$NORMAL
-)
du.upload.beta(
+ upload = TRUE,
+ dict_name = "",
+ action = du.enum.action()$ALL,
+ data_input_path = "",
+ data_input_format = du.enum.input.format()$CSV,
+ database_name = "opal_data",
+ run_mode = du.enum.run.mode()$NORMAL
+)
do we need to upload to the DataSHIELD backend
name of the dictionary located on Github, usually something like this: diabetes/test_vars_01
action to be performed, can be 'reshape', 'populate' or 'all'
path to the to-be-reshaped data
format of the data to be reshaped. Can be 'CSV', 'STATA', or 'SAS'
is the name of the data backend of DataSHIELD, default = opal_data
default = NORMAL, can be TEST or NON_INTERACTIVE
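As a sketch of a typical call with these arguments, using the dictionary name given as the example in the parameter description above; the data path is hypothetical:

library(dsUpload)

# Reshape and upload against a Github-hosted dictionary.
# dict_name and data_input_path are placeholder values.
du.upload.beta(
  dict_name = "diabetes/test_vars_01",
  data_input_path = "~/path-to-file/test_vars_measurements.csv",
  data_input_format = du.enum.input.format()$CSV
)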
du.upload(
- dict_version = "2_1",
- data_version = "1_0",
- dict_kind = du.enum.dict.kind()$CORE,
- cohort_id,
- database_name = "opal_data",
- data_input_format = du.enum.input.format()$CSV,
- data_input_path,
- action = du.enum.action()$ALL,
- upload = TRUE,
- run_mode = du.enum.run.mode()$NORMAL,
- override_project = NULL
-)
du.upload(
+ dict_version = "2_1",
+ data_version = "1_0",
+ dict_kind = du.enum.dict.kind()$CORE,
+ cohort_id,
+ database_name = "opal_data",
+ data_input_format = du.enum.input.format()$CSV,
+ data_input_path,
+ action = du.enum.action()$ALL,
+ upload = TRUE,
+ run_mode = du.enum.run.mode()$NORMAL,
+ override_project = NULL
+)
version of the data dictionary to be used
version of the dataset to be uploaded
can be 'core' or 'outcome'
cohort name
is the name of the data backend of DataSHIELD, default = opal_data
format of the data to be reshaped. Can be 'CSV', 'STATA', 'SAS' or 'RDS' (R)
path to the to-be-uploaded data
action to be performed, can be 'reshape', 'populate' or 'all'
do you want to upload the data (true or false)
default = NORMAL, can be TEST or NON_INTERACTIVE
overrides the generated project name
if (FALSE) {
-du.upload(
- data_input_format = "CSV",
- data_input_path = "~/path-to-file/all_measurements_v1_2.csv",
- data_version = "1_0",
- dict_version = "2_1",
- cohort_id = "gecko"
-)
-}
-
+ if (FALSE) {
+du.upload(
+ data_input_format = "CSV",
+ data_input_path = "~/path-to-file/all_measurements_v1_2.csv",
+ data_version = "1_0",
+ dict_version = "2_1",
+ cohort_id = "gecko"
+)
+}
+
Developed by Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.
+Developed by Mariska Slofstra, Sido Haakma, Angela Pinot de Moira, Maxime Cornet, Sebastian Rauschert.