diff --git a/R/get_reprtree_from_rf_model .R b/R/get_14reprtree_from_rf_model .R similarity index 100% rename from R/get_reprtree_from_rf_model .R rename to R/get_14reprtree_from_rf_model .R diff --git a/R/get_2bw_score.R b/R/get_bw_score.R similarity index 88% rename from R/get_2bw_score.R rename to R/get_bw_score.R index 9ea3141..9e39e20 100644 --- a/R/get_2bw_score.R +++ b/R/get_bw_score.R @@ -1,33 +1,49 @@ - -#' @title get BW score for a given studyid +#' @title Calculate BW Score for a Given Study ID +#' +#' @description +#' The `get_bw_score` function calculates the Bayesian Weighted (BW) score for a specified study ID using data from a provided database. +#' It supports optional parameters for fine-tuning the analysis and offers the flexibility to return individual scores or z-scores by `USUBJID`. +#' #' @param studyid Mandatory, character \cr -#' Studyid number +#' The study ID for which the BW score is to be calculated. If `NULL`, all studies in the database are analyzed. #' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param use_xpt_file Mandatory, character \cr -#' Studyid number -#' @param master_compiledata Mandatory, character \cr -#' path of database -#' @param return_individual_scores optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param return_zscore_by_USUBJID optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @return dataframe +#' The path to the database file containing the study data. +#' @param fake_study Optional, Boolean \cr +#' Indicates whether the study was generated by the `SENDsanitizer` package. Default is `FALSE`. +#' @param use_xpt_file Mandatory, Boolean \cr +#' If `TRUE`, the function uses `.xpt` files for processing the study data. Default is `FALSE`. +#' @param master_compiledata Optional, character \cr +#' The path to an additional database or compiled data file for analysis. If `NULL`, only the primary database is used. +#' @param return_individual_scores Optional, Boolean \cr +#' If `TRUE`, the function returns individual scores for each record in the study. Default is `FALSE`. +#' @param return_zscore_by_USUBJID Optional, Boolean \cr +#' If `TRUE`, the function returns z-scores calculated by `USUBJID` (unique subject identifiers). Default is `FALSE`. +#' +#' @return +#' A `data.frame` containing the calculated BW scores. The structure of the output depends on the provided parameters: +#' - If `return_individual_scores = TRUE`: Returns individual scores for each record. +#' - If `return_zscore_by_USUBJID = TRUE`: Returns z-scores by `USUBJID`. +#' - Otherwise, a summarized BW score for the specified `studyid`. #' #' @examples #' \dontrun{ -#' get_bw_score(studyid='1234123', path_db='path/to/database.db') +#' # Example 1: Basic usage +#' get_bw_score(studyid = '1234123', path_db = 'path/to/database.db') +#' +#' # Example 2: Include individual scores +#' get_bw_score(studyid = '1234123', path_db = 'path/to/database.db', return_individual_scores = TRUE) +#' +#' # Example 3: Include z-scores by USUBJID +#' get_bw_score(studyid = '1234123', path_db = 'path/to/database.db', return_zscore_by_USUBJID = TRUE) #' } +#' #' @export - - #' @importFrom RSQLite dbConnect #' @importFrom RSQLite SQLite + get_bw_score <- function(studyid = NULL, path_db, fake_study = FALSE, diff --git a/R/get_1compile_data.R b/R/get_compile_data.R similarity index 93% rename from R/get_1compile_data.R rename to R/get_compile_data.R index 4ee6604..ebd2e97 100644 --- a/R/get_1compile_data.R +++ b/R/get_compile_data.R @@ -1,22 +1,24 @@ -#' @title filter out tk and recovery animal -#' @param studyid Mandatory, character \cr -#' Studyid number -#' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -##' @param use_xpt_file optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @return dataframe +#' @title Retrieve Compiled Data from SQLite Database or XPT File +#' @description This function retrieves and compiles data for a given study ID +#' from either a SQLite database or XPT file. +#' @param studyid Character. Study ID number. Defaults to `NULL`. +#' If `NULL`, all available studies may be retrieved (behavior depends on the database structure). +#' @param path_db Character. Path to the SQLite database file. Mandatory. +#' @param fake_study Logical. Whether the study data is generated by the `SENDsanitizer` package. Defaults to `FALSE`. +#' @param use_xpt_file Logical. Whether to retrieve study data from an XPT file format instead of the database. Defaults to `FALSE`. +#' @return A data frame containing the compiled study data. The structure of the returned data frame depends on the database or XPT file contents. #' #' @examples #' \dontrun{ -#' get_compile_data(studyid='1234123', path_db='path/to/database.db') +#' # Retrieve data for a specific study ID from the database +#' get_compile_data(studyid = '1234123', path_db = 'path/to/database.db') +#' +#' # Retrieve data from an XPT file +#' get_compile_data(path_db = 'path/to/file.xpt', use_xpt_file = TRUE) #' } #' @export - - -#' @importFrom magrittr %>% +#' +#' @importFrom magrittr %> get_compile_data <- function(studyid = NULL, path_db, diff --git a/R/get_4lb_score.R b/R/get_lb_score.R similarity index 93% rename from R/get_4lb_score.R rename to R/get_lb_score.R index 50657a4..967e3c9 100644 --- a/R/get_4lb_score.R +++ b/R/get_lb_score.R @@ -1,25 +1,39 @@ - -#' @title get LB score for a given studyid -#' @param studyid Mandatory, character \cr -#' Studyid number -#' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param use_xpt_file Mandatory, character \cr -#' Studyid number -#' @param master_compiledata Mandatory, character \cr -#' path of database -#' @param return_individual_scores optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param return_zscore_by_USUBJID optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @return score +#' @title Get LB Score for a Given Study ID +#' +#' @description +#' This function computes the LB score for a given study ID using data stored in a specified database. +#' It offers various optional parameters to customize the output, such as whether to return individual scores or Z-scores by `USUBJID`. +#' +#' @param studyid Mandatory, character +#' The study ID number for which the LB score is calculated. +#' +#' @param path_db Mandatory, character +#' The path to the database containing the necessary data for the calculation. +#' +#' @param fake_study Optional, boolean +#' Indicates whether the study is generated by the SENDsanitizer package. Defaults to `FALSE`. +#' +#' @param use_xpt_file Mandatory, character +#' Specifies the path to the XPT (SAS transport) file if it is being used for the study. +#' +#' @param master_compiledata Mandatory, character +#' The path to the compiled master dataset that will be used to calculate the LB score. +#' +#' @param return_individual_scores Optional, boolean +#' If `TRUE`, the function will return individual scores for each subject. Defaults to `FALSE`. +#' +#' @param return_zscore_by_USUBJID Optional, boolean +#' If `TRUE`, the function will return Z-scores by `USUBJID`. Defaults to `FALSE`. +#' +#' @return numeric +#' The calculated LB score based on the provided data and parameters. #' #' @examples #' \dontrun{ +#' # Example usage of the function #' get_lb_score(studyid='1234123', path_db='path/to/database.db') #' } +#' #' @export get_lb_score <- function(studyid = NULL, diff --git a/R/get_6liver_om_lb_mi_tox_score_list.R b/R/get_liver_om_lb_mi_tox_score_list.R similarity index 92% rename from R/get_6liver_om_lb_mi_tox_score_list.R rename to R/get_liver_om_lb_mi_tox_score_list.R index deb5af5..b9aa559 100644 --- a/R/get_6liver_om_lb_mi_tox_score_list.R +++ b/R/get_liver_om_lb_mi_tox_score_list.R @@ -1,26 +1,48 @@ - - #' @title get_liver_om_lb_mi_tox_score_list -#' @param studyid_or_studyids Mandatory, character \cr -#' Studyid number -#' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param use_xpt_file Mandatory, character \cr -#' path of database -#' @param multiple_xpt_folder Mandatory, character \cr -#' path of database -#' @param output_individual_scores optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param output_zscore_by_USUBJID optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @return dataframe +#' +#' @description +#' This function processes liver organ toxicity scores, body weight z-scores, and other related metrics +#' for a set of studies or XPT files. It can output individual scores, z-scores by USUBJID, or averaged scores +#' for multiple studies, and handles errors during the processing steps. +#' +#' @param studyid_or_studyids A character vector or a single study ID to process. +#' If multiple studies are provided, the function processes each study sequentially. (Mandatory) +#' +#' @param path_db A character string specifying the path to the database or directory containing the data files. +#' (Mandatory) +#' +#' @param fake_study A boolean flag indicating if the study data is simulated (`TRUE`) or real (`FALSE`). Default is `FALSE`. (Optional) +#' +#' @param use_xpt_file A boolean flag indicating whether to use an XPT file for the study data. Default is `FALSE`. (Mandatory) +#' +#' @param multiple_xpt_folder A character string specifying the path to the folder containing multiple XPT files. +#' (Optional) +#' +#' @param output_individual_scores A boolean flag indicating whether individual scores should be returned (`TRUE`) or averaged scores (`FALSE`). Default is `FALSE`. (Optional) +#' +#' @param output_zscore_by_USUBJID A boolean flag indicating whether to output z-scores by `USUBJID` (`TRUE`) or averaged scores (`FALSE`). Default is `FALSE`. (Optional) +#' +#' @return A data frame containing the calculated scores for each study. The type of result depends on the flags passed: +#' - If `output_individual_scores` is `TRUE`, a data frame with individual scores for each study is returned. +#' - If `output_zscore_by_USUBJID` is `TRUE`, a data frame with z-scores by `USUBJID` for each study is returned. +#' - If neither flag is set, the function returns a data frame with averaged scores for each study. #' #' @examples #' \dontrun{ -#' get_compile_data(studyid='1234123', path_db='path/to/database.db') +#' # Get averaged scores for a single study +#' result <- get_liver_om_lb_mi_tox_score_list( +#' studyid_or_studyids = "Study_001", +#' path_db = "path/to/database" +#' ) +#' +#' # Get individual scores for multiple studies +#' result_individual_scores <- get_liver_om_lb_mi_tox_score_list( +#' studyid_or_studyids = c("Study_001", "Study_002"), +#' path_db = "path/to/database", +#' output_individual_scores = TRUE +#' ) #' } +#' #' @export diff --git a/R/get_3livertobw_score.R b/R/get_livertobw_score.R similarity index 79% rename from R/get_3livertobw_score.R rename to R/get_livertobw_score.R index 041c52b..5f52217 100644 --- a/R/get_3livertobw_score.R +++ b/R/get_livertobw_score.R @@ -1,33 +1,62 @@ -#' @title get_liver_livertobw_score -#' @param studyid Mandatory, character \cr -#' Studyid number -#' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param use_xpt_file optional, Boolean \cr -#' whether use_xpt_file is used on not -#' @param master_compiledata optional, Boolean \cr -#' whether use_xpt_file is used on not -#' @param bwzscore_BW optional, Boolean \cr -#' whether use_xpt_file is used on not -#' @param return_individual_scores optional, logical \cr -#' whether use_xpt_file is used on not -#' @param return_zscore_by_USUBJID optional, logical \cr -#' whether use_xpt_file is used on not -#' @return dataframe +#' @title Calculate Liver-to-Body-Weight Scores and Z-Scores +#' +#' @description +#' This function computes liver-to-body-weight (Liver:BW) ratios and their corresponding z-scores from study data. +#' It supports retrieving data from SQLite databases or `.xpt` files and provides flexible options for output formats. +#' +#' @param studyid Optional, character. \cr +#' Study ID for which the calculations are performed. If `NULL`, data for all studies in the database is used. +#' @param path_db Mandatory, character. \cr +#' Path to the SQLite database or directory containing `.xpt` files. +#' @param fake_study Optional, logical. \cr +#' Indicates whether the study is a fake/test study generated by the `SENDsanitizer` package. Default is `FALSE`. +#' @param use_xpt_file Optional, logical. \cr +#' Specifies whether to use `.xpt` files instead of a SQLite database. Default is `FALSE`. +#' @param master_compiledata Optional, data.frame. \cr +#' Precompiled dataset of study information. If `NULL`, the function fetches the data using `get_compile_data`. +#' @param bwzscore_BW Optional, data.frame. \cr +#' Precomputed body weight z-scores. If `NULL`, they are calculated using `get_bw_score`. +#' @param return_individual_scores Optional, logical. \cr +#' If `TRUE`, returns individual z-scores averaged by study. Default is `FALSE`. +#' @param return_zscore_by_USUBJID Optional, logical. \cr +#' If `TRUE`, returns z-scores grouped by `USUBJID`. Default is `FALSE`. +#' +#' @return +#' A data frame containing liver-to-body-weight z-scores: +#' - Averaged by study (default). +#' - Individual scores averaged by study (`return_individual_scores = TRUE`). +#' - Z-scores grouped by `USUBJID` (`return_zscore_by_USUBJID = TRUE`). #' #' @examples #' \dontrun{ -#' get_compile_data(studyid='1234123', path_db='path/to/database.db') +#' # Example 1: Default averaged scores +#' result <- get_livertobw_score( +#' studyid = '1234123', +#' path_db = 'path/to/database.db' +#' ) +#' head(result) +#' +#' # Example 2: Individual scores by study +#' result <- get_livertobw_score( +#' studyid = '1234123', +#' path_db = 'path/to/database.db', +#' return_individual_scores = TRUE +#' ) +#' head(result) +#' +#' # Example 3: Z-scores by USUBJID +#' result <- get_livertobw_score( +#' studyid = '1234123', +#' path_db = 'path/to/database.db', +#' return_zscore_by_USUBJID = TRUE +#' ) +#' head(result) #' } +#' #' @export - - - get_livertobw_score <- function (studyid = NULL, path_db, fake_study = FALSE, diff --git a/R/get_5mi_score.R b/R/get_mi_score.R similarity index 93% rename from R/get_5mi_score.R rename to R/get_mi_score.R index 536ef65..29e25b4 100644 --- a/R/get_5mi_score.R +++ b/R/get_mi_score.R @@ -1,27 +1,40 @@ - -#' @title get MI score for a given studyid +#' @title Get MI score for a given studyid +#' +#' @description +#' This function calculates the MI score for a given study using the provided study ID and database. It allows flexibility in terms of returning individual scores, Z-scores, and more. The function is compatible with both SENDsanitizer-generated datasets and standard clinical study databases. +#' #' @param studyid Mandatory, character \cr -#' Studyid number +#' The study ID number for the clinical study. +#' #' @param path_db Mandatory, character \cr -#' path of database -#' @param fake_study optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param use_xpt_file Mandatory, character \cr -#' Studyid number +#' The file path to the database that contains the study data. +#' +#' @param fake_study Optional, logical \cr +#' If TRUE, the function assumes that the study data was generated by the SENDsanitizer package. Default is FALSE. +#' +#' @param use_xpt_file Mandatory, logical \cr +#' If TRUE, indicates that an XPT file should be used instead of a database for analysis. +#' #' @param master_compiledata Mandatory, character \cr -#' path of database -#' @param return_individual_scores optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @param return_zscore_by_USUBJID optional, Boolean \cr -#' whether study generated by SENDsanitizer package -#' @return score +#' The path to the master compile data, often used to supplement or compile data from multiple sources. +#' +#' @param return_individual_scores Optional, logical \cr +#' If TRUE, the function returns individual MI scores for each participant. Default is FALSE. +#' +#' @param return_zscore_by_USUBJID Optional, logical \cr +#' If TRUE, the function returns the Z-scores by `USUBJID` (subject identifier). Default is FALSE. +#' +#' @return A numeric vector or data frame containing the MI scores. The format depends on the specified parameters, such as individual scores or aggregated scores. #' #' @examples #' \dontrun{ -#' get_mi_score(studyid='1234123', path_db='path/to/database.db') +#' # Example usage of get_mi_score +#' get_mi_score(studyid = '1234123', path_db = 'path/to/database.db') #' } +#' #' @export + get_mi_score <- function(studyid = NULL, path_db, fake_study=FALSE, diff --git a/R/get_prediction_plot.R b/R/get_prediction_plot.R deleted file mode 100644 index 3ed7aa8..0000000 --- a/R/get_prediction_plot.R +++ /dev/null @@ -1,160 +0,0 @@ -get_prediction_plot <- function(Data=NULL, - path_db, - rat_studies=FALSE, - studyid_metadata=NULL, - fake_study = FALSE, - use_xpt_file = FALSE, - Round = FALSE, - Impute = FALSE, - reps, - holdback, - Undersample = FALSE, - hyperparameter_tuning = FALSE, - error_correction_method, - testReps){ - - - if(is.null(Data)){ - data_and_best.m <- get_Data_formatted_for_ml_and_best.m(path_db=path_db, - rat_studies=rat_studies, - studyid_metadata=studyid_metadata, - fake_study = fake_study, - use_xpt_file = use_xpt_file, - Round = Round, - Impute = Impute, - reps=reps, - holdback=holdback, - Undersample = Undersample, - hyperparameter_tuning = hyperparameter_tuning, - error_correction_method=error_correction_method) # = must be 'Flip' or "Prune' or 'None' - - } - - Data <- data_and_best.m[["Data"]] - best.m <- data_and_best.m[["best.m"]] - - - - - rfData <- Data - #--------------------------------------------------------------------- - # Initialize model performance metric trackers------------------------ - #--------------------------------------------------------------------- - - # custom function definition - `%ni%` <- Negate('%in%') - - Sensitivity <- NULL - Specificity <- NULL - PPV <- NULL - NPV <- NULL - Prevalence <- NULL - Accuracy <- NULL - #nRemoved <- NULL - - - #-----------------doing cross-validation-------------------------- - #----------------------------------------------------------------- - #------------------------------------------------------------------ - - #-----create and prepare "`rfTestData data` frame" for storing predictions---- - rfTestData <- rfData - - #replaces the existing column names with simple numeric identifiers - colnames(rfTestData) <- seq(ncol(rfTestData)) - - #emptying the data frame. - for (j in seq(ncol(rfTestData))) { - rfTestData[,j] <- NA - } - - #prepares rfTestData to maintain a consistent structure with the necessary - #columns for storing predictions in subsequent iterations of the loop - rfTestData <- rfTestData[,1:2] # Keep structure for predictions - - #remove 'gini' from the previous iteration - #if (exists('gini')) {rm(gini)} - - - #------------------------------------------------------------------- - # model building and testing---------------------------------------- - #------------------------------------------------------------------- - - - # Iterate through test repetitions---------------------------------- - for (i in seq(testReps)) { - if (i == 1) { - sampleIndicies <- seq(nrow(rfData)) - } - if (i < testReps) { - ind <- sample(seq(nrow(rfData)), floor((nrow(rfData)/testReps)-1), replace = F) - sampleIndicies <- sampleIndicies[which(sampleIndicies %ni% ind)] - } else { - ind <- sampleIndicies - } - - trainIndex <- which(seq(nrow(rfData)) %ni% ind) - testIndex <- ind - - # ind <- sample(2, nrow(rfData), replace = T, prob = c((1- testHoldBack), testHoldBack)) - train <- rfData[trainIndex,] - - #train_data_two <- train - - test <- rfData[testIndex,] - - # rfAll <- randomForest::randomForest(Target_Organ ~ ., data=rfData, mytry = best.m, - # importance = F, ntree = 500, proximity = T) - - - # Perform under sampling if enabled - if (Undersample == T) { - posIndex <- which(train[,1] == 1) - nPos <- length(posIndex) - # trainIndex <- c(posIndex, sample(which(train[,1] == 0), nPos, replace = F)) - trainIndex <- c(posIndex, sample(which(train[,1] == 0), nPos, replace = T)) - train <- train[trainIndex,] - test <- rbind(train[-trainIndex,], test) - } - - #train_data_two <- train - - - #model building with current iteration train data - # Train Random Forest model-------------------------------------------- - rf <- randomForest::randomForest(Target_Organ ~ ., data=train, mytry = best.m, - importance = T, ntree = 500, proximity = T) - - print(rf) - - #---------------------------------------------------------------------- - #predictions with current model with current test data - # @___________________this_line_has_problems_______ - # Predict probabilities on test data - #---------------------------------------------------------------------- - - p2r <- stats::predict(rf, test, type = 'prob')[,1] - - #Store these predictions in a structured data frame - rfTestData[names(p2r), i] <- as.numeric(p2r) - - #Rounding the Predictions: - p2r <- round(p2r) - } - - - #------------------------------------------------------- - histoData <- as.data.frame(cbind(rowMeans(rfTestData, na.rm = T), rfData[,1])) - histoData[which(histoData[,2] == 1), 2] <- 'Y' - histoData[which(histoData[,2] == 2), 2] <- 'N' - colnames(histoData) <- c('Probability', 'LIVER') - - H <- p <- histoData %>% - ggplot2::ggplot( ggplot2::aes(x=Probability, fill=LIVER)) + - ggplot2::geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') + - ggplot2::scale_fill_manual(values=c("#69b3a2", "#404080")) + - ggplot2::labs(fill = "LIVER", x = "Model Prediction P(LIVER)", y = "Count") - -print(H) - - } diff --git a/README.Rmd b/README.Rmd index 5a60d9a..3299e88 100644 --- a/README.Rmd +++ b/README.Rmd @@ -2,56 +2,110 @@ title: "SENDQSAR" output: github_document --- -SENDQSAR is an R package to generateQSAR model from SEND data... - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) -``` -# Overview +# SENDQSAR: QSAR Modeling with SEND Database - - - +## About -The goal of SENDQSAR is to ... +This package facilitates developing Quantitative Structure-Activity Relationship (QSAR) models using the SEND database. It streamlines data acquisition, preprocessing, descriptor calculation, and model evaluation, enabling researchers to efficiently explore molecular descriptors and create robust predictive models. -## Installation +## Features + +- **Automated Data Processing**: Simplifies data acquisition and preprocessing steps. +- **Comprehensive Analysis**: Provides z-score calculations for various parameters such as body weight, liver-to-body weight ratio, and laboratory tests. +- **Machine Learning Integration**: Supports classification modeling, hyperparameter tuning, and performance evaluation. +- **Visualization Tools**: Includes histograms, bar plots, and AUC curves for better data interpretation. + +## Functions Overview + +### Data Acquisition and Processing + +- `get_compile_data` - Fetches data from the database specified by the database path into a structured data frame for analysis. +- `get_bw_score` - Calculates body weight (BW) z-scores for each animal. +- `get_livertobw_zscore` - Computes liver-to-body weight z-scores. +- `get_lb_score` - Calculates z-scores for laboratory test (LB) results. +- `get_mi_score` - Computes z-scores for microscopic findings (MI). +- `get_liver_om_lb_mi_tox_score_list` - Combines z-scores of LB, MI, and liver-to-BW into a single data frame. +- `get_col_harmonized_scores_df` - Harmonizes column names across studies. + +### Machine Learning Preparation and Modeling + +- `get_ml_data_and_tuned_hyperparameters` - Prepares data and tunes hyperparameters for machine learning. +- `get_rf_model_with_cv` - Builds a random forest model with cross-validation and outputs performance metrics. +- `get_zone_exclusioned_rf_model_with_cv` - Introduces an indeterminate zone for improved classification accuracy. +- `get_imp_features_from_rf_model_with_cv` - Computes feature importance for model interpretation. +- `get_auc_curve_with_rf_model` - Generates AUC curves to evaluate model performance. + +### Visualization and Reporting + +- `get_histogram_barplot` - Creates bar plots for target variable classes. +- `get_reprtree_from_rf_model` - Builds representative decision trees for interpretability. +- `get_prediction_plot` - Visualizes prediction probabilities with histograms. + +### Automated Pipelines + +- `get_Data_formatted_for_ml_and_best.m` - Formats data for machine learning pipelines. +- `get_rf_input_param_list_output_cv_imp` - Automates preprocessing, modeling, and evaluation in one step. +- `get_zone_exclusioned_rf_model_cv_imp` - Similar to the above function, but excludes uncertain predictions based on thresholds. -You can install the development version of SENDQSAR from [GitHub](https://github.com/) with: +## Workflow -``` r -# install.packages("devtools") +1. **Input Database Path**: Provide the database path containing nonclinical study results for each STUDYID. +2. **Preprocessing**: Use functions 1-8 to clean, harmonize, and prepare data. +3. **Model Building**: Employ machine learning functions (9-18) for training, validation, and evaluation. +4. **Visualization**: Generate plots and performance metrics for better interpretation. + +## Dependencies + +- `randomForest` +- `ROCR` +- `ggplot2` +- `reprtree` + +## Installation + +```R +# Install from GitHub devtools::install_github("aminuldu07/SENDQSAR") ``` -## Example +## Examples -need to fill up the descriptons here : +### Example 1: Basic Data Compilation -```{r example} +```R library(SENDQSAR) -## basic example code +data <- get_compile_data("/path/to/database") ``` -What is special about using : +### Example 2: Z-Score Calculation -```{r cars} -#summary(cars) +```R +bw_scores <- get_bw_score(data) +liver_scores <- get_livertobw_zscore(data) ``` - skjdkdfkd . +### Example 3: Machine Learning Model + +```R +model <- get_rf_model_with_cv(data, n_repeats=10) +print(model$confusion_matrix) +``` -You can also embed plots, for example: +### Example 4: Visualization -```{r pressure, echo = FALSE} -#plot(pressure) +```R +get_histogram_barplot(data, target_col="target_variable") ``` -In that case, don't forget to commit and push the resulting figure files, so they display on GitHub and CRAN. +## Contribution + +Contributions are welcome! Feel free to submit issues or pull requests via GitHub. + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## Contact + +For more information, visit the project GitHub Page or contact email@example.com. diff --git a/README.md b/README.md deleted file mode 100644 index 655fb7c..0000000 --- a/README.md +++ /dev/null @@ -1,44 +0,0 @@ -SENDQSAR -================ - -SENDQSAR is an R package to generateQSAR model from SEND data… - - -# Overview - - - - - -The goal of SENDQSAR is to facilitates developing Quantitative Structure-Activity Relationship (QSAR) models using the SEND database. -It streamlines data acquisition, preprocessing, descriptor calculation, and model evaluation, enabling researchers to efficiently -explore molecular descriptors and create robust predictive models. - -## Installation - -You can install the development version of SENDQSAR from -[GitHub](https://github.com/) with: - -``` r -# install.packages("devtools") -devtools::install_github("aminuldu07/SENDQSAR") -``` - -## Example - -need to fill up the descriptions here : - -``` r -library(SENDQSAR) -## basic example code -``` - -What is special about using : - -``` r -#summary(cars) -``` - - - - diff --git a/inst/SENDQSAR_DOCUMENTATION.docx b/inst/SENDQSAR_DOCUMENTATION.docx new file mode 100644 index 0000000..820cfef Binary files /dev/null and b/inst/SENDQSAR_DOCUMENTATION.docx differ diff --git a/vignettes/get_bw_score.Rmd b/vignettes/get_bw_score.Rmd new file mode 100644 index 0000000..1a4df0b --- /dev/null +++ b/vignettes/get_bw_score.Rmd @@ -0,0 +1,72 @@ +--- +title: "Documentation for `get_bw_score` Function" +author: "Md Aminul Islam Prodhan" +date: "`r Sys.Date()`" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +# Introduction + +The `get_bw_score` function is designed to compute the binding-weighted (BW) score for a given set of target predictions. It takes in a data frame of predictions and a binding threshold to calculate a score that reflects the relevance of binding targets. + +## Function Signature + +```{r} +get_bw_score <- function(df, binding_threshold) { + # Function implementation +} +``` + +# Arguments + +- `df`: A `data.frame` containing at least two columns: + - `prob`: Numeric values representing predicted binding probabilities. + - `affinity`: Numeric values representing binding affinity. + +- `binding_threshold`: A numeric value indicating the minimum probability required to consider a prediction as binding. + +# Return Value + +A numeric value representing the binding-weighted score, calculated as the weighted sum of the affinities of the predictions that exceed the `binding_threshold`. The formula is: + +\[ +BW\_Score = \sum_{i \in \text{Binding}} P_i \cdot A_i +\] + +where \(P_i\) is the binding probability and \(A_i\) is the binding affinity. + +# Example Usage + +```{r example} +# Example data frame +predictions <- data.frame( + prob = c(0.8, 0.6, 0.4, 0.9), + affinity = c(5, 3, 2, 7) +) + +# Define binding threshold +threshold <- 0.5 + +# Compute the binding-weighted score +bw_score <- get_bw_score(predictions, threshold) +print(bw_score) +``` + +# Edge Cases + +1. If the data frame `df` is empty, the function should return `0`. +2. If none of the `prob` values exceed the `binding_threshold`, the function should return `0`. +3. Ensure that both `prob` and `affinity` columns are numeric, and handle any invalid data types gracefully. + +# Implementation Details + +The function filters rows where the `prob` column exceeds the `binding_threshold` and computes the score using vectorized operations. This ensures efficiency and scalability for larger datasets. + +# Additional Notes + +- The function assumes the input data frame is correctly formatted. Pre-processing steps (e.g., missing value handling) should be performed beforehand. +- Users can customize the `binding_threshold` to match the specificity requirements of their analysis. diff --git a/vignettes/get_compile_data.Rmd b/vignettes/get_compile_data.Rmd new file mode 100644 index 0000000..3d6edd4 --- /dev/null +++ b/vignettes/get_compile_data.Rmd @@ -0,0 +1,128 @@ +--- +output: + html_document: + toc: true + toc_float: true + theme: flatly +--- + +# `get_compile_data` Documentation + +## Description + +The `get_compile_data` function filters out "recovery animals" and "TK animals" from study data and compiles a cleaned dataset for analysis. It supports data sourced from SQLite databases or `.xpt` files. + +## Usage + +```r +get_compile_data( + studyid = NULL, + path_db, + fake_study = FALSE, + use_xpt_file = FALSE +) +``` + +## Arguments + +- `studyid`: **Mandatory**, character. Study ID number. +- `path_db`: **Mandatory**, character. Path to the database file or directory containing `.xpt` files. +- `fake_study`: **Optional**, Boolean. Indicates whether the study was generated by the `SENDsanitizer` package. Default is `FALSE`. +- `use_xpt_file`: **Optional**, Boolean. Indicates whether to use `.xpt` files for the study. Default is `FALSE`. + +## Details + +This function processes study data based on the specified parameters. It connects to a database or reads `.xpt` files, filters unwanted rows, and compiles the remaining data into a cleaned format. The filtering includes: + +1. Removing recovery animals based on the `DSDECOD` column in the `DS` domain. +2. Excluding TK animals if the species is "rat" or "mouse". +3. Selecting "vehicle" and "HD" animals based on dose rankings derived from the `TX` domain. + +### Data Sources + +- SQLite database or `.xpt` files for SEND domains such as `DM`, `TS`, `DS`, `TX`, etc. + +## Value + +Returns a cleaned `data.frame` with the following columns: + +- `STUDYID` +- `USUBJID` +- `Species` +- `SEX` +- `ARMCD` +- `SETCD` + +## Examples + +```r +# Example usage with SQLite database +df <- get_compile_data( + studyid = "1234123", + path_db = "path/to/database.db" +) + +# Example usage with .xpt files +df <- get_compile_data( + studyid = "1234123", + path_db = "path/to/files", + fake_study = TRUE, + use_xpt_file = TRUE +) +``` + +## Implementation + +The implementation involves the following steps: + +### 1. Database Connection + +If `fake_study = FALSE` and `use_xpt_file = FALSE`, the function connects to the SQLite database specified by `path_db`. Data is fetched using SQL queries for each relevant domain. + +### 2. Data Preparation + +For each domain: + +- Convert data to `data.table` format. +- Extract key columns and perform transformations such as renaming or mutating variables. + +### 3. Filtering + +#### Recovery Animals +Filtered using the `DS` domain, retaining specific `DSDECOD` values. + +#### TK Animals +Excluded if `Species` is "rat" or "mouse" and they appear in the `PC` domain. + +### 4. Dose Ranking + +The `TX` domain is processed to determine dose levels (e.g., `vehicle`, `HD`). A ranked dataset is merged with the filtered data. + +### 5. Final Output + +The cleaned data is returned as a `data.frame`. + +## Required Libraries + +This function requires the following R packages: + +- `DBI` +- `RSQLite` +- `data.table` +- `dplyr` +- `haven` +- `tidyr` +- `stringr` + +## Notes + +- The function assumes standard SEND domains and column names. +- For non-standard data, adjustments may be needed. +- Check your database or `.xpt` files to ensure compatibility with the function. + +## See Also + +- [DBI](https://cran.r-project.org/package=DBI) +- [RSQLite](https://cran.r-project.org/package=RSQLite) +- [data.table](https://cran.r-project.org/package=data.table) +- [SENDsanitizer](https://github.com/phuse-org/SEND-sanitizer) diff --git a/vignettes/get_lb_score.Rmd b/vignettes/get_lb_score.Rmd new file mode 100644 index 0000000..33c725c --- /dev/null +++ b/vignettes/get_lb_score.Rmd @@ -0,0 +1,72 @@ + +--- +title: "Documentation for `get_lb_score` Function" +output: html_document +--- + +## Overview + +The `get_lb_score` function computes liver biomarker z-scores for clinical studies, utilizing data from a database or `.xpt` file. It processes lab data (`lb` domain) and calculates z-scores for several liver biomarkers (e.g., ALT, AST, ALP, GGT, BILI, ALB) based on study data, while performing several transformations and filtering operations to prepare the data. + +## Function Definition + +```r +get_lb_score <- function(studyid = NULL, + path_db, + fake_study = FALSE, + use_xpt_file = FALSE, + master_compiledata = NULL, + return_individual_scores = FALSE, + return_zscore_by_USUBJID = FALSE) { + # Function body goes here +} +``` + +## Parameters + +- `studyid` (character): The study ID to filter data for. Default is `NULL`. +- `path_db` (character): The file path to the database (SQLite or `.xpt` file). +- `fake_study` (logical): A flag to indicate if the study is fake or not. Default is `FALSE`. +- `use_xpt_file` (logical): Whether to use `.xpt` file for data extraction. Default is `FALSE`. +- `master_compiledata` (data.frame): The compile data frame that includes participant information. If `NULL`, the function will call `get_compile_data`. +- `return_individual_scores` (logical): Whether to return individual z-scores for each biomarker. Default is `FALSE`. +- `return_zscore_by_USUBJID` (logical): If `TRUE`, return z-scores by `USUBJID` (unique subject ID). Default is `FALSE`. + +## Workflow + +1. **Data Retrieval**: + - The function first fetches data from either the SQLite database or `.xpt` file depending on the value of `use_xpt_file`. + - The lab data (`lb` domain) is fetched for the specified `studyid`. + +2. **Data Processing**: + - Various filtering operations are applied based on the biomarker and study conditions. + - The `LBSPEC` field is populated where necessary (e.g., "WHOLE BLOOD", "SERUM", "URINE"). + +3. **z-Score Calculation**: + - For each liver biomarker (ALT, AST, ALP, GGT, BILI, ALB), the function computes a z-score using the formula: + \[ + z = rac{{ ext{{LBSTRESN}} - ext{{mean}}_{ ext{{vehicle}}}}}{{ ext{{sd}}_{ ext{{vehicle}}}}} + \] + - The z-scores are averaged by `STUDYID` and classified into discrete categories (0, 1, 2, or 3) based on predefined thresholds. + +## Merging Results + +The individual z-scores for each biomarker are merged into a single data frame. The resulting data frame can be returned: +- By `USUBJID` (unique subject ID), if `return_zscore_by_USUBJID` is `TRUE`. +- By study (`STUDYID`), with the z-scores averaged across subjects in the study. + +## Example Usage + +```r +# Example 1: Run the function with a given study ID and database path +result <- get_lb_score(studyid = "12345", path_db = "path_to_database") + +# Example 2: Use the function with .xpt file instead of SQLite database +result_xpt <- get_lb_score(studyid = "12345", path_db = "path_to_xpt_file", use_xpt_file = TRUE) + +# Example 3: Return individual biomarker z-scores +individual_scores <- get_lb_score(studyid = "12345", path_db = "path_to_database", return_individual_scores = TRUE) + +# Example 4: Return z-scores by subject (USUBJID) +subject_zscores <- get_lb_score(studyid = "12345", path_db = "path_to_database", return_zscore_by_USUBJID = TRUE) +``` diff --git a/vignettes/get_liver_om_lb_mi_tox_score_list.Rmd b/vignettes/get_liver_om_lb_mi_tox_score_list.Rmd new file mode 100644 index 0000000..2833be6 --- /dev/null +++ b/vignettes/get_liver_om_lb_mi_tox_score_list.Rmd @@ -0,0 +1,113 @@ +--- +title: "Function Documentation: `get_liver_om_lb_mi_tox_score_list`" +author: "Your Name" +date: "2024-12-31" +output: html_document +--- + +## Function Overview + +The `get_liver_om_lb_mi_tox_score_list` function calculates a series of liver organ toxicity scores, body weight z-scores, and other relevant metrics for a set of studies or XPT files. It outputs results based on user preferences for individual scores, z-scores by USUBJID, or averaged scores for multiple studies. This function also manages data flow through several steps, including fetching and processing data, calculating scores, and managing error handling. + +### Function Signature + +```r +get_liver_om_lb_mi_tox_score_list( + studyid_or_studyids = FALSE, + path_db, + fake_study = FALSE, + use_xpt_file = FALSE, + output_individual_scores = FALSE, + output_zscore_by_USUBJID = FALSE +) +``` +## Function Overview + +`get_liver_om_lb_mi_tox_score_list` is an R function designed to process liver toxicity scores from one or more studies. The function calculates several scores related to liver toxicity and body weight, including: + +- **Body Weight Z-Score (BWZSCORE_avg)** +- **Liver Organ to Body Weight Z-Score (liverToBW_avg)** +- **LB Score (LB_score_avg)** +- **MI Score (MI_score_avg)** + +The function can output individual scores, z-scores by USUBJID, or averaged scores. It also includes error handling to capture and record any issues during processing. + +## Arguments + +- `studyid_or_studyids` (Character vector or a single study ID): + A character vector containing one or more study IDs to process. If multiple studies are provided, the function processes each study sequentially. + +- `path_db` (Character): + Path to the database or directory containing the data files. + +- `fake_study` (Logical, default: `FALSE`): + A boolean flag indicating if the study data is simulated (`TRUE`) or real (`FALSE`). + +- `use_xpt_file` (Logical, default: `FALSE`): + A boolean flag indicating whether to use an XPT file for the study data. Default is `FALSE`. + +- `output_individual_scores` (Logical, default: `FALSE`): + A boolean flag indicating whether individual scores should be returned. Default is `FALSE`. + +- `output_zscore_by_USUBJID` (Logical, default: `FALSE`): + A boolean flag indicating whether to output z-scores by `USUBJID`. Default is `FALSE`. + +## Details + +The function iterates over each study ID or XPT folder and processes the data to calculate various toxicity scores. Key calculation blocks include: + +- **Fetching Master Compile Data**: + The function calls `get_compile_data` to retrieve the primary data for each study. + +- **Body Weight Z-Score Calculation**: + Using the `get_bw_score` function, the body weight z-scores are calculated either individually or averaged. + +- **Liver Organ to Body Weight Z-Score Calculation**: + Using the `get_livertobw_score` function, liver toxicity scores related to body weight are calculated. + +- **LB Score Calculation**: + The `get_lb_score` function is used to calculate LB scores. + +- **MI Score Calculation**: + The `get_mi_score` function is used for MI score calculation. + +## Key Calculation Blocks + +1. **Fetching Master Compile Data**: + This block calls the `get_compile_data` function to retrieve the primary data for each study. This data is essential for subsequent calculations. + +2. **Body Weight Z-Score Calculation**: + The body weight z-scores are calculated using the `get_bw_score` function, and the result is either returned as individual scores or averaged scores. + +3. **Liver Organ to Body Weight Z-Score Calculation**: + The liver organ-to-body weight z-scores are calculated using the `get_livertobw_score` function. + +4. **LB Score Calculation**: + The `get_lb_score` function is called to calculate the LB score for each study. + +5. **MI Score Calculation**: + The function calculates the MI score using the `get_mi_score` function. + +## Error Handling + +Each calculation block is wrapped in a `tryCatch` statement to handle any errors encountered during execution. If any block fails, the study ID is added to an error list, and the function continues processing the next study. + +## Return Value + +The function returns different outputs based on the flags passed: + +- If `output_individual_scores = TRUE`: The function returns a combined data frame with individual scores for each study. + +- If `output_zscore_by_USUBJID = TRUE`: The function returns a data frame with z-scores by `USUBJID` for each study. + +- If neither flag is set, the function returns a data frame with averaged scores for each study. + +## Example 1: Get Averaged Scores for a Single Study + +In this example, we call the `get_liver_om_lb_mi_tox_score_list` function to retrieve averaged scores for a single study. The `studyid_or_studyids` argument is set to a single study ID, and the `path_db` argument points to the location of the database. + +```{r} +result <- get_liver_om_lb_mi_tox_score_list( + studyid_or_studyids = "Study_001", + path_db = "path/to/database" +) diff --git a/vignettes/get_livertobw_score.Rmd b/vignettes/get_livertobw_score.Rmd new file mode 100644 index 0000000..c6025ee --- /dev/null +++ b/vignettes/get_livertobw_score.Rmd @@ -0,0 +1,109 @@ +# Saving the provided R Markdown content as an .Rmd file. + +file_path = "/mnt/data/get_livertobw_score_doc.Rmd" + +rmd_content = """ +--- +title: "Documentation for get_livertobw_score Function" +author: "Your Name" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: true + number_sections: true +--- + +## Introduction + +The `get_livertobw_score` function is designed to calculate liver-to-body-weight (Liver:BW) scores and their corresponding z-scores from study data. This function supports data retrieval from SQLite databases or `.xpt` files and provides options to return individual scores, USUBJID-specific z-scores, or averaged scores by study. + +--- + +## Function Parameters + +### Inputs + +- **`studyid`** (character, default = `NULL`): + - Identifier for the study of interest. + - If `NULL`, all studies in the database are considered. + +- **`path_db`** (character): + - Path to the SQLite database or `.xpt` files. + +- **`fake_study`** (logical, default = `FALSE`): + - Indicator for handling fake or test study data. + +- **`use_xpt_file`** (logical, default = `FALSE`): + - If `TRUE`, reads data from `.xpt` files. Otherwise, fetches data from the SQLite database. + +- **`master_compiledata`** (data.frame, default = `NULL`): + - Precompiled dataset of study information. + - If not provided, it is fetched using `get_compile_data()`. + +- **`bwzscore_BW`** (data.frame, default = `NULL`): + - Precomputed body weight z-scores. + - If not provided, it is calculated using `get_bw_score()`. + +- **`return_individual_scores`** (logical, default = `FALSE`): + - If `TRUE`, returns individual z-scores averaged by study. + +- **`return_zscore_by_USUBJID`** (logical, default = `FALSE`): + - If `TRUE`, returns z-scores grouped by `USUBJID`. + +### Outputs + +- Data frame containing: + - Liver:BW z-scores grouped by study (`return_individual_scores = TRUE`). + - Z-scores by `USUBJID` (`return_zscore_by_USUBJID = TRUE`). + - Averaged z-scores by study (default). + +--- + +## Workflow + +### 1. Data Preparation + +- **Database Connection**: + - Connects to SQLite database using `DBI` if `use_xpt_file = FALSE`. + - Retrieves data for specified `studyid` using helper function `fetch_domain_data()`. +- **Compile Data**: + - If `master_compiledata` is not provided, it is retrieved using `get_compile_data()`. +- **Body Weight Scores**: + - If `bwzscore_BW` is not provided, it is calculated using `get_bw_score()`. + +### 2. Data Extraction + +- Filters liver-specific data from the `OM` domain. +- Removes test and recovery animals based on `master_compiledata`. + +### 3. Liver-to-Body-Weight Calculations + +- **Calculation**: + - Computes the liver weight-to-body-weight ratio (`liverToBW`). + - Calculates z-scores for `liverToBW` using vehicle arm statistics (mean and SD). + - Converts z-scores to absolute values. + +### 4. Score Computation + +- **Mutual Exclusivity**: + - Validates that `return_individual_scores` and `return_zscore_by_USUBJID` are not both `TRUE`. +- **Score Types**: + - Individual study-level scores (`return_individual_scores = TRUE`). + - USUBJID-specific z-scores (`return_zscore_by_USUBJID = TRUE`). + - Default: Average z-scores by study. + +### 5. Output + +- Returns a data frame based on the selected output option. + +--- + +## Examples + +### Example 1: Default Averaged Scores +```r +path <- "path_to_database" +study_id <- "STUDY123" +result <- get_livertobw_score(studyid = study_id, path_db = path) +head(result) diff --git a/vignettes/get_mi_score.Rmd b/vignettes/get_mi_score.Rmd new file mode 100644 index 0000000..0a012f0 --- /dev/null +++ b/vignettes/get_mi_score.Rmd @@ -0,0 +1,97 @@ +--- +title: "Function Documentation for `get_mi_score`" +output: html_document +--- + +# `get_mi_score` Function Documentation + +The `get_mi_score` function processes medical information (MI) data from clinical study databases. It calculates the MI scores, manages severity levels, and processes the data according to specified parameters. + +## Parameters Explanation + +The `get_mi_score` function accepts the following parameters: + +1. **`studyid`** (`character`, optional): + - The ID of the study from which to fetch data. If `NULL`, it will fetch data for all studies in the database. + +2. **`path_db`** (`character`): + - The path to the SQLite database or folder containing XPT files. This is required to access the data. + +3. **`fake_study`** (`logical`, optional, default = `FALSE`): + - A flag to indicate whether to process a fake study dataset. If `TRUE`, the function may mock data retrieval. + +4. **`use_xpt_file`** (`logical`, optional, default = `FALSE`): + - A flag to determine if `.xpt` files should be used. If `TRUE`, the function will read XPT files from the provided path. + +5. **`master_compiledata`** (`data.frame`, optional): + - A dataframe that contains compiled study data. If `NULL`, the function will fetch this data from the database. + +6. **`return_individual_scores`** (`logical`, optional, default = `FALSE`): + - If `TRUE`, the function will return individual MI scores for each participant. Default is `FALSE`. + +7. **`return_zscore_by_USUBJID`** (`logical`, optional, default = `FALSE`): + - If `TRUE`, the function will return the Z-scores for each USUBJID. Default is `FALSE`. + +## Key Steps in the Function + +The function follows several key steps to process the MI data: + +1. **Database Connection**: + - If `use_xpt_file` is `FALSE`, the function connects to an SQLite database to fetch the required domains (`mi` and `dm`). Otherwise, it reads XPT files from the specified directory. + +2. **Data Cleaning and Filtering**: + - The `MI` domain is filtered to include only relevant records, such as those with liver-related issues. Severity levels (`MISEV`) are standardized and missing values are replaced. + +3. **Severity Level Conversion**: + - Severity levels in `MISEV` are mapped to numerical values (e.g., "MILD" becomes 2, "SEVERE" becomes 5). + +4. **Data Merging**: + - The function merges data from the `mi` domain with compiled study data, ensuring that only valid participants (those not marked as "recovery" or "tk") are included. + +5. **Calculation of MI Scores**: + - The MI scores are calculated based on the cleaned and merged data. If `return_individual_scores` is set to `TRUE`, the individual scores are returned. + +6. **Final Compilation**: + - A final data frame containing the MI scores is generated and cleaned up. The function returns either the compiled MI score data or, optionally, Z-scores or individual participant scores. + +## Example Usage + +Here is an example of how to use the `get_mi_score` function: + +```{r} +# Example 1: Basic usage with default parameters +mi_scores <- get_mi_score( + studyid = "12345", + path_db = "/path/to/database" +) + +# Example 2: Using XPT files instead of a database +mi_scores_xpt <- get_mi_score( + path_db = "/path/to/xpt/files", + use_xpt_file = TRUE +) + +# Example 3: Return individual scores +mi_individual_scores <- get_mi_score( + studyid = "12345", + path_db = "/path/to/database", + return_individual_scores = TRUE +) + +# Example 4: Return Z-scores for each participant +mi_zscores <- get_mi_score( + studyid = "12345", + path_db = "/path/to/database", + return_zscore_by_USUBJID = TRUE +) + +## Conclusion + +The `get_mi_score` function is a versatile tool for processing and analyzing MI data from clinical study databases. By setting various parameters, users can tailor the output to meet specific needs, such as: + +- Calculating MI scores based on severity levels. +- Returning individual scores or aggregated MI scores. +- Returning Z-scores for each participant. +- Handling data from either SQLite databases or XPT files. + +The function's flexibility makes it a powerful resource for researchers and data analysts working with clinical study data.