diff --git a/NAMESPACE b/NAMESPACE index 549f2d0..4282f18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(get_site_id) export(get_specimen_type_id) export(groupby_vec) export(make_clean_os_names) +export(mark_all_duplicates) export(num_thousandth_sep) export(nums_clean) export(parse_all_sites) diff --git a/R/gen_clinical_data.R b/R/gen_clinical_data.R index 6185d90..77efe29 100644 --- a/R/gen_clinical_data.R +++ b/R/gen_clinical_data.R @@ -120,7 +120,7 @@ gen_clinical_data <- function(clinical_data, stop("ppid_col is not present in the additional data set") } clinical_data <- merge(additional_data, - clinical_data, by = ppid_col ) + clinical_data, by = ppid_col, all.x = TRUE) }else{ diff --git a/R/mark_all_duplicates.R b/R/mark_all_duplicates.R new file mode 100644 index 0000000..22df3ec --- /dev/null +++ b/R/mark_all_duplicates.R @@ -0,0 +1,27 @@ +# WARNING - Generated by {fusen} from dev/sample_selections.Rmd: do not edit by hand + +#' Mark All Duplicates in a Vector +#' +#' This function identifies all duplicate elements in a vector, marking both the first +#' occurrence and subsequent duplicates. +#' +#' @param vec A numeric or character vector where duplicates are to be identified. +#' +#' @return A logical vector of the same length as `vec`, where TRUE indicates that +#' the corresponding element in `vec` is a duplicate. +#' +#' @export +#' @examples +#' +#' vec <- c("apple", "banana", "apple", "cherry", "banana") +#' mark_all_duplicates(vec) +mark_all_duplicates <- function(vec) { + # Check duplicates from the start and from the end + dup_from_start <- duplicated(vec) + dup_from_end <- duplicated(vec, fromLast = TRUE) + + # Combine the two to mark all duplicates (including first occurrences) + all_duplicates <- dup_from_start | dup_from_end + return(all_duplicates) +} + diff --git a/R/timestamp_to_date.R b/R/timestamp_to_date.R index dfa4980..f5e430c 100644 --- a/R/timestamp_to_date.R +++ b/R/timestamp_to_date.R @@ -31,17 +31,24 @@ timestamp_to_date <- function(timestamp, date_cols = NULL){ } + #' Convert Numeric Timestamp to Date #' #' This method converts a numeric timestamp to a datetime object. #' #' @param timestamp Numeric timestamp. -#' @param date_cols Names of the timestamp columns to convert if the object timestamp is a data.frame. This is not needed for this method +#' @param date_cols Names of the timestamp columns to convert if the object timestamp is a data.frame. #' -#' @return doesn't return anything because the data frame is converted to data.table and function applied in place. +#' @return A datetime object. #' #' @export + + timestamp_to_date.numeric <- function(timestamp, date_cols = NULL){ + if (!is.numeric(timestamp)) { + warning("Input timestamp is not numeric. Skipping conversion.") + return(timestamp) + } timestamp <- as.numeric(timestamp) timestamp <- timestamp/1000 @@ -49,6 +56,7 @@ timestamp_to_date.numeric <- function(timestamp, date_cols = NULL){ return(timestamp) } + #' Convert Timestamp Columns in a Data Frame to Date #' #' This method converts timestamp columns in a data frame to date columns. diff --git a/dev/OpenSpecimenAPI.Rmd b/dev/OpenSpecimenAPI.Rmd index d8c57ef..8f21d08 100644 --- a/dev/OpenSpecimenAPI.Rmd +++ b/dev/OpenSpecimenAPI.Rmd @@ -118,17 +118,24 @@ timestamp_to_date <- function(timestamp, date_cols = NULL){ } + #' Convert Numeric Timestamp to Date #' #' This method converts a numeric timestamp to a datetime object. #' #' @param timestamp Numeric timestamp. -#' @param date_cols Names of the timestamp columns to convert if the object timestamp is a data.frame. This is not needed for this method +#' @param date_cols Names of the timestamp columns to convert if the object timestamp is a data.frame. #' -#' @return doesn't return anything because the data frame is converted to data.table and function applied in place. +#' @return A datetime object. #' #' @export + + timestamp_to_date.numeric <- function(timestamp, date_cols = NULL){ + if (!is.numeric(timestamp)) { + warning("Input timestamp is not numeric. Skipping conversion.") + return(timestamp) + } timestamp <- as.numeric(timestamp) timestamp <- timestamp/1000 @@ -136,6 +143,7 @@ timestamp_to_date.numeric <- function(timestamp, date_cols = NULL){ return(timestamp) } + #' Convert Timestamp Columns in a Data Frame to Date #' #' This method converts timestamp columns in a data frame to date columns. diff --git a/dev/config_fusen.yaml b/dev/config_fusen.yaml index 2136658..941b128 100644 --- a/dev/config_fusen.yaml +++ b/dev/config_fusen.yaml @@ -102,6 +102,7 @@ sample_selections.Rmd: - R/create_save_workbook.R - R/gen_clinical_data.R - R/make_clean_os_names.R + - R/mark_all_duplicates.R - R/process_save_selected_aliquots.R - R/read_os_data.R - R/samples_distributed_summary.R @@ -124,6 +125,7 @@ sample_selections.Rmd: - tests/testthat/test-create_save_workbook.R - tests/testthat/test-gen_clinical_data.R - tests/testthat/test-samples_distributed_summary.R + - tests/testthat/test-mark_all_duplicates.R vignettes: vignettes/sample-selections-vignette.Rmd inflate: flat_file: dev/sample_selections.Rmd diff --git a/dev/sample_selections.Rmd b/dev/sample_selections.Rmd index 8f1341b..92d8c93 100644 --- a/dev/sample_selections.Rmd +++ b/dev/sample_selections.Rmd @@ -1256,7 +1256,7 @@ gen_clinical_data <- function(clinical_data, stop("ppid_col is not present in the additional data set") } clinical_data <- merge(additional_data, - clinical_data, by = ppid_col ) + clinical_data, by = ppid_col, all.x = TRUE) }else{ @@ -1498,6 +1498,45 @@ test_that("samples_distributed_summary works", { ``` +# mark_all_duplicates + +```{r function-mark_all_duplicates} +#' Mark All Duplicates in a Vector +#' +#' This function identifies all duplicate elements in a vector, marking both the first +#' occurrence and subsequent duplicates. +#' +#' @param vec A numeric or character vector where duplicates are to be identified. +#' +#' @return A logical vector of the same length as `vec`, where TRUE indicates that +#' the corresponding element in `vec` is a duplicate. +#' +#' @export +mark_all_duplicates <- function(vec) { + # Check duplicates from the start and from the end + dup_from_start <- duplicated(vec) + dup_from_end <- duplicated(vec, fromLast = TRUE) + + # Combine the two to mark all duplicates (including first occurrences) + all_duplicates <- dup_from_start | dup_from_end + return(all_duplicates) +} + +``` + +```{r example-mark_all_duplicates} + +vec <- c("apple", "banana", "apple", "cherry", "banana") +mark_all_duplicates(vec) +``` + +```{r tests-mark_all_duplicates} +test_that("mark_all_duplicates works", { + expect_true(inherits(mark_all_duplicates, "function")) +}) +``` + + ```{r development-load} # Load already included functions if relevant pkgload::load_all(export_all = FALSE) diff --git a/man/mark_all_duplicates.Rd b/man/mark_all_duplicates.Rd new file mode 100644 index 0000000..c22e72e --- /dev/null +++ b/man/mark_all_duplicates.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mark_all_duplicates.R +\name{mark_all_duplicates} +\alias{mark_all_duplicates} +\title{Mark All Duplicates in a Vector} +\usage{ +mark_all_duplicates(vec) +} +\arguments{ +\item{vec}{A numeric or character vector where duplicates are to be identified.} +} +\value{ +A logical vector of the same length as \code{vec}, where TRUE indicates that +the corresponding element in \code{vec} is a duplicate. +} +\description{ +This function identifies all duplicate elements in a vector, marking both the first +occurrence and subsequent duplicates. +} +\examples{ + +vec <- c("apple", "banana", "apple", "cherry", "banana") +mark_all_duplicates(vec) +} diff --git a/man/timestamp_to_date.numeric.Rd b/man/timestamp_to_date.numeric.Rd index a270969..0dde921 100644 --- a/man/timestamp_to_date.numeric.Rd +++ b/man/timestamp_to_date.numeric.Rd @@ -9,10 +9,10 @@ \arguments{ \item{timestamp}{Numeric timestamp.} -\item{date_cols}{Names of the timestamp columns to convert if the object timestamp is a data.frame. This is not needed for this method} +\item{date_cols}{Names of the timestamp columns to convert if the object timestamp is a data.frame.} } \value{ -doesn't return anything because the data frame is converted to data.table and function applied in place. +A datetime object. } \description{ This method converts a numeric timestamp to a datetime object. diff --git a/tests/testthat/test-mark_all_duplicates.R b/tests/testthat/test-mark_all_duplicates.R new file mode 100644 index 0000000..b8bbbbe --- /dev/null +++ b/tests/testthat/test-mark_all_duplicates.R @@ -0,0 +1,5 @@ +# WARNING - Generated by {fusen} from dev/sample_selections.Rmd: do not edit by hand + +test_that("mark_all_duplicates works", { + expect_true(inherits(mark_all_duplicates, "function")) +}) diff --git a/vignettes/sample-selections-vignette.Rmd b/vignettes/sample-selections-vignette.Rmd index a97b4fd..08ea6a8 100644 --- a/vignettes/sample-selections-vignette.Rmd +++ b/vignettes/sample-selections-vignette.Rmd @@ -461,3 +461,20 @@ samples_distributed_summary(df =list_dfs$clinical_data , order_col = "Total") ``` +# mark_all_duplicates + + + + + +```{r example-mark_all_duplicates} + +vec <- c("apple", "banana", "apple", "cherry", "banana") +mark_all_duplicates(vec) +``` + + + + + +