Convert early_zero_weight() to data.table

Merck · Nov 21, 2023 · 0b90ba2 · 0b90ba2
1 parent b7386d7
commit 0b90ba2
Show file tree

Hide file tree

Showing 5 changed files with 113 additions and 25 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: simtrial
 Type: Package
 Title: Clinical Trial Simulation
-Version: 0.3.0.5
+Version: 0.3.0.6
 Authors@R: c(
     person("Keaven", "Anderson", email = "[email protected]", role = c("aut")),
     person("Yilong", "Zhang", email = "[email protected]", role =  c("aut")),

diff --git a/R/early_zero_weight.R b/R/early_zero_weight.R
@@ -26,6 +26,8 @@
 #' @return A data frame. The column `weight` contains the weights for the
 #'   early zero weighted logrank test for the data in `x`.
 #'
+#' @importFrom data.table ":=" as.data.table merge.data.table setDF
+#'
 #' @export
 #'
 #' @references
@@ -90,38 +92,31 @@
 #'   early_zero_weight(early_period = 2, fail_rate = fail_rate) %>%
 #'   filter(row_number() %in% seq(5, 200, 40))
 early_zero_weight <- function(x, early_period = 4, fail_rate = NULL) {
-  n_stratum <- length(unique((x$stratum)))
+  ans <- as.data.table(x)
+  n_stratum <- length(unique(ans$stratum))
 
   # If it is unstratified design
   if (n_stratum == 1) {
-    ans <- x %>%
-      mutate(weight = case_when(
-        tte < early_period ~ 0,
-        tte >= early_period ~ 1
-      ))
+    ans[, weight := ifelse(tte < early_period, 0, 1)]
   } else {
     if (is.null(fail_rate)) {
       stop("For stratified design to use `early_zero_weight()`, `fail_rate` can't be `NULL`.")
     }
-    if (!all((fail_rate %>% group_by(stratum) %>% summarise(x = n() == 2))$x)) {
+    fail_rate <- as.data.table(fail_rate)
+    # require 2 piece failure rate per stratum
+    two_piece_fr <- fail_rate[, .(check = .N == 2), by = "stratum"]
+    if (!all(two_piece_fr$check)) {
       stop("`early_zero_weight()` only allows delayed treatment effect, that is, 2 piece failure rate with HR = 1 at the first period.")
     }
 
-    late_hr <- fail_rate %>%
-      filter(hr != 1) %>%
-      select(stratum, hr)
-    delay_change_time <- fail_rate %>%
-      filter(hr == 1) %>%
-      select(stratum, duration)
+    late_hr <- fail_rate[hr != 1, .(stratum, hr)]
+    delay_change_time <- fail_rate[hr == 1, .(stratum, duration)]
 
-    ans <- x %>%
-      left_join(late_hr) %>%
-      left_join(delay_change_time) %>%
-      mutate(weight = case_when(
-        tte < duration ~ 0,
-        tte >= duration ~ hr
-      ))
+    ans <- merge.data.table(ans, late_hr, by = "stratum", all.x = TRUE)
+    ans <- merge.data.table(ans, delay_change_time, by = "stratum", all.x = TRUE)
+    ans[, weight := ifelse(tte < duration, 0, hr)]
   }
 
-  ans
+  setDF(ans)
+  return(ans)
 }
diff --git a/R/global.R b/R/global.R
@@ -55,7 +55,8 @@ utils::globalVariables(
     "time",
     "treatment",
     "tte",
-    "var_o_minus_e"
+    "var_o_minus_e",
+    "weight"
   )
 )
 

diff --git a/tests/testthat/test-unvalidated-data.table.R b/tests/testthat/test-unvalidated-data.table.R
@@ -14,6 +14,12 @@ test_that("functions that use data.table still return a data frame", {
   x <- sim_pw_surv(n = 20)
   expect_identical(class(cut_data_by_date(x, 5)), class_expected)
 
+  # early_zero_weight()
+  x <- sim_pw_surv(n = 200)
+  x <- cut_data_by_event(x, 125)
+  x <- counting_process(x, arm = "experimental")
+  expect_identical(class(early_zero_weight(x, early_period = 2)), class_expected)
+
   # fh_weight()
   expect_identical(class(fh_weight()), class_expected)
 
@@ -69,6 +75,51 @@ test_that("functions that use data.table do not modify input data table", {
   cut_data_by_date(x, 5)
   expect_identical(x, x_original)
 
+  # early_zero_weight()
+  x <- sim_pw_surv(n = 200)
+  x <- cut_data_by_event(x, 125)
+  x <- counting_process(x, arm = "experimental")
+  data.table::setDT(x)
+  x_original <- data.table::copy(x)
+  early_zero_weight(x, early_period = 2)
+  expect_identical(x, x_original)
+  # stratified
+  x <- data.frame(
+    stratum = rep(c("Biomarker-negative", "Biomarker-positive"), c(3L, 6L)),
+    events = rep(1, 9L),
+    n_event_tol = c(0, 1, 1, 0, 1, 0, 1, 1, 0),
+    tte = c(
+      1.5993802300052806, 2.0909139412967335, 10.506616675248615,
+      0.9680172928009441, 1.5338585759371668, 8.796812031499012, 10.51114008985088,
+      13.752032056906227, 19.173970377498854
+    ),
+    n_risk_tol = c(4, 3, 2, 10, 9, 8, 7, 6, 5),
+    n_risk_trt = c(2L, 2L, 1L, 6L, 6L, 5L, 5L, 4L, 3L),
+    s = c(1, 0.75, 0.5, 1, 0.9, 0.7999999999999999, 0.7, 0.6, 0.5),
+    o_minus_e = c(
+      -0.5, 0.33333333333333337, 0.5, -0.6, 0.33333333333333337, -0.625,
+      0.2857142857142857, 0.33333333333333337, -0.6
+    ),
+    var_o_minus_e = c(
+      0.25, 0.2222222222222222, 0.25, 0.24000000000000002, 0.2222222222222222,
+      0.234375, 0.20408163265306123, 0.22222222222222224, 0.24
+    )
+  )
+  fail_rate <- data.frame(
+    stratum = rep(c("Biomarker-positive", "Biomarker-negative"), each = 2L),
+    duration = c(3, 1000, 4, 1000),
+    fail_rate = rep(c(0.06931471805599453, 0.08664339756999316), each = 2L),
+    dropout_rate = rep(0.01, 4L),
+    hr = c(1, 0.7, 1, 0.8)
+  )
+  data.table::setDT(x)
+  x_original <- data.table::copy(x)
+  data.table::setDT(fail_rate)
+  fail_rate_original <- data.table::copy(fail_rate)
+  early_zero_weight(x, early_period = 2, fail_rate = fail_rate)
+  expect_identical(x, x_original)
+  expect_identical(fail_rate, fail_rate_original)
+
   # fh_weight()
   x <- sim_pw_surv()
   x <- cut_data_by_event(x, 125)

diff --git a/tests/testthat/test-unvalidated-early_zero_weight.R b/tests/testthat/test-unvalidated-early_zero_weight.R
@@ -1,5 +1,3 @@
-library(dplyr)
-
 test_that("early_zero_weight() with unstratified data", {
   # Example 1: Unstratified
   set.seed(123)
@@ -60,3 +58,46 @@ test_that("early_zero_weight() with stratified data", {
   expect_equal(observed, expected)
 })
 
+test_that("early_zero_weight() fails with bad input", {
+  input <- data.frame(
+    stratum = rep(c("Biomarker-negative", "Biomarker-positive"), c(3L, 6L)),
+    events = rep(1, 9L),
+    n_event_tol = c(0, 1, 1, 0, 1, 0, 1, 1, 0),
+    tte = c(
+      1.5993802300052806, 2.0909139412967335, 10.506616675248615,
+      0.9680172928009441, 1.5338585759371668, 8.796812031499012, 10.51114008985088,
+      13.752032056906227, 19.173970377498854
+    ),
+    n_risk_tol = c(4, 3, 2, 10, 9, 8, 7, 6, 5),
+    n_risk_trt = c(2L, 2L, 1L, 6L, 6L, 5L, 5L, 4L, 3L),
+    s = c(1, 0.75, 0.5, 1, 0.9, 0.7999999999999999, 0.7, 0.6, 0.5),
+    o_minus_e = c(
+      -0.5, 0.33333333333333337, 0.5, -0.6, 0.33333333333333337, -0.625,
+      0.2857142857142857, 0.33333333333333337, -0.6
+    ),
+    var_o_minus_e = c(
+      0.25, 0.2222222222222222, 0.25, 0.24000000000000002, 0.2222222222222222,
+      0.234375, 0.20408163265306123, 0.22222222222222224, 0.24
+    )
+  )
+
+  # missing fail_rate
+  expect_error(
+    early_zero_weight(input, early_period = 2),
+    "For stratified design to use `early_zero_weight\\(\\)`, `fail_rate` can't be `NULL`."
+  )
+
+  # not a 2 piece failure rate
+  fail_rate <- data.frame(
+    stratum = rep(c("Biomarker-positive", "Biomarker-negative"), each = 2L),
+    duration = c(3, 1000, 4, 1000),
+    fail_rate = rep(c(0.06931471805599453, 0.08664339756999316), each = 2L),
+    dropout_rate = rep(0.01, 4L),
+    hr = c(1, 0.7, 1, 0.8)
+  )
+  fail_rate <- fail_rate[-1, ]
+  expect_error(
+    early_zero_weight(input, early_period = 2, fail_rate = fail_rate),
+    "`early_zero_weight\\(\\)` only allows delayed treatment effect, that is, 2 piece failure rate with HR = 1 at the first period."
+  )
+})