diff --git a/NEWS.md b/NEWS.md index e6eeda24..4932e252 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ * `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464) +* `transmute()` preserves row count and avoids unnecessary copies (#470) + # dtplyr 1.3.1 * Fix for failing R CMD check. diff --git a/R/step-subset-transmute.R b/R/step-subset-transmute.R index b2c6e20c..20b137dc 100644 --- a/R/step-subset-transmute.R +++ b/R/step-subset-transmute.R @@ -13,41 +13,10 @@ #' dt <- lazy_dt(dplyr::starwars) #' dt %>% transmute(name, sh = paste0(species, "/", homeworld)) transmute.dtplyr_step <- function(.data, ...) { - dots <- capture_new_vars(.data, ...) - dots_list <- process_new_vars(.data, dots) - dots <- dots_list$dots - - groups <- group_vars(.data) - if (!is_empty(groups)) { - # TODO could check if there is actually anything mutated, e.g. to avoid - # DT[, .(x = x)] - is_group_var <- names(dots) %in% groups - group_dots <- dots[is_group_var] - - .data <- mutate(ungroup(.data), !!!group_dots) - .data <- group_by(.data, !!!syms(groups)) - - dots <- dots[!is_group_var] - } - - if (is_empty(dots)) { - # grouping variables have been removed from `dots` so `select()` would - # produce a message "Adding grouping vars". - # As `dplyr::transmute()` doesn't generate a message when adding group vars - # we can also leave it away here - return(select(.data, !!!group_vars(.data))) - } - - if (!dots_list$use_braces) { - j <- call2(".", !!!dots) - } else { - j <- mutate_with_braces(dots)$expr - } - vars <- union(group_vars(.data), names(dots)) - out <- step_subset_j(.data, vars = vars, j = j) - if (dots_list$need_removal_step) { - out <- select(out, -tidyselect::all_of(dots_list$vars_removed)) - } - - out + out <- mutate(.data, ..., .keep = "none") + cols_expr <- names(capture_new_vars(.data, ...)) + cols_group <- group_vars(.data) + cols_group <- setdiff(cols_group, cols_expr) + cols_retain <- c(cols_group, cols_expr) + select(out, any_of(cols_retain)) } diff --git a/tests/testthat/_snaps/step-call.md b/tests/testthat/_snaps/step-call.md index fc85d2d0..b1ff0514 100644 --- a/tests/testthat/_snaps/step-call.md +++ b/tests/testthat/_snaps/step-call.md @@ -17,6 +17,13 @@ Output setnames(copy(DT), c("a", "b", "c"), toupper) +# can compute distinct computed variables + + Code + dt %>% distinct(z = x + y) %>% show_query() + Output + unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)]) + # errors are raised Code diff --git a/tests/testthat/test-step-call.R b/tests/testthat/test-step-call.R index 2ff4e8d6..0b17dfad 100644 --- a/tests/testthat/test-step-call.R +++ b/tests/testthat/test-step-call.R @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", { test_that("can compute distinct computed variables", { dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt") - expect_equal( - dt %>% distinct(z = x + y) %>% show_query(), - expr(unique(dt[, .(z = x + y)])) + expect_snapshot( + dt %>% distinct(z = x + y) %>% show_query() ) expect_equal( diff --git a/tests/testthat/test-step-mutate.R b/tests/testthat/test-step-mutate.R index 172c8e52..14756c41 100644 --- a/tests/testthat/test-step-mutate.R +++ b/tests/testthat/test-step-mutate.R @@ -59,11 +59,6 @@ test_that("generates single calls as expect", { dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)]) ) - - expect_equal( - dt %>% transmute(x2 = x * 2) %>% show_query(), - expr(DT[, .(x2 = x * 2)]) - ) }) test_that("mutate generates compound expression if needed", { diff --git a/tests/testthat/test-step-subset-summarise.R b/tests/testthat/test-step-subset-summarise.R index fae4eba3..6e7921da 100644 --- a/tests/testthat/test-step-subset-summarise.R +++ b/tests/testthat/test-step-subset-summarise.R @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", { dt %>% summarise(x = mean(x)) %>% show_query(), expr(DT[, .(x = mean(x))]) ) - - expect_equal( - dt %>% transmute(x) %>% show_query(), - expr(DT[, .(x = x)]) - ) }) test_that("can use with across", { diff --git a/tests/testthat/test-step-subset-transmute.R b/tests/testthat/test-step-subset-transmute.R index e892fb51..1059ccfb 100644 --- a/tests/testthat/test-step-subset-transmute.R +++ b/tests/testthat/test-step-subset-transmute.R @@ -1,249 +1,34 @@ -test_that("simple calls generate expected translations", { +test_that("works", { dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") expect_equal( - dt %>% transmute(x) %>% show_query(), - expr(DT[, .(x = x)]) + dt %>% transmute(x) %>% collect(), + dt %>% mutate(x, .keep = "none") %>% collect() ) }) -test_that("transmute generates compound expression if needed", { - dt <- lazy_dt(data.table(x = 1, y = 2), "DT") - - expect_equal( - dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(), - expr(DT[, { - x2 <- x * 2 - x4 <- x2 * 2 - .(x2, x4) - }]) - ) -}) - -test_that("allows multiple assignment to the same variable", { - dt <- lazy_dt(data.table(x = 1, y = 2), "DT") - - # when nested - expect_equal( - dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(), - expr(DT[, { - x <- x * 2 - x <- x * 2 - .(x) - }]) - ) - - # when not nested - expect_equal( - dt %>% transmute(z = 2, y = 3) %>% show_query(), - expr(DT[, .(z = 2, y = 3)]) - ) -}) - - -test_that("groups are respected", { - dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2) - - expect_equal(dt$vars, c("x", "y")) - expect_equal( - dt %>% show_query(), - expr(DT[, .(y = 2), keyby = .(x)]) - ) -}) - -test_that("grouping vars can be transmuted", { - dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2) +test_that("empty dots preserves groups", { + dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>% + group_by(y) - expect_equal(dt$vars, c("x", "y")) - expect_equal(dt$groups, "x") - expect_equal( - dt %>% show_query(), - expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)]) - ) - - skip("transmuting grouping vars with nesting is not supported") - dt <- lazy_dt(data.table(x = 1), "DT") %>% - group_by(x) %>% - transmute(x = x + 1, y = x + 1, x = y + 1) + res <- dt %>% transmute() %>% collect() - expect_equal(dt$vars, c("x", "y")) - expect_equal( - dt %>% collect(), - tibble(x = 4, y = 3) %>% group_by(x) - ) + expect_equal(names(res), "y") }) -test_that("empty transmute works", { - dt <- lazy_dt(data.frame(x = 1), "DT") - expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L])) - expect_equal(transmute(dt)$vars, character()) - expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L])) +test_that("preserves column order", { + dt <- lazy_dt(data.table(x = 1, y = 1), "DT") - dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x) - expect_equal(transmute(dt_grouped)$vars, "x") -}) + res <- dt %>% transmute(y, x) %>% collect() -test_that("only transmuting groups works", { - dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x) - expect_equal(transmute(dt, x) %>% collect(), dt %>% collect()) - expect_equal(transmute(dt, x)$vars, "x") + expect_equal(names(res), c("y", "x")) }) -test_that("across() can access previously created variables", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, across(y, sqrt)) - expect_equal( - collect(step), - tibble(y = sqrt(2)) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - y <- sqrt(y) - .(y) - }]) - ) -}) - -test_that("new columns take precedence over global variables", { - dt <- lazy_dt(data.frame(x = 1), "DT") - y <- 'global var' - step <- transmute(dt, y = 2, z = y + 1) - expect_equal( - collect(step), - tibble(y = 2, z = 3) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y + 1 - .(y, z) - }]) - ) -}) - -# var = NULL ------------------------------------------------------------- - -test_that("var = NULL when var is in original data", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- dt %>% transmute(x = 2, z = x*2, x = NULL) - expect_equal( - collect(step), - tibble(z = 4) - ) - expect_equal( - step$vars, - "z" - ) - expect_equal( - show_query(step), - expr(DT[, { - x <- 2 - z <- x * 2 - .(x, z) - }][, `:=`("x", NULL)]) - ) -}) - -test_that("var = NULL when var is in final output", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = NULL, y = 3) - expect_equal( - collect(step), - tibble(y = 3) - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- NULL - y <- 3 - .(y) - }]) - ) -}) - -test_that("temp var with nested arguments", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, z = y*2, y = NULL) - expect_equal( - collect(step), - tibble(z = 4) - ) - expect_equal( - step$vars, - "z" - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y * 2 - .(y, z) - }][, `:=`("y", NULL)]) - ) -}) - -test_that("temp var with no new vars added", { - dt <- lazy_dt(data.frame(x = 1), "DT") - step <- transmute(dt, y = 2, y = NULL) - expect_equal( - collect(step), - tibble() - ) - expect_equal( - step$vars, - character() - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - .(y) - }][, `:=`("y", NULL)]) - ) -}) - -test_that("var = NULL works when data is grouped", { - dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g) +test_that("works correctly when column is both added and removed in the same call", { + dt <- lazy_dt(data.table(x = 1, y = 2), "DT") - # when var is in original data - step <- dt %>% transmute(x = 2, z = x*2, x = NULL) - expect_equal( - collect(step), - tibble(g = 1, z = 4) %>% group_by(g) - ) - expect_equal( - step$vars, - c("g", "z") - ) - expect_equal( - show_query(step), - expr(DT[, { - x <- 2 - z <- x * 2 - .(x, z) - }, keyby = .(g)][, `:=`("x", NULL)]) - ) + res <- dt %>% transmute(y, z = 3, z = NULL) %>% collect() - # when var is not in original data - step <- transmute(dt, y = 2, z = y*2, y = NULL) - expect_equal( - collect(step), - tibble(g = 1, z = 4) %>% group_by(g) - ) - expect_equal( - step$vars, - c("g", "z") - ) - expect_equal( - show_query(step), - expr(DT[, { - y <- 2 - z <- y * 2 - .(y, z) - }, keyby = .(g)][, `:=`("y", NULL)]) - ) + expect_equal(names(res), "y") })