Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use mutate(.keep = "none") in transmute() #483

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

* `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464)

* `transmute()` preserves row count and avoids unnecessary copies (#470)

# dtplyr 1.3.1

* Fix for failing R CMD check.
Expand Down
43 changes: 6 additions & 37 deletions R/step-subset-transmute.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,10 @@
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_new_vars(.data, ...)
dots_list <- process_new_vars(.data, dots)
dots <- dots_list$dots

groups <- group_vars(.data)
if (!is_empty(groups)) {
# TODO could check if there is actually anything mutated, e.g. to avoid
# DT[, .(x = x)]
is_group_var <- names(dots) %in% groups
group_dots <- dots[is_group_var]

.data <- mutate(ungroup(.data), !!!group_dots)
.data <- group_by(.data, !!!syms(groups))

dots <- dots[!is_group_var]
}

if (is_empty(dots)) {
# grouping variables have been removed from `dots` so `select()` would
# produce a message "Adding grouping vars".
# As `dplyr::transmute()` doesn't generate a message when adding group vars
# we can also leave it away here
return(select(.data, !!!group_vars(.data)))
}

if (!dots_list$use_braces) {
j <- call2(".", !!!dots)
} else {
j <- mutate_with_braces(dots)$expr
}
vars <- union(group_vars(.data), names(dots))
out <- step_subset_j(.data, vars = vars, j = j)
if (dots_list$need_removal_step) {
out <- select(out, -tidyselect::all_of(dots_list$vars_removed))
}

out
out <- mutate(.data, ..., .keep = "none")
cols_expr <- names(capture_new_vars(.data, ...))
cols_group <- group_vars(.data)
cols_group <- setdiff(cols_group, cols_expr)
cols_retain <- c(cols_group, cols_expr)
select(out, any_of(cols_retain))
}
7 changes: 7 additions & 0 deletions tests/testthat/_snaps/step-call.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
Output
setnames(copy(DT), c("a", "b", "c"), toupper)

# can compute distinct computed variables

Code
dt %>% distinct(z = x + y) %>% show_query()
Output
unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)])

# errors are raised

Code
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-step-call.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", {
test_that("can compute distinct computed variables", {
dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt")

expect_equal(
dt %>% distinct(z = x + y) %>% show_query(),
expr(unique(dt[, .(z = x + y)]))
expect_snapshot(
dt %>% distinct(z = x + y) %>% show_query()
)

expect_equal(
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ test_that("generates single calls as expect", {
dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
)

expect_equal(
dt %>% transmute(x2 = x * 2) %>% show_query(),
expr(DT[, .(x2 = x * 2)])
)
})

test_that("mutate generates compound expression if needed", {
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-subset-summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", {
dt %>% summarise(x = mean(x)) %>% show_query(),
expr(DT[, .(x = mean(x))])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
)
})

test_that("can use with across", {
Expand Down
247 changes: 16 additions & 231 deletions tests/testthat/test-step-subset-transmute.R
Original file line number Diff line number Diff line change
@@ -1,249 +1,34 @@
test_that("simple calls generate expected translations", {
test_that("works", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
dt %>% transmute(x) %>% collect(),
dt %>% mutate(x, .keep = "none") %>% collect()
)
})

test_that("transmute generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(DT[, {
x2 <- x * 2
x4 <- x2 * 2
.(x2, x4)
}])
)
})

test_that("allows multiple assignment to the same variable", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when nested
expect_equal(
dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(),
expr(DT[, {
x <- x * 2
x <- x * 2
.(x)
}])
)

# when not nested
expect_equal(
dt %>% transmute(z = 2, y = 3) %>% show_query(),
expr(DT[, .(z = 2, y = 3)])
)
})


test_that("groups are respected", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% show_query(),
expr(DT[, .(y = 2), keyby = .(x)])
)
})

test_that("grouping vars can be transmuted", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2)
test_that("empty dots preserves groups", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>%
group_by(y)

expect_equal(dt$vars, c("x", "y"))
expect_equal(dt$groups, "x")
expect_equal(
dt %>% show_query(),
expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)])
)

skip("transmuting grouping vars with nesting is not supported")
dt <- lazy_dt(data.table(x = 1), "DT") %>%
group_by(x) %>%
transmute(x = x + 1, y = x + 1, x = y + 1)
res <- dt %>% transmute() %>% collect()

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% collect(),
tibble(x = 4, y = 3) %>% group_by(x)
)
expect_equal(names(res), "y")
})

test_that("empty transmute works", {
dt <- lazy_dt(data.frame(x = 1), "DT")
expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L]))
expect_equal(transmute(dt)$vars, character())
expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L]))
test_that("preserves column order", {
dt <- lazy_dt(data.table(x = 1, y = 1), "DT")

dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x)
expect_equal(transmute(dt_grouped)$vars, "x")
})
res <- dt %>% transmute(y, x) %>% collect()

test_that("only transmuting groups works", {
dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x)
expect_equal(transmute(dt, x) %>% collect(), dt %>% collect())
expect_equal(transmute(dt, x)$vars, "x")
expect_equal(names(res), c("y", "x"))
})

test_that("across() can access previously created variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, across(y, sqrt))
expect_equal(
collect(step),
tibble(y = sqrt(2))
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
y <- sqrt(y)
.(y)
}])
)
})

test_that("new columns take precedence over global variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
y <- 'global var'
step <- transmute(dt, y = 2, z = y + 1)
expect_equal(
collect(step),
tibble(y = 2, z = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y + 1
.(y, z)
}])
)
})

# var = NULL -------------------------------------------------------------

test_that("var = NULL when var is in original data", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}][, `:=`("x", NULL)])
)
})

test_that("var = NULL when var is in final output", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = NULL, y = 3)
expect_equal(
collect(step),
tibble(y = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- NULL
y <- 3
.(y)
}])
)
})

test_that("temp var with nested arguments", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}][, `:=`("y", NULL)])
)
})

test_that("temp var with no new vars added", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, y = NULL)
expect_equal(
collect(step),
tibble()
)
expect_equal(
step$vars,
character()
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
.(y)
}][, `:=`("y", NULL)])
)
})

test_that("var = NULL works when data is grouped", {
dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g)
test_that("works correctly when column is both added and removed in the same call", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when var is in original data
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}, keyby = .(g)][, `:=`("x", NULL)])
)
res <- dt %>% transmute(y, z = 3, z = NULL) %>% collect()

# when var is not in original data
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}, keyby = .(g)][, `:=`("y", NULL)])
)
expect_equal(names(res), "y")
})

Loading