refactor: ♻️ use base pipe instead

Closes #194
rostools · Nov 9, 2024 · 67510d0 · 67510d0
1 parent dfbacf4
commit 67510d0
Show file tree

Hide file tree

Showing 7 changed files with 187 additions and 187 deletions.
diff --git a/appendix/extra-exercises.qmd b/appendix/extra-exercises.qmd
@@ -36,14 +36,14 @@ Let's make a change to our function and test out how the
 #' @return A data.frame/tibble.
 #'
 descriptive_stats <- function(data) {
-  data %>%
-    dplyr::group_by(metabolite) %>%
+  data |>
+    dplyr::group_by(metabolite) |>
     dplyr::summarise(dplyr::across(value, list(
       mean = mean,
       sd = sd,
       median = median,
       iqr = IQR
-    ))) %>%
+    ))) |>
     dplyr::mutate(dplyr::across(tidyselect::where(is.numeric), ~round(.x, digits = 1)))
 }
 ```
@@ -79,6 +79,6 @@ for now, commit the changes to the Git history with
 #| eval: false
 #| code-fold: true
 #| code-summary: "**Click for the solution**. Only click if you are struggling or are out of time."
-linear_reg_specs <- linear_reg() %>%
+linear_reg_specs <- linear_reg() |>
   set_engine("lm")
 ```
diff --git a/appendix/extras.qmd b/appendix/extras.qmd
@@ -39,8 +39,8 @@ side by side, rather than stacked.
 
 ```{r bar-plot-gender-class}
 #| eval: true
-gender_by_class_plot <- lipidomics %>%
-  distinct(code, gender, class) %>%
+gender_by_class_plot <- lipidomics |>
+  distinct(code, gender, class) |>
   ggplot(aes(x = class, fill = gender)) +
   geom_bar(position = "dodge")
 gender_by_class_plot

diff --git a/appendix/model-variability.qmd b/appendix/model-variability.qmd
@@ -80,7 +80,7 @@ Create another code chunk at the bottom of `doc/learning.qmd` to add
 this code:
 
 ```{r split-metabolite-for-bootstrap}
-lipidomics_list <- lipidomics %>%
+lipidomics_list <- lipidomics |>
   split_by_metabolite()
 ```
 
@@ -120,8 +120,8 @@ functions. We'll `arrange()` by `code` to show how we can have duplicate
 persons when resampling:
 
 ```{r show-split-contents-analysis}
-bootstraps(lipidomics_list[[1]], times = 10)$splits[[1]] %>%
-  analysis() %>%
+bootstraps(lipidomics_list[[1]], times = 10)$splits[[1]] |>
+  analysis() |>
   arrange(code)
 ```
 
@@ -136,9 +136,9 @@ metabolites.
 
 ```{r create-workflow-for-bootstrap}
 workflow_for_bootstrap <- create_model_workflow(
-  logistic_reg() %>%
+  logistic_reg() |>
     set_engine("glm"),
-  lipidomics_list[[1]] %>%
+  lipidomics_list[[1]] |>
     create_recipe_spec(starts_with("metabolite_"))
 )
 ```
@@ -192,11 +192,11 @@ workflow we've used throughout the course.
 #'
 generate_model_variation <- function(data) {
   create_model_workflow(
-    parsnip::logistic_reg() %>%
+    parsnip::logistic_reg() |>
       parsnip::set_engine("glm"),
-    data %>%
+    data |>
       create_recipe_spec(tidyselect::starts_with("metabolite_"))
-  ) %>%
+  ) |>
     tune::fit_resamples(
       resamples = rsample::bootstraps(data, times = 10),
       control = tune::control_resamples(
@@ -210,7 +210,7 @@ generate_model_variation <- function(data) {
 Re-writing the code to use the function, it becomes:
 
 ```{r use-generate-variation-function}
-bootstrapped_results <- lipidomics_list[[1]] %>%
+bootstrapped_results <- lipidomics_list[[1]] |>
   generate_model_variation()
 bootstrapped_results
 ```
@@ -225,8 +225,8 @@ each model fit to the resampled set. We'll ignore all but the
 regular tibble based on the column given.
 
 ```{r unnext-extracts}
-bootstrapped_results %>%
-  select(id, .extracts) %>%
+bootstrapped_results |>
+  select(id, .extracts) |>
   unnest(cols = .extracts)
 ```
 
@@ -235,9 +235,9 @@ new column `.extracts` where each row is called a `<tibble>`). So let's
 again `unnest()` this new `.extracts` column.
 
 ```{r unnest-unnest-bootstrap}
-bootstrapped_results %>%
-  select(id, .extracts) %>%
-  unnest(cols = .extracts) %>%
+bootstrapped_results |>
+  select(id, .extracts) |>
+  unnest(cols = .extracts) |>
   unnest(cols = .extracts)
 ```
 
@@ -248,11 +248,11 @@ only want the metabolite `estimate`, so we can use `filter()` and
 names with `add_original_metabolite_names()`.
 
 ```{r unnest-unnest-tidy-bootstrap-results}
-bootstrapped_results %>%
-  select(id, .extracts) %>%
-  unnest(cols = .extracts) %>%
-  unnest(cols = .extracts) %>%
-  filter(str_detect(term, "metabolite_")) %>%
+bootstrapped_results |>
+  select(id, .extracts) |>
+  unnest(cols = .extracts) |>
+  unnest(cols = .extracts) |>
+  filter(str_detect(term, "metabolite_")) |>
   add_original_metabolite_names(lipidomics)
 ```
 
@@ -266,13 +266,13 @@ Using the same workflow as before, let's convert this into a function:
 #' @return A data frame.
 #'
 tidy_bootstrap_output <- function(bootstrap_results) {
-  bootstrap_results %>%
-    dplyr::select(id, .extracts) %>%
+  bootstrap_results |>
+    dplyr::select(id, .extracts) |>
     # Need to unnest twice since first `.extracts` is a nest of another two
     # columns of `.extracts` and `.config`.
-    tidyr::unnest(cols = .extracts) %>%
-    tidyr::unnest(cols = .extracts) %>%
-    dplyr::filter(stringr::str_detect(term, "metabolite_")) %>%
+    tidyr::unnest(cols = .extracts) |>
+    tidyr::unnest(cols = .extracts) |>
+    dplyr::filter(stringr::str_detect(term, "metabolite_")) |>
     add_original_metabolite_names(lipidomics)
 }
 ```
@@ -285,10 +285,10 @@ run.
 
 ```{r chain-data-to-bootstrap-results}
 #| eval: false
-metabolites_with_bootstrap_results <- lipidomics %>%
-  split_by_metabolite() %>%
-  map(generate_model_variation) %>%
-  map(tidy_bootstrap_output) %>% 
+metabolites_with_bootstrap_results <- lipidomics |>
+  split_by_metabolite() |>
+  map(generate_model_variation) |>
+  map(tidy_bootstrap_output) |> 
   list_rbind()
 metabolites_with_bootstrap_results
 ```
@@ -317,7 +317,7 @@ Use this code as a guide for the function.
 
 ``` r
 calculate_variation <- function(___) {
-  ___ %>% 
+  ___ |> 
     # Code from above.
     ___
 }
@@ -334,10 +334,10 @@ calculate_variation <- function(___) {
 #' @return A data frame (or file path)
 #'
 calculate_variation <- function(data) {
-  data %>%
-    split_by_metabolite() %>%
-    purrr::map(generate_model_variation) %>%
-    purrr::map(tidy_bootstrap_output) %>% 
+  data |>
+    split_by_metabolite() |>
+    purrr::map(generate_model_variation) |>
+    purrr::map(tidy_bootstrap_output) |> 
     purrr::list_rbind()
 }
 ```
@@ -425,7 +425,7 @@ will use `scales = "free"` because the range of values for `estimate`
 are different for each `metabolite`.
 
 ```{r plot-variation}
-metabolites_with_bootstrap_results %>%
+metabolites_with_bootstrap_results |>
   ggplot(aes(x = estimate)) +
   geom_dotplot() +
   facet_wrap(vars(metabolite), scales = "free")
@@ -456,7 +456,7 @@ Let's use our function workflow with this code:
 #' @return A ggplot2 image.
 #'
 plot_variation <- function(model_results) {
-  model_results %>%
+  model_results |>
     ggplot2::ggplot(ggplot2::aes(x = estimate)) +
     ggplot2::geom_dotplot() +
     ggplot2::facet_wrap(ggplot2::vars(metabolite), scales = "free")

diff --git a/data-raw/nmr-omics.R b/data-raw/nmr-omics.R
@@ -35,36 +35,36 @@ lipidomics_full <- read_xlsx(
 # - Subject level data
 
 # Keep only lipidomic values
-lipidomics_only <- lipidomics_full %>%
+lipidomics_only <- lipidomics_full |>
   # Want to remove columns 2, 3, and 4 since they are "limits"
   # (we don't need them for this course)
-  select(-2:-4) %>%
+  select(-2:-4) |>
   # Remove the subject data rows
-  slice(-1:-4) %>%
-  mutate(across(-V1, as.numeric)) %>%
+  slice(-1:-4) |>
+  mutate(across(-V1, as.numeric)) |>
   # Make it so the metabolite values are all in one column,
   # which will make it easier to join with the subject data later.
-  pivot_longer(-V1) %>%
+  pivot_longer(-V1) |>
   rename(metabolite = V1)
 
 # Keep only subject data
-subject_only <- lipidomics_full %>%
+subject_only <- lipidomics_full |>
   # Remove the first metabolic name and limit columns,
   # don't need for this
-  select(-1:-3) %>%
+  select(-1:-3) |>
   # Keep only the subject data rows
-  slice(1:4) %>%
-  pivot_longer(cols = -V4) %>%
-  pivot_wider(names_from = V4, values_from = value) %>%
+  slice(1:4) |>
+  pivot_longer(cols = -V4) |>
+  pivot_wider(names_from = V4, values_from = value) |>
   # There is a weird "" before some of the numbers, so we have to
   # extract just the number first before converting to numeric.
-  mutate(Age = as.numeric(stringr::str_extract(Age, "\\d+"))) %>%
+  mutate(Age = as.numeric(stringr::str_extract(Age, "\\d+"))) |>
   rename_with(snakecase::to_snake_case)
 
 lipidomics <- full_join(
   subject_only,
   lipidomics_only
-) %>%
+) |>
   # Don't need anymore
   select(-name)
 

diff --git a/sessions/pipelines.qmd b/sessions/pipelines.qmd
@@ -410,9 +410,9 @@ write out the code!
 ```{r mean-sd-by-each-metabolite}
 #| filename: "doc/learning.qmd"
 #| eval: true
-lipidomics %>%
-  group_by(metabolite) %>%
-  summarise(across(value, list(mean = mean, sd = sd))) %>%
+lipidomics |>
+  group_by(metabolite) |>
+  summarise(across(value, list(mean = mean, sd = sd))) |>
   mutate(across(where(is.numeric), ~round(.x, digits = 1)))
 ```
 
@@ -478,9 +478,9 @@ we will be using this feature throughout the rest of this course.
 #' @return A data.frame/tibble.
 #'
 descriptive_stats <- function(data) {
-  data %>%
-    dplyr::group_by(metabolite) %>%
-    dplyr::summarise(dplyr::across(value, list(mean = mean, sd = sd))) %>%
+  data |>
+    dplyr::group_by(metabolite) |>
+    dplyr::summarise(dplyr::across(value, list(mean = mean, sd = sd))) |>
     dplyr::mutate(dplyr::across(tidyselect::where(is.numeric), ~round(.x, digits = 1)))
 }
 ```
@@ -587,7 +587,7 @@ It probably won't run though. That's because `{targets}` doesn't know
 about the packages that you need for the pipeline. To add it, we need to
 go to the `tar_option_set()` section of the `_targets.R` file and add to
 the `packages = c("tibble")` code with the packages we use that aren't
-explicitly called via `::` (e.g. `%>%`). For now, we only need to add
+explicitly called via `::` (the base pipe `|>` is built into R and needs no package). For now, we only need to add
 `"dplyr"` to the `packages` argument.
 
 We can now put this code in the `packages` argument of
@@ -728,7 +728,7 @@ tasks:
 #' @return A ggplot2 graph.
 #'
 plot_distributions <- function(data) {
-  data %>% 
+  data |> 
     ggplot2::ggplot(ggplot2::aes(x = value)) +
     ggplot2::geom_histogram() +
     ggplot2::facet_wrap(ggplot2::vars(metabolite), scales = "free")
@@ -904,9 +904,9 @@ can use it to format the final table text to be `mean value (SD value)`:
 
 ```{r stats-to-table}
 #| filename: "doc/learning.qmd"
-targets::tar_read(df_stats_by_metabolite) %>% 
-  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>%
-  select(Metabolite = metabolite, `Mean SD` = MeanSD) %>%
+targets::tar_read(df_stats_by_metabolite) |> 
+  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |>
+  select(Metabolite = metabolite, `Mean SD` = MeanSD) |>
   knitr::kable(caption = "Descriptive statistics of the metabolites.")
 ```
 
@@ -915,9 +915,9 @@ targets::tar_read(df_stats_by_metabolite) %>%
 #| echo: false
 #| purl: true
 pretty_basic_stats_code <- '
-targets::tar_read(df_stats_by_metabolite) %>% 
-  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>%
-  select(Metabolite = metabolite, `Mean SD` = MeanSD) %>%
+targets::tar_read(df_stats_by_metabolite) |> 
+  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |>
+  select(Metabolite = metabolite, `Mean SD` = MeanSD) |>
   knitr::kable(caption = "Descriptive statistics of the metabolites.")
 '
 revise_by_text(
@@ -932,10 +932,10 @@ git_ci("doc/learning.qmd", "Basic stats as a pretty table.")
 ```{r execute-only-table-basic-stats}
 #| eval: true
 #| echo: false
-lipidomics %>% 
-  descriptive_stats() %>% 
-  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>%
-  select(Metabolite = metabolite, `Mean (SD)` = MeanSD) %>%
+lipidomics |> 
+  descriptive_stats() |> 
+  mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |>
+  select(Metabolite = metabolite, `Mean (SD)` = MeanSD) |>
   knitr::kable(caption = "The mean and standard deviation of metabolites in the lipidomics dataset.")
 ```