From 67510d0ee1d95d2cea71417b1a667095014d8177 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Sat, 9 Nov 2024 19:55:03 +0100 Subject: [PATCH] refactor: :recycle: use base pipe instead Closes #194 --- appendix/extra-exercises.qmd | 8 +- appendix/extras.qmd | 4 +- appendix/model-variability.qmd | 70 +++++++------- data-raw/nmr-omics.R | 24 ++--- sessions/pipelines.qmd | 36 +++---- sessions/stats-analyses-basic.qmd | 94 +++++++++--------- sessions/stats-analyses-multiple.qmd | 138 +++++++++++++-------------- 7 files changed, 187 insertions(+), 187 deletions(-) diff --git a/appendix/extra-exercises.qmd b/appendix/extra-exercises.qmd index 4a0950e..1977cb0 100644 --- a/appendix/extra-exercises.qmd +++ b/appendix/extra-exercises.qmd @@ -36,14 +36,14 @@ Let's make a change to our function and test out how the #' @return A data.frame/tibble. #' descriptive_stats <- function(data) { - data %>% - dplyr::group_by(metabolite) %>% + data |> + dplyr::group_by(metabolite) |> dplyr::summarise(dplyr::across(value, list( mean = mean, sd = sd, median = median, iqr = IQR - ))) %>% + ))) |> dplyr::mutate(dplyr::across(tidyselect::where(is.numeric), ~round(.x, digits = 1))) } ``` @@ -79,6 +79,6 @@ for now, commit the changes to the Git history with #| eval: false #| code-fold: true #| code-summary: "**Click for the solution**. Only click if you are struggling or are out of time." -linear_reg_specs <- linear_reg() %>% +linear_reg_specs <- linear_reg() |> set_engine("lm") ``` diff --git a/appendix/extras.qmd b/appendix/extras.qmd index 2bec845..62f55af 100644 --- a/appendix/extras.qmd +++ b/appendix/extras.qmd @@ -39,8 +39,8 @@ side by side, rather than stacked. 
```{r bar-plot-gender-class} #| eval: true -gender_by_class_plot <- lipidomics %>% - distinct(code, gender, class) %>% +gender_by_class_plot <- lipidomics |> + distinct(code, gender, class) |> ggplot(aes(x = class, fill = gender)) + geom_bar(position = "dodge") gender_by_class_plot diff --git a/appendix/model-variability.qmd b/appendix/model-variability.qmd index fc27865..ec05383 100644 --- a/appendix/model-variability.qmd +++ b/appendix/model-variability.qmd @@ -80,7 +80,7 @@ Create another code chunk at the bottom of `doc/learning.qmd` to add this code: ```{r split-metabolite-for-bootstrap} -lipidomics_list <- lipidomics %>% +lipidomics_list <- lipidomics |> split_by_metabolite() ``` @@ -120,8 +120,8 @@ functions. We'll `arrange()` by `code` to show how we can have duplicate persons when resampling: ```{r show-split-contents-analysis} -bootstraps(lipidomics_list[[1]], times = 10)$splits[[1]] %>% - analysis() %>% +bootstraps(lipidomics_list[[1]], times = 10)$splits[[1]] |> + analysis() |> arrange(code) ``` @@ -136,9 +136,9 @@ metabolites. ```{r create-workflow-for-bootstrap} workflow_for_bootstrap <- create_model_workflow( - logistic_reg() %>% + logistic_reg() |> set_engine("glm"), - lipidomics_list[[1]] %>% + lipidomics_list[[1]] |> create_recipe_spec(starts_with("metabolite_")) ) ``` @@ -192,11 +192,11 @@ workflow we've used throughout the course. 
#' generate_model_variation <- function(data) { create_model_workflow( - parsnip::logistic_reg() %>% + parsnip::logistic_reg() |> parsnip::set_engine("glm"), - data %>% + data |> create_recipe_spec(tidyselect::starts_with("metabolite_")) - ) %>% + ) |> tune::fit_resamples( resamples = rsample::bootstraps(data, times = 10), control = tune::control_resamples( @@ -210,7 +210,7 @@ generate_model_variation <- function(data) { Re-writing the code to use the function, it becomes: ```{r use-generate-variation-function} -bootstrapped_results <- lipidomics_list[[1]] %>% +bootstrapped_results <- lipidomics_list[[1]] |> generate_model_variation() bootstrapped_results ``` @@ -225,8 +225,8 @@ each model fit to the resampled set. We'll ignore all but the regular tibble based on the column given. ```{r unnext-extracts} -bootstrapped_results %>% - select(id, .extracts) %>% +bootstrapped_results |> + select(id, .extracts) |> unnest(cols = .extracts) ``` @@ -235,9 +235,9 @@ new column `.extracts` where each row is called a ``). So let's again `unnest()` this new `.extracts` column. ```{r unnest-unnest-bootstrap} -bootstrapped_results %>% - select(id, .extracts) %>% - unnest(cols = .extracts) %>% +bootstrapped_results |> + select(id, .extracts) |> + unnest(cols = .extracts) |> unnest(cols = .extracts) ``` @@ -248,11 +248,11 @@ only want the metabolite `estimate`, so we can use `filter()` and names with `add_original_metabolite_names()`. ```{r unnest-unnest-tidy-bootstrap-results} -bootstrapped_results %>% - select(id, .extracts) %>% - unnest(cols = .extracts) %>% - unnest(cols = .extracts) %>% - filter(str_detect(term, "metabolite_")) %>% +bootstrapped_results |> + select(id, .extracts) |> + unnest(cols = .extracts) |> + unnest(cols = .extracts) |> + filter(str_detect(term, "metabolite_")) |> add_original_metabolite_names(lipidomics) ``` @@ -266,13 +266,13 @@ Using the same workflow as before, let's convert this into a function: #' @return A data frame. 
#' tidy_bootstrap_output <- function(bootstrap_results) { - bootstrap_results %>% - dplyr::select(id, .extracts) %>% + bootstrap_results |> + dplyr::select(id, .extracts) |> # Need to unnest twice since first `.extracts` is a nest of another two # columns of `.extracts` and `.config`. - tidyr::unnest(cols = .extracts) %>% - tidyr::unnest(cols = .extracts) %>% - dplyr::filter(stringr::str_detect(term, "metabolite_")) %>% + tidyr::unnest(cols = .extracts) |> + tidyr::unnest(cols = .extracts) |> + dplyr::filter(stringr::str_detect(term, "metabolite_")) |> add_original_metabolite_names(lipidomics) } ``` @@ -285,10 +285,10 @@ run. ```{r chain-data-to-bootstrap-results} #| eval: false -metabolites_with_bootstrap_results <- lipidomics %>% - split_by_metabolite() %>% - map(generate_model_variation) %>% - map(tidy_bootstrap_output) %>% +metabolites_with_bootstrap_results <- lipidomics |> + split_by_metabolite() |> + map(generate_model_variation) |> + map(tidy_bootstrap_output) |> list_rbind() metabolites_with_bootstrap_results ``` @@ -317,7 +317,7 @@ Use this code as a guide for the function. ``` r calculate_variation <- function(___) { - ___ %>% + ___ |> # Code from above. ___ } @@ -334,10 +334,10 @@ calculate_variation <- function(___) { #' @return A data frame (or file path) #' calculate_variation <- function(data) { - data %>% - split_by_metabolite() %>% - purrr::map(generate_model_variation) %>% - purrr::map(tidy_bootstrap_output) %>% + data |> + split_by_metabolite() |> + purrr::map(generate_model_variation) |> + purrr::map(tidy_bootstrap_output) |> purrr::list_rbind() } ``` @@ -425,7 +425,7 @@ will use `scales = "free"` because the range of values for `estimate` are different for each `metabolite`. 
```{r plot-variation} -metabolites_with_bootstrap_results %>% +metabolites_with_bootstrap_results |> ggplot(aes(x = estimate)) + geom_dotplot() + facet_wrap(vars(metabolite), scales = "free") @@ -456,7 +456,7 @@ Let's use our function workflow with this code: #' @return A ggplot2 image. #' plot_variation <- function(model_results) { - model_results %>% + model_results |> ggplot2::ggplot(ggplot2::aes(x = estimate)) + ggplot2::geom_dotplot() + ggplot2::facet_wrap(ggplot2::vars(metabolite), scales = "free") diff --git a/data-raw/nmr-omics.R b/data-raw/nmr-omics.R index cfcb975..30087e7 100644 --- a/data-raw/nmr-omics.R +++ b/data-raw/nmr-omics.R @@ -35,36 +35,36 @@ lipidomics_full <- read_xlsx( # - Subject level data # Keep only lipidomic values -lipidomics_only <- lipidomics_full %>% +lipidomics_only <- lipidomics_full |> # Want to remove columns 2, 3, and 4 since they are "limits" # (we don't need them for this course) - select(-2:-4) %>% + select(-2:-4) |> # Remove the subject data rows - slice(-1:-4) %>% - mutate(across(-V1, as.numeric)) %>% + slice(-1:-4) |> + mutate(across(-V1, as.numeric)) |> # Make it so the metabolite values are all in one column, # which will make it easier to join with the subject data later. - pivot_longer(-V1) %>% + pivot_longer(-V1) |> rename(metabolite = V1) # Keep only subject data -subject_only <- lipidomics_full %>% +subject_only <- lipidomics_full |> # Remove the first metabolic name and limit columns, # don't need for this - select(-1:-3) %>% + select(-1:-3) |> # Keep only the subject data raw - slice(1:4) %>% - pivot_longer(cols = -V4) %>% - pivot_wider(names_from = V4, values_from = value) %>% + slice(1:4) |> + pivot_longer(cols = -V4) |> + pivot_wider(names_from = V4, values_from = value) |> # There is a weird "​" before some of the numbers, so we have # extract just the number first before converting to numeric. 
- mutate(Age = as.numeric(stringr::str_extract(Age, "\\d+"))) %>% + mutate(Age = as.numeric(stringr::str_extract(Age, "\\d+"))) |> rename_with(snakecase::to_snake_case) lipidomics <- full_join( subject_only, lipidomics_only -) %>% +) |> # Don't need anymore select(-name) diff --git a/sessions/pipelines.qmd b/sessions/pipelines.qmd index 9e9fe94..7c6f97f 100644 --- a/sessions/pipelines.qmd +++ b/sessions/pipelines.qmd @@ -410,9 +410,9 @@ write out the code! ```{r mean-sd-by-each-metabolite} #| filename: "doc/learning.qmd" #| eval: true -lipidomics %>% - group_by(metabolite) %>% - summarise(across(value, list(mean = mean, sd = sd))) %>% +lipidomics |> + group_by(metabolite) |> + summarise(across(value, list(mean = mean, sd = sd))) |> mutate(across(where(is.numeric), ~round(.x, digits = 1))) ``` @@ -478,9 +478,9 @@ we will be using this feature throughout the rest of this course. #' @return A data.frame/tibble. #' descriptive_stats <- function(data) { - data %>% - dplyr::group_by(metabolite) %>% - dplyr::summarise(dplyr::across(value, list(mean = mean, sd = sd))) %>% + data |> + dplyr::group_by(metabolite) |> + dplyr::summarise(dplyr::across(value, list(mean = mean, sd = sd))) |> dplyr::mutate(dplyr::across(tidyselect::where(is.numeric), ~round(.x, digits = 1))) } ``` @@ -587,7 +587,7 @@ It probably won't run though. That's because `{targets}` doesn't know about the packages that you need for the pipeline. To add it, we need to go to the `tar_option_set()` section of the `_targets.R` file and add to the `packages = c("tibble")` code with the packages we use that aren't -explicitly called via `::` (e.g. `%>%`). For now, we only need to add +explicitly called via `::` (e.g. `mutate()` used without the `dplyr::` prefix). For now, we only need to add `"dplyr"` to the `packages` argument. We can now put this code in the `packages` argument of @@ -728,7 +728,7 @@ tasks: #' @return A ggplot2 graph. 
#' plot_distributions <- function(data) { - data %>% + data |> ggplot2::ggplot(ggplot2::aes(x = value)) + ggplot2::geom_histogram() + ggplot2::facet_wrap(ggplot2::vars(metabolite), scales = "free") @@ -904,9 +904,9 @@ can use it to format the final table text to be `mean value (SD value)`: ```{r stats-to-table} #| filename: "doc/learning.qmd" -targets::tar_read(df_stats_by_metabolite) %>% - mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>% - select(Metabolite = metabolite, `Mean SD` = MeanSD) %>% +targets::tar_read(df_stats_by_metabolite) |> + mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |> + select(Metabolite = metabolite, `Mean SD` = MeanSD) |> knitr::kable(caption = "Descriptive statistics of the metabolites.") ``` @@ -915,9 +915,9 @@ targets::tar_read(df_stats_by_metabolite) %>% #| echo: false #| purl: true pretty_basic_stats_code <- ' -targets::tar_read(df_stats_by_metabolite) %>% - mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>% - select(Metabolite = metabolite, `Mean SD` = MeanSD) %>% +targets::tar_read(df_stats_by_metabolite) |> + mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |> + select(Metabolite = metabolite, `Mean SD` = MeanSD) |> knitr::kable(caption = "Descriptive statistics of the metabolites.") ' revise_by_text( @@ -932,10 +932,10 @@ git_ci("doc/learning.qmd", "Basic stats as a pretty table.") ```{r execute-only-table-basic-stats} #| eval: true #| echo: false -lipidomics %>% - descriptive_stats() %>% - mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) %>% - select(Metabolite = metabolite, `Mean (SD)` = MeanSD) %>% +lipidomics |> + descriptive_stats() |> + mutate(MeanSD = glue::glue("{value_mean} ({value_sd})")) |> + select(Metabolite = metabolite, `Mean (SD)` = MeanSD) |> knitr::kable(caption = "The mean and standard deviation of metabolites in the lipidomics dataset.") ``` diff --git a/sessions/stats-analyses-basic.qmd b/sessions/stats-analyses-basic.qmd index 7f43343..8a85df8 100644 --- 
a/sessions/stats-analyses-basic.qmd +++ b/sessions/stats-analyses-basic.qmd @@ -406,7 +406,7 @@ In the new code chunk, we will set up the model specs: ```{r logistic-reg-specs} #| filename: "doc/learning.qmd" -log_reg_specs <- logistic_reg() %>% +log_reg_specs <- logistic_reg() |> set_engine("glm") log_reg_specs ``` @@ -451,8 +451,8 @@ that there seems to be a data input error, since there are three ```{r too-many-cholesterols} #| filename: "doc/learning.qmd" -lipidomics %>% - count(code, metabolite) %>% +lipidomics |> + count(code, metabolite) |> filter(n > 1) ``` @@ -480,8 +480,8 @@ by setting the `values_fn` with `mean`. ```{r lipidomic-to-wider} #| column: page-inset-right #| filename: "doc/learning.qmd" -lipidomics_wide <- lipidomics %>% - mutate(metabolite = snakecase::to_snake_case(metabolite)) %>% +lipidomics_wide <- lipidomics |> + mutate(metabolite = snakecase::to_snake_case(metabolite)) |> pivot_wider( names_from = metabolite, values_from = value, @@ -500,10 +500,10 @@ moving them over into the `R/functions.R` file. #| column: page-inset-right #| filename: "doc/learning.qmd" column_values_to_snake_case <- function(data) { - data %>% + data |> dplyr::mutate(metabolite = snakecase::to_snake_case(metabolite)) } -lipidomics %>% +lipidomics |> column_values_to_snake_case() ``` @@ -523,11 +523,11 @@ where one of your function's arguments is to select columns: ```{r test-nse, error=TRUE} test_nse <- function(data, columns) { - data %>% + data |> dplyr::select(columns) } -lipidomics %>% +lipidomics |> test_nse(class) ``` @@ -551,14 +551,14 @@ approach is to wrap the argument with "curly-curly" (`{{}}`). 
```{r test-nse-fixed} test_nse <- function(data, columns) { - data %>% + data |> dplyr::select({{ columns }}) } -lipidomics %>% +lipidomics |> test_nse(class) -lipidomics %>% +lipidomics |> test_nse(c(class, age)) ``` ::: @@ -577,11 +577,11 @@ We can use curly-curly (combined with `across()`) to apply #| column: page-inset-right #| filename: "doc/learning.qmd" column_values_to_snake_case <- function(data, columns) { - data %>% + data |> dplyr::mutate(dplyr::across({{ columns }}, snakecase::to_snake_case)) } -lipidomics %>% +lipidomics |> column_values_to_snake_case(metabolite) ``` @@ -600,7 +600,7 @@ with {{< var keybind.source >}} #' @return A data frame. #' column_values_to_snake_case <- function(data, columns) { - data %>% + data |> dplyr::mutate(dplyr::across({{ columns }}, snakecase::to_snake_case)) } ``` @@ -610,9 +610,9 @@ Now add the new function above the `pivot_wider()` code in the `doc/learning.qmd ```{r snakecase-function-before-pivot-wider} #| column: page-inset-right #| filename: "doc/learning.qmd" -lipidomics_wide <- lipidomics %>% - column_values_to_snake_case(metabolite) %>% - mutate(metabolite = snakecase::to_snake_case(metabolite)) %>% +lipidomics_wide <- lipidomics |> + column_values_to_snake_case(metabolite) |> + mutate(metabolite = snakecase::to_snake_case(metabolite)) |> pivot_wider( names_from = metabolite, values_from = value, @@ -631,7 +631,7 @@ it into a new function. 1. Name the new function `metabolites_to_wider`. 2. Include one argument in the new `function()`: `data`. -3. Use `data %>%` at the beginning, like we did with the +3. Use `data |>` at the beginning, like we did with the `column_values_to_snake_case()`. 4. Use `tidyr::` before the `pivot_wider()` function. 5. Add the Roxygen documentation with {{< var keybind.roxygen >}}. @@ -650,7 +650,7 @@ it into a new function. #' @return A wide data frame. 
#' metabolites_to_wider <- function(data) { - data %>% + data |> tidyr::pivot_wider( names_from = metabolite, values_from = value, @@ -694,8 +694,8 @@ some nice features that we will use later on. ```{r recipes-without-formula} #| filename: "doc/learning.qmd" -recipe(lipidomics_wide) %>% - update_role(metabolite_cholesterol, age, gender, new_role = "predictor") %>% +recipe(lipidomics_wide) |> + update_role(metabolite_cholesterol, age, gender, new_role = "predictor") |> update_role(class, new_role = "outcome") ``` @@ -740,9 +740,9 @@ variables. We can add this to the end of the recipe: ```{r recipes-with-step-normalize} #| filename: "doc/learning.qmd" -recipe(lipidomics_wide) %>% - update_role(metabolite_cholesterol, age, gender, new_role = "predictor") %>% - update_role(class, new_role = "outcome") %>% +recipe(lipidomics_wide) |> + update_role(metabolite_cholesterol, age, gender, new_role = "predictor") |> + update_role(class, new_role = "outcome") |> step_normalize(starts_with("metabolite_")) ``` @@ -763,9 +763,9 @@ from the `{tidyselect}` package. #' @return #' create_recipe_spec <- function(data, metabolite_variable) { - recipes::recipe(data) %>% - recipes::update_role({{ metabolite_variable }}, age, gender, new_role = "predictor") %>% - recipes::update_role(class, new_role = "outcome") %>% + recipes::recipe(data) |> + recipes::update_role({{ metabolite_variable }}, age, gender, new_role = "predictor") |> + recipes::update_role(class, new_role = "outcome") |> recipes::step_normalize(tidyselect::starts_with("metabolite_")) } ``` @@ -775,7 +775,7 @@ And test it out: ```{r use-create-recipe-specs-fn} #| filename: "doc/learning.qmd" #| column: page-inset-right -recipe_specs <- lipidomics_wide %>% +recipe_specs <- lipidomics_wide |> create_recipe_spec(metabolite_cholesterol) recipe_specs ``` @@ -812,8 +812,8 @@ slightly different types). 
All model workflows need to start with ```{r use-workflow-for-model} #| column: page-inset-right #| filename: "doc/learning.qmd" -workflow() %>% - add_model(log_reg_specs) %>% +workflow() |> + add_model(log_reg_specs) |> add_recipe(recipe_specs) ``` @@ -832,8 +832,8 @@ inside the `R/functions.R` file. #' @return A workflow object #' create_model_workflow <- function(model_specs, recipe_specs) { - workflows::workflow() %>% - workflows::add_model(model_specs) %>% + workflows::workflow() |> + workflows::add_model(model_specs) |> workflows::add_recipe(recipe_specs) } ``` @@ -847,9 +847,9 @@ creation from scratch: #| column: page-inset-right #| filename: "doc/learning.qmd" model_workflow <- create_model_workflow( - logistic_reg() %>% + logistic_reg() |> set_engine("glm"), - lipidomics_wide %>% + lipidomics_wide |> create_recipe_spec(metabolite_cholesterol) ) model_workflow @@ -861,7 +861,7 @@ Now, we can do the final thing: Fitting the data to the model with ```{r fit-model-workflow-to-data} #| column: page-inset-right #| filename: "doc/learning.qmd" -fitted_model <- model_workflow %>% +fitted_model <- model_workflow |> fit(lipidomics_wide) fitted_model ``` @@ -875,7 +875,7 @@ the `extract_fit_parsnip()` function. ```{r extract-model-fit} #| column: page-inset-right #| filename: "doc/learning.qmd" -fitted_model %>% +fitted_model |> extract_fit_parsnip() ``` @@ -891,7 +891,7 @@ dependencies: use_package("broom") ``` -Then, we add the `tidy()` function to our model using the `%>%` pipe. +Then, we add the `tidy()` function to our model using the `|>` pipe. Since we are using a logistic regression model, we need to consider how we want the estimates to be presented, probably depending on how we want to visualize our results. If we set `exponentiate = TRUE` in `tidy()`, @@ -902,8 +902,8 @@ coefficient. 
Here we choose `exponentiate = TRUE`: ```{r tidy-up-model-results} #| column: page-inset-right #| filename: "doc/learning.qmd" -fitted_model %>% - extract_fit_parsnip() %>% +fitted_model |> + extract_fit_parsnip() |> tidy(exponentiate = TRUE) ``` @@ -921,8 +921,8 @@ thing here: Make another function (and move it to `R/functions.R`)! #' @return A data frame. #' tidy_model_output <- function(workflow_fitted_model) { - workflow_fitted_model %>% - workflows::extract_fit_parsnip() %>% + workflow_fitted_model |> + workflows::extract_fit_parsnip() |> broom::tidy(exponentiate = TRUE) } ``` @@ -932,7 +932,7 @@ Replacing the code in the `doc/learning.qmd` file to use the function. ```{r use-tidy-model-output-fn} #| column: page-inset-right #| filename: "doc/learning.qmd" -fitted_model %>% +fitted_model |> tidy_model_output() ``` @@ -942,12 +942,12 @@ If we revise the code so it is one pipe, it would look like: #| column: page-inset-right #| filename: "doc/learning.qmd" create_model_workflow( - logistic_reg() %>% + logistic_reg() |> set_engine("glm"), - lipidomics_wide %>% + lipidomics_wide |> create_recipe_spec(metabolite_cholesterol) -) %>% - fit(lipidomics_wide) %>% +) |> + fit(lipidomics_wide) |> tidy_model_output() ``` diff --git a/sessions/stats-analyses-multiple.qmd b/sessions/stats-analyses-multiple.qmd index 2e9c35e..40b4a4f 100644 --- a/sessions/stats-analyses-multiple.qmd +++ b/sessions/stats-analyses-multiple.qmd @@ -8,11 +8,11 @@ source(here::here("R/project-functions.R")) library(tidyverse) library(tidymodels) lipidomics <- read_csv(here::here("data/lipidomics.csv")) -lipidomics_wide <- lipidomics %>% - mutate(metabolite = snakecase::to_snake_case(metabolite)) %>% +lipidomics_wide <- lipidomics |> + mutate(metabolite = snakecase::to_snake_case(metabolite)) |> metabolites_to_wider() -# lipidomics %>% -# calculate_estimates() %>% +# lipidomics |> +# calculate_estimates() |> # readr::write_csv(here::here("data/model-estimates.csv")) ``` @@ -102,7 +102,7 @@ case: 
```{r chain-col-to-snakecase} #| filename: "doc/learning.qmd" -lipidomics %>% +lipidomics |> column_values_to_snake_case(metabolite) ``` @@ -134,16 +134,16 @@ three. ```{r chain-split-by-metabolite} #| eval: false #| filename: "doc/learning.qmd" -lipidomics %>% - column_values_to_snake_case(metabolite) %>% +lipidomics |> + column_values_to_snake_case(metabolite) |> group_split(metabolite) ``` ```{r output-only-chain-split-by-metabolite} #| echo: false -lipidomics %>% - column_values_to_snake_case(metabolite) %>% - group_split(metabolite) %>% +lipidomics |> + column_values_to_snake_case(metabolite) |> + group_split(metabolite) |> head(3) ``` @@ -155,18 +155,18 @@ the first three): ```{r chain-map-to-wider} #| eval: false #| filename: "doc/learning.qmd" -lipidomics %>% - column_values_to_snake_case(metabolite) %>% - group_split(metabolite) %>% +lipidomics |> + column_values_to_snake_case(metabolite) |> + group_split(metabolite) |> map(metabolites_to_wider) ``` ```{r output-only-chain-map-to-wider} #| echo: false -lipidomics %>% - column_values_to_snake_case(metabolite) %>% - group_split(metabolite) %>% - map(metabolites_to_wider) %>% +lipidomics |> + column_values_to_snake_case(metabolite) |> + group_split(metabolite) |> + map(metabolites_to_wider) |> head(3) ``` @@ -188,9 +188,9 @@ into the `R/functions.R` file, and then `source()` the file with #' @return A list of data frames. 
#' split_by_metabolite <- function(data) { - data %>% - column_values_to_snake_case(metabolite) %>% - dplyr::group_split(metabolite) %>% + data |> + column_values_to_snake_case(metabolite) |> + dplyr::group_split(metabolite) |> purrr::map(metabolites_to_wider) } ``` @@ -200,14 +200,14 @@ In the `doc/learning.qmd`, use the new function in the code: ```{r split-by-metabolite} #| filename: "doc/learning.qmd" #| eval: false -lipidomics %>% +lipidomics |> split_by_metabolite() ``` ```{r output-only-split-by-metabolite} #| echo: false -lipidomics %>% - split_by_metabolite() %>% +lipidomics |> + split_by_metabolite() |> head(3) ``` @@ -232,12 +232,12 @@ move into the `R/functions.R` file, and then `source()` the file with #' generate_model_results <- function(data) { create_model_workflow( - parsnip::logistic_reg() %>% + parsnip::logistic_reg() |> parsnip::set_engine("glm"), - data %>% + data |> create_recipe_spec(tidyselect::starts_with("metabolite_")) - ) %>% - parsnip::fit(data) %>% + ) |> + parsnip::fit(data) |> tidy_model_output() } ``` @@ -247,9 +247,9 @@ Then we add it to the end of the pipe, but using `map()` and ```{r chain-generate-model-results} #| filename: "doc/learning.qmd" -lipidomics %>% - split_by_metabolite() %>% - map(generate_model_results) %>% +lipidomics |> + split_by_metabolite() |> + map(generate_model_results) |> list_rbind() ``` @@ -259,10 +259,10 @@ and `str_detect()`. 
```{r chain-filter-terms} #| filename: "doc/learning.qmd" -model_estimates <- lipidomics %>% - split_by_metabolite() %>% - map(generate_model_results) %>% - list_rbind() %>% +model_estimates <- lipidomics |> + split_by_metabolite() |> + map(generate_model_results) |> + list_rbind() |> filter(str_detect(term, "metabolite_")) model_estimates ``` @@ -278,8 +278,8 @@ create a duplicate column of `metabolite` called `term` (to match the ```{r duplicate-original-vars} #| filename: "doc/learning.qmd" -lipidomics %>% - select(metabolite) %>% +lipidomics |> + select(metabolite) |> mutate(term = metabolite) ``` @@ -288,9 +288,9 @@ function on the `term` column. ```{r dup-column-to-snakecase} #| filename: "doc/learning.qmd" -lipidomics %>% - select(metabolite) %>% - mutate(term = metabolite) %>% +lipidomics |> + select(metabolite) |> + mutate(term = metabolite) |> column_values_to_snake_case(term) ``` @@ -299,10 +299,10 @@ case'd name, so we can add that with `mutate()` and `str_c()`: ```{r dup-column-append-metabolite} #| filename: "doc/learning.qmd" -lipidomics %>% - select(metabolite) %>% - mutate(term = metabolite) %>% - column_values_to_snake_case(term) %>% +lipidomics |> + select(metabolite) |> + mutate(term = metabolite) |> + column_values_to_snake_case(term) |> mutate(term = str_c("metabolite_", term)) ``` @@ -313,10 +313,10 @@ only the `metabolite` and `term` variables. 
```{r dup-column-distinct} #| filename: "doc/learning.qmd" -lipidomics %>% - mutate(term = metabolite) %>% - column_values_to_snake_case(term) %>% - mutate(term = str_c("metabolite_", term)) %>% +lipidomics |> + mutate(term = metabolite) |> + column_values_to_snake_case(term) |> + mutate(term = str_c("metabolite_", term)) |> distinct(term, metabolite) ``` @@ -324,11 +324,11 @@ The last step is to `right_join()` with the `model_estimates`: ```{r dup-column-full-join} #| filename: "doc/learning.qmd" -lipidomics %>% - mutate(term = metabolite) %>% - column_values_to_snake_case(term) %>% - mutate(term = str_c("metabolite_", term)) %>% - distinct(term, metabolite) %>% +lipidomics |> + mutate(term = metabolite) |> + column_values_to_snake_case(term) |> + mutate(term = str_c("metabolite_", term)) |> + distinct(term, metabolite) |> right_join(model_estimates, by = "term") ``` @@ -365,7 +365,7 @@ as a starting point. ``` r ___ <- function(___, ___) { - ___ %>% + ___ |> } ``` @@ -381,11 +381,11 @@ ___ <- function(___, ___) { #' @return A data frame. #' add_original_metabolite_names <- function(model_results, data) { - data %>% - dplyr::mutate(term = metabolite) %>% - column_values_to_snake_case(term) %>% - dplyr::mutate(term = stringr::str_c("metabolite_", term)) %>% - dplyr::distinct(term, metabolite) %>% + data |> + dplyr::mutate(term = metabolite) |> + column_values_to_snake_case(term) |> + dplyr::mutate(term = stringr::str_c("metabolite_", term)) |> + dplyr::distinct(term, metabolite) |> dplyr::right_join(model_results, by = "term") } ``` @@ -395,9 +395,9 @@ results, using the scaffold below as a starting point. ``` r calculate_estimates <- function(data) { - ___ %>% + ___ |> # All the other code to create the results - ___ %>% + ___ |> add_original_metabolite_names(data) } ``` @@ -413,11 +413,11 @@ calculate_estimates <- function(data) { #' @return A data frame. 
#' calculate_estimates <- function(data) { - data %>% - split_by_metabolite() %>% - purrr::map(generate_model_results) %>% - purrr::list_rbind() %>% - dplyr::filter(stringr::str_detect(term, "metabolite_")) %>% + data |> + split_by_metabolite() |> + purrr::map(generate_model_results) |> + purrr::list_rbind() |> + dplyr::filter(stringr::str_detect(term, "metabolite_")) |> add_original_metabolite_names(data) } ``` @@ -519,7 +519,7 @@ git_ci("doc/learning.qmd", "Add code for model estimates to report.") ```{r exec-only-model-estimates} #| include: false -model_estimates <- here::here("data/model-estimates.csv") %>% +model_estimates <- here::here("data/model-estimates.csv") |> readr::read_csv(show_col_types = FALSE) ``` @@ -539,7 +539,7 @@ dot-whisker plots, the "geom" we would use is called ```{r plot-estimates-pointrange-only} #| filename: "doc/learning.qmd" -plot_estimates <- model_estimates %>% +plot_estimates <- model_estimates |> ggplot(aes( x = estimate, y = metabolite, @@ -581,7 +581,7 @@ as a target in the pipeline. Use the scaffold below as a guide. ``` r plot_estimates <- function(results) { - ___ %>% + ___ |> # Plot code here: ___ } @@ -598,7 +598,7 @@ plot_estimates <- function(results) { #' @return A ggplot2 figure. #' plot_estimates <- function(results) { - results %>% + results |> ggplot2::ggplot(ggplot2::aes( x = estimate, y = metabolite, xmin = estimate - std.error,