From 285865463308384e92393d219d71e233bcb9a6f5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 23 May 2024 10:58:56 +0200 Subject: [PATCH] update --- R/visualisation_matrix.R | 39 +++++---- R/visualisation_recipe.estimate_means.R | 32 ++++---- R/visualisation_recipe.estimate_slopes.R | 32 ++++---- man/visualisation_matrix.Rd | 79 +++++++++++++++---- ...visualisation_recipe.estimate_predicted.Rd | 16 ++-- 5 files changed, 125 insertions(+), 73 deletions(-) diff --git a/R/visualisation_matrix.R b/R/visualisation_matrix.R index 98461f921..80dc97b13 100644 --- a/R/visualisation_matrix.R +++ b/R/visualisation_matrix.R @@ -4,7 +4,7 @@ #' function. Same arguments apply. #' #' @inheritParams insight::get_datagrid -#' @param target Deprecated name. Please use `at` instead. +#' @param target,at Deprecated name. Please use `by` instead. #' #' @return Reference grid data frame. #' @@ -15,24 +15,24 @@ #' data <- rbind(iris, iris[149, ], make.row.names = FALSE) #' #' # Single variable is of interest; all others are "fixed" -#' visualisation_matrix(data, at = "Sepal.Length") -#' visualisation_matrix(data, at = "Sepal.Length", length = 3) -#' visualisation_matrix(data, at = "Sepal.Length", range = "ci", ci = 0.90) -#' visualisation_matrix(data, at = "Sepal.Length", factors = "mode") +#' visualisation_matrix(data, by = "Sepal.Length") +#' visualisation_matrix(data, by = "Sepal.Length", length = 3) +#' visualisation_matrix(data, by = "Sepal.Length", range = "ci", ci = 0.90) +#' visualisation_matrix(data, by = "Sepal.Length", factors = "mode") #' #' # Multiple variables are of interest, creating a combination -#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), length = 3) -#' visualisation_matrix(data, at = c(1, 3), length = 3) -#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), preserve_range = TRUE) -#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), numerics = 0) -#' visualisation_matrix(data, at = c("Sepal.Length = 3", "Species")) -#' visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) +#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), length = 3) +#' visualisation_matrix(data, by = c(1, 3), length = 3) +#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), preserve_range = TRUE) +#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), numerics = 0) +#' visualisation_matrix(data, by = c("Sepal.Length = 3", "Species")) +#' visualisation_matrix(data, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) #' #' # with list-style at-argument -#' visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa")) +#' visualisation_matrix(data, by = list(Sepal.Length = c(1, 3), Species = "setosa")) #' #' # Standardize -#' vizdata <- visualisation_matrix(data, at = "Sepal.Length") +#' vizdata <- visualisation_matrix(data, by = "Sepal.Length") #' standardize(vizdata) #' @export visualisation_matrix <- function(x, ...) { @@ -46,20 +46,25 @@ visualisation_matrix <- function(x, ...) { #' @rdname visualisation_matrix #' @export visualisation_matrix.data.frame <- function(x, - at = "all", + by = "all", target = NULL, + at = NULL, factors = "reference", numerics = "mean", preserve_range = FALSE, reference = x, ...) { if (!is.null(target)) { - insight::format_warning("The 'target' argument name is deprecated in favour of 'at'. Please replace 'target' with 'at'.") - at <- target + insight::format_warning("The 'target` argument name is deprecated in favour of `by`. Please replace `target` with `by`.") # nolint + by <- target + } + if (!is.null(at)) { + insight::format_warning("The `at` argument is deprecated and will be removed in the future. Please use `by` instead.") # nolint + by <- at } insight::get_datagrid(x, - at = at, + by = by, factors = factors, numerics = numerics, preserve_range = preserve_range, diff --git a/R/visualisation_recipe.estimate_means.R b/R/visualisation_recipe.estimate_means.R index 8ece2f849..c062f32fe 100644 --- a/R/visualisation_recipe.estimate_means.R +++ b/R/visualisation_recipe.estimate_means.R @@ -23,14 +23,14 @@ #' data$new_factor <- as.factor(rep(c("A", "B"), length.out = nrow(mtcars))) #' #' model <- lm(mpg ~ new_factor * cyl * wt, data = data) -#' x <- estimate_means(model, at = c("new_factor", "cyl")) +#' x <- estimate_means(model, by =c("new_factor", "cyl")) #' plot(visualisation_recipe(x)) #' #' # Modulations -------------- -#' x <- estimate_means(model, at = c("new_factor", "wt")) +#' x <- estimate_means(model, by =c("new_factor", "wt")) #' plot(visualisation_recipe(x)) #' -#' # x <- estimate_means(model, at = c("new_factor", "cyl", "wt")) +#' # x <- estimate_means(model, by =c("new_factor", "cyl", "wt")) #' # plot(visualisation_recipe(x)) # TODO: broken #' #' #' # GLMs --------------------- @@ -54,17 +54,17 @@ visualisation_recipe.estimate_means <- function(x, # Main aesthetics ----------------- - data <- as.data.frame(x) + vis_data <- as.data.frame(x) y <- info$response color <- NULL alpha <- NULL - levels <- info$at[info$at %in% names(data[!sapply(data, is.numeric)])] - modulate <- info$at[info$at %in% names(data[sapply(data, is.numeric)])] - x1 <- levels[1] - if (length(levels) > 1L) { - color <- levels[2] - if (length(levels) > 2L) { + by_levels <- info$at[info$at %in% names(vis_data[!sapply(vis_data, is.numeric)])] + modulate <- info$at[info$at %in% names(vis_data[sapply(vis_data, is.numeric)])] + x1 <- by_levels[1] + if (length(by_levels) > 1L) { + color <- by_levels[2] + if (length(by_levels) > 2L) { # TODO: add facetting (needs updating see::geom_from_list to work with facets) insight::format_warning("Cannot deal with more than 2 levels variables for now. Other ones will be omitted.") } @@ -120,7 +120,7 @@ visualisation_recipe.estimate_means <- function(x, # Line layers[[paste0("l", l)]] <- .visualisation_means_line( - data, + vis_data, x1, y = info$coef_name[1], color = color, @@ -131,7 +131,7 @@ visualisation_recipe.estimate_means <- function(x, # Pointrange layers[[paste0("l", l)]] <- .visualisation_means_pointrange( - data, + vis_data, x1, y = info$coef_name[1], color = color, @@ -145,7 +145,7 @@ visualisation_recipe.estimate_means <- function(x, # Out class(layers) <- unique(c("visualisation_recipe", "see_visualisation_recipe", class(layers))) - attr(layers, "data") <- data + attr(layers, "data") <- vis_data layers } @@ -229,16 +229,16 @@ visualisation_recipe.estimate_means <- function(x, .visualisation_means_labs <- function(info, x1, y, labs = NULL) { if (all(info$coef_name == "Probability")) { - title <- "Estimated Mean Probabilities" + vis_title <- "Estimated Mean Probabilities" } else { - title <- "Estimated Means" + vis_title <- "Estimated Means" } out <- list( geom = "labs", x = x1, y = y, - title = paste0(title, " (", format(insight::find_formula(info$model)), ")") + title = paste0(vis_title, " (", format(insight::find_formula(info$model)), ")") ) if (!is.null(labs)) out <- utils::modifyList(out, labs) # Update with additional args out diff --git a/R/visualisation_recipe.estimate_slopes.R b/R/visualisation_recipe.estimate_slopes.R index 028e981b3..0f03bc614 100644 --- a/R/visualisation_recipe.estimate_slopes.R +++ b/R/visualisation_recipe.estimate_slopes.R @@ -6,18 +6,18 @@ #' # ============================================== #' if (require("ggplot2")) { #' model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) -#' x <- estimate_slopes(model, trend = "Petal.Length", at = "Species") +#' x <- estimate_slopes(model, trend = "Petal.Length", by ="Species") #' #' layers <- visualisation_recipe(x) #' layers #' plot(layers) #' #' model <- lm(Petal.Length ~ poly(Sepal.Width, 4), data = iris) -#' x <- estimate_slopes(model, at = "Sepal.Width", length = 20) +#' x <- estimate_slopes(model, by ="Sepal.Width", length = 20) #' plot(visualisation_recipe(x)) #' #' model <- lm(Petal.Length ~ Species * poly(Sepal.Width, 3), data = iris) -#' x <- estimate_slopes(model, at = c("Sepal.Width", "Species")) +#' x <- estimate_slopes(model, by =c("Sepal.Width", "Species")) #' plot(visualisation_recipe(x)) #' } #' \donttest{ @@ -27,11 +27,11 @@ #' data$Petal.Length <- data$Petal.Length^2 #' #' model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length), data = data) -#' x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length"), length = 20) +#' x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length"), length = 20) #' plot(visualisation_recipe(x)) #' #' model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length, by = Species), data = data) -#' x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length", "Species"), length = 10) +#' x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length", "Species"), length = 10) #' plot(visualisation_recipe(x)) #' } #' } @@ -80,19 +80,17 @@ visualisation_recipe.estimate_slopes <- function(x, if (length(facs) > 0) { facet <- facs[1] } + } else if (length(facs) > 0) { + color <- facs[1] + fill <- facs[1] + group_ribbon <- facs[1] + group_line <- facs[1] + alpha <- "Confidence" } else { - if (length(facs) > 0) { - color <- facs[1] - fill <- facs[1] - group_ribbon <- facs[1] - group_line <- facs[1] - alpha <- "Confidence" - } else { - group_ribbon <- ".group" - group_line <- 1 - fill <- "Confidence" - color <- NULL - } + group_ribbon <- ".group" + group_line <- 1 + fill <- "Confidence" + color <- NULL } } diff --git a/man/visualisation_matrix.Rd b/man/visualisation_matrix.Rd index dec6f37d1..28c0850e2 100644 --- a/man/visualisation_matrix.Rd +++ b/man/visualisation_matrix.Rd @@ -11,8 +11,9 @@ visualisation_matrix(x, ...) \method{visualisation_matrix}{data.frame}( x, - at = "all", + by = "all", target = NULL, + at = NULL, factors = "reference", numerics = "mean", preserve_range = FALSE, @@ -30,9 +31,57 @@ visualisation_matrix(x, ...) \item{...}{Arguments passed to or from other methods (for instance, \code{length} or \code{range} to control the spread of numeric variables.).} -\item{at}{Deprecated. Use \code{by} instead.} +\item{by}{Indicates the \emph{focal predictors} (variables) for the reference grid +and at which values focal predictors should be represented. If not specified +otherwise, representative values for numeric variables or predictors are +evenly distributed from the minimum to the maximum, with a total number of +\code{length} values covering that range (see 'Examples'). Possible options for +\code{by} are: +\itemize{ +\item \code{"all"}, which will include all variables or predictors. +\item a character vector of one or more variable or predictor names, like +\code{c("Species", "Sepal.Width")}, which will create a grid of all combinations +of unique values. For factors, will use all levels, for numeric variables, +will use a range of length \code{length} (evenly spread from minimum to maximum) +and for character vectors, will use all unique values. +\item a list of named elements, indicating focal predictors and their representative +values, e.g. \code{by = list(Sepal.Length = c(2, 4), Species = "setosa")}. +\item a string with assignments, e.g. \code{by = "Sepal.Length = 2"} or +\code{by = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single +and double quotes to assign strings within strings. +} + +There is a special handling of assignments with \emph{brackets}, i.e. values +defined inside \code{[} and \verb{]}.For \strong{numeric} variables, the value(s) inside +the brackets should either be +\itemize{ +\item two values, indicating minimum and maximum (e.g. \code{by = "Sepal.Length = [0, 5]"}), +for which a range of length \code{length} (evenly spread from given minimum to +maximum) is created. +\item more than two numeric values \code{by = "Sepal.Length = [2,3,4,5]"}, in which +case these values are used as representative values. +\item a "token" that creates pre-defined representative values: +\itemize{ +\item for mean and -/+ 1 SD around the mean: \code{"x = [sd]"} +\item for median and -/+ 1 MAD around the median: \code{"x = [mad]"} +\item for Tukey's five number summary (minimum, lower-hinge, median, upper-hinge, maximum): \code{"x = [fivenum]"} +\item for terciles, including minimum and maximum: \code{"x = [terciles]"} +\item for terciles, excluding minimum and maximum: \code{"x = [terciles2]"} +\item for quartiles, including minimum and maximum: \code{"x = [quartiles]"} +\item for quartiles, excluding minimum and maximum: \code{"x = [quartiles2]"} +\item for minimum and maximum value: \code{"x = [minmax]"} +\item for 0 and the maximum value: \code{"x = [zeromax]"} +} +} + +For \strong{factor} variables, the value(s) inside the brackets should indicate +one or more factor levels, like \code{by = "Species = [setosa, versicolor]"}. +\strong{Note}: the \code{length} argument will be ignored when using brackets-tokens. + +The remaining variables not specified in \code{by} will be fixed (see also arguments +\code{factors} and \code{numerics}).} -\item{target}{Deprecated name. Please use \code{at} instead.} +\item{target, at}{Deprecated name. Please use \code{by} instead.} \item{factors}{Type of summary for factors. Can be \code{"reference"} (set at the reference level), \code{"mode"} (set at the most common level) or \code{"all"} to @@ -66,23 +115,23 @@ library(modelbased) data <- rbind(iris, iris[149, ], make.row.names = FALSE) # Single variable is of interest; all others are "fixed" -visualisation_matrix(data, at = "Sepal.Length") -visualisation_matrix(data, at = "Sepal.Length", length = 3) -visualisation_matrix(data, at = "Sepal.Length", range = "ci", ci = 0.90) -visualisation_matrix(data, at = "Sepal.Length", factors = "mode") +visualisation_matrix(data, by = "Sepal.Length") +visualisation_matrix(data, by = "Sepal.Length", length = 3) +visualisation_matrix(data, by = "Sepal.Length", range = "ci", ci = 0.90) +visualisation_matrix(data, by = "Sepal.Length", factors = "mode") # Multiple variables are of interest, creating a combination -visualisation_matrix(data, at = c("Sepal.Length", "Species"), length = 3) -visualisation_matrix(data, at = c(1, 3), length = 3) -visualisation_matrix(data, at = c("Sepal.Length", "Species"), preserve_range = TRUE) -visualisation_matrix(data, at = c("Sepal.Length", "Species"), numerics = 0) -visualisation_matrix(data, at = c("Sepal.Length = 3", "Species")) -visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) +visualisation_matrix(data, by = c("Sepal.Length", "Species"), length = 3) +visualisation_matrix(data, by = c(1, 3), length = 3) +visualisation_matrix(data, by = c("Sepal.Length", "Species"), preserve_range = TRUE) +visualisation_matrix(data, by = c("Sepal.Length", "Species"), numerics = 0) +visualisation_matrix(data, by = c("Sepal.Length = 3", "Species")) +visualisation_matrix(data, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'")) # with list-style at-argument -visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa")) +visualisation_matrix(data, by = list(Sepal.Length = c(1, 3), Species = "setosa")) # Standardize -vizdata <- visualisation_matrix(data, at = "Sepal.Length") +vizdata <- visualisation_matrix(data, by = "Sepal.Length") standardize(vizdata) } diff --git a/man/visualisation_recipe.estimate_predicted.Rd b/man/visualisation_recipe.estimate_predicted.Rd index 14190120f..152784e8c 100644 --- a/man/visualisation_recipe.estimate_predicted.Rd +++ b/man/visualisation_recipe.estimate_predicted.Rd @@ -126,14 +126,14 @@ data$cyl <- as.factor(data$cyl) data$new_factor <- as.factor(rep(c("A", "B"), length.out = nrow(mtcars))) model <- lm(mpg ~ new_factor * cyl * wt, data = data) -x <- estimate_means(model, at = c("new_factor", "cyl")) +x <- estimate_means(model, by =c("new_factor", "cyl")) plot(visualisation_recipe(x)) # Modulations -------------- -x <- estimate_means(model, at = c("new_factor", "wt")) +x <- estimate_means(model, by =c("new_factor", "wt")) plot(visualisation_recipe(x)) -# x <- estimate_means(model, at = c("new_factor", "cyl", "wt")) +# x <- estimate_means(model, by =c("new_factor", "cyl", "wt")) # plot(visualisation_recipe(x)) # TODO: broken #' # GLMs --------------------- @@ -257,18 +257,18 @@ if (require("ggplot2") && require("rstanarm")) { # ============================================== if (require("ggplot2")) { model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) - x <- estimate_slopes(model, trend = "Petal.Length", at = "Species") + x <- estimate_slopes(model, trend = "Petal.Length", by ="Species") layers <- visualisation_recipe(x) layers plot(layers) model <- lm(Petal.Length ~ poly(Sepal.Width, 4), data = iris) - x <- estimate_slopes(model, at = "Sepal.Width", length = 20) + x <- estimate_slopes(model, by ="Sepal.Width", length = 20) plot(visualisation_recipe(x)) model <- lm(Petal.Length ~ Species * poly(Sepal.Width, 3), data = iris) - x <- estimate_slopes(model, at = c("Sepal.Width", "Species")) + x <- estimate_slopes(model, by =c("Sepal.Width", "Species")) plot(visualisation_recipe(x)) } \donttest{ @@ -278,11 +278,11 @@ if (require("mgcv")) { data$Petal.Length <- data$Petal.Length^2 model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length), data = data) - x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length"), length = 20) + x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length"), length = 20) plot(visualisation_recipe(x)) model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length, by = Species), data = data) - x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length", "Species"), length = 10) + x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length", "Species"), length = 10) plot(visualisation_recipe(x)) } }