From 285865463308384e92393d219d71e233bcb9a6f5 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Thu, 23 May 2024 10:58:56 +0200
Subject: [PATCH] update

---
 R/visualisation_matrix.R                      | 39 +++++----
 R/visualisation_recipe.estimate_means.R       | 32 ++++----
 R/visualisation_recipe.estimate_slopes.R      | 32 ++++----
 man/visualisation_matrix.Rd                   | 79 +++++++++++++++----
 ...visualisation_recipe.estimate_predicted.Rd | 16 ++--
 5 files changed, 125 insertions(+), 73 deletions(-)

diff --git a/R/visualisation_matrix.R b/R/visualisation_matrix.R
index 98461f921..80dc97b13 100644
--- a/R/visualisation_matrix.R
+++ b/R/visualisation_matrix.R
@@ -4,7 +4,7 @@
 #' function. Same arguments apply.
 #'
 #' @inheritParams insight::get_datagrid
-#' @param target Deprecated name. Please use `at` instead.
+#' @param target,at Deprecated name. Please use `by` instead.
 #'
 #' @return Reference grid data frame.
 #'
@@ -15,24 +15,24 @@
 #' data <- rbind(iris, iris[149, ], make.row.names = FALSE)
 #'
 #' # Single variable is of interest; all others are "fixed"
-#' visualisation_matrix(data, at = "Sepal.Length")
-#' visualisation_matrix(data, at = "Sepal.Length", length = 3)
-#' visualisation_matrix(data, at = "Sepal.Length", range = "ci", ci = 0.90)
-#' visualisation_matrix(data, at = "Sepal.Length", factors = "mode")
+#' visualisation_matrix(data, by = "Sepal.Length")
+#' visualisation_matrix(data, by = "Sepal.Length", length = 3)
+#' visualisation_matrix(data, by = "Sepal.Length", range = "ci", ci = 0.90)
+#' visualisation_matrix(data, by = "Sepal.Length", factors = "mode")
 #'
 #' # Multiple variables are of interest, creating a combination
-#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), length = 3)
-#' visualisation_matrix(data, at = c(1, 3), length = 3)
-#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), preserve_range = TRUE)
-#' visualisation_matrix(data, at = c("Sepal.Length", "Species"), numerics = 0)
-#' visualisation_matrix(data, at = c("Sepal.Length = 3", "Species"))
-#' visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
+#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), length = 3)
+#' visualisation_matrix(data, by = c(1, 3), length = 3)
+#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), preserve_range = TRUE)
+#' visualisation_matrix(data, by = c("Sepal.Length", "Species"), numerics = 0)
+#' visualisation_matrix(data, by = c("Sepal.Length = 3", "Species"))
+#' visualisation_matrix(data, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
 #'
 #' # with list-style at-argument
-#' visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa"))
+#' visualisation_matrix(data, by = list(Sepal.Length = c(1, 3), Species = "setosa"))
 #'
 #' # Standardize
-#' vizdata <- visualisation_matrix(data, at = "Sepal.Length")
+#' vizdata <- visualisation_matrix(data, by = "Sepal.Length")
 #' standardize(vizdata)
 #' @export
 visualisation_matrix <- function(x, ...) {
@@ -46,20 +46,25 @@ visualisation_matrix <- function(x, ...) {
 #' @rdname visualisation_matrix
 #' @export
 visualisation_matrix.data.frame <- function(x,
-                                            at = "all",
+                                            by = "all",
                                             target = NULL,
+                                            at = NULL,
                                             factors = "reference",
                                             numerics = "mean",
                                             preserve_range = FALSE,
                                             reference = x,
                                             ...) {
   if (!is.null(target)) {
-    insight::format_warning("The 'target' argument name is deprecated in favour of 'at'. Please replace 'target' with 'at'.")
-    at <- target
+    insight::format_warning("The 'target` argument name is deprecated in favour of `by`. Please replace `target` with `by`.") # nolint
+    by <- target
+  }
+  if (!is.null(at)) {
+    insight::format_warning("The `at` argument is deprecated and will be removed in the future. Please use `by` instead.") # nolint
+    by <- at
   }
 
   insight::get_datagrid(x,
-    at = at,
+    by = by,
     factors = factors,
     numerics = numerics,
     preserve_range = preserve_range,
diff --git a/R/visualisation_recipe.estimate_means.R b/R/visualisation_recipe.estimate_means.R
index 8ece2f849..c062f32fe 100644
--- a/R/visualisation_recipe.estimate_means.R
+++ b/R/visualisation_recipe.estimate_means.R
@@ -23,14 +23,14 @@
 #' data$new_factor <- as.factor(rep(c("A", "B"), length.out = nrow(mtcars)))
 #'
 #' model <- lm(mpg ~ new_factor * cyl * wt, data = data)
-#' x <- estimate_means(model, at = c("new_factor", "cyl"))
+#' x <- estimate_means(model, by =c("new_factor", "cyl"))
 #' plot(visualisation_recipe(x))
 #'
 #' # Modulations --------------
-#' x <- estimate_means(model, at = c("new_factor", "wt"))
+#' x <- estimate_means(model, by =c("new_factor", "wt"))
 #' plot(visualisation_recipe(x))
 #'
-#' # x <- estimate_means(model, at = c("new_factor", "cyl", "wt"))
+#' # x <- estimate_means(model, by =c("new_factor", "cyl", "wt"))
 #' # plot(visualisation_recipe(x))  # TODO: broken
 #'
 #' #'   # GLMs ---------------------
@@ -54,17 +54,17 @@ visualisation_recipe.estimate_means <- function(x,
 
 
   # Main aesthetics -----------------
-  data <- as.data.frame(x)
+  vis_data <- as.data.frame(x)
   y <- info$response
   color <- NULL
   alpha <- NULL
 
-  levels <- info$at[info$at %in% names(data[!sapply(data, is.numeric)])]
-  modulate <- info$at[info$at %in% names(data[sapply(data, is.numeric)])]
-  x1 <- levels[1]
-  if (length(levels) > 1L) {
-    color <- levels[2]
-    if (length(levels) > 2L) {
+  by_levels <- info$at[info$at %in% names(vis_data[!sapply(vis_data, is.numeric)])]
+  modulate <- info$at[info$at %in% names(vis_data[sapply(vis_data, is.numeric)])]
+  x1 <- by_levels[1]
+  if (length(by_levels) > 1L) {
+    color <- by_levels[2]
+    if (length(by_levels) > 2L) {
       # TODO: add facetting (needs updating see::geom_from_list to work with facets)
       insight::format_warning("Cannot deal with more than 2 levels variables for now. Other ones will be omitted.")
     }
@@ -120,7 +120,7 @@ visualisation_recipe.estimate_means <- function(x,
 
   # Line
   layers[[paste0("l", l)]] <- .visualisation_means_line(
-    data,
+    vis_data,
     x1,
     y = info$coef_name[1],
     color = color,
@@ -131,7 +131,7 @@ visualisation_recipe.estimate_means <- function(x,
 
   # Pointrange
   layers[[paste0("l", l)]] <- .visualisation_means_pointrange(
-    data,
+    vis_data,
     x1,
     y = info$coef_name[1],
     color = color,
@@ -145,7 +145,7 @@ visualisation_recipe.estimate_means <- function(x,
 
   # Out
   class(layers) <- unique(c("visualisation_recipe", "see_visualisation_recipe", class(layers)))
-  attr(layers, "data") <- data
+  attr(layers, "data") <- vis_data
   layers
 }
 
@@ -229,16 +229,16 @@ visualisation_recipe.estimate_means <- function(x,
 
 .visualisation_means_labs <- function(info, x1, y, labs = NULL) {
   if (all(info$coef_name == "Probability")) {
-    title <- "Estimated Mean Probabilities"
+    vis_title <- "Estimated Mean Probabilities"
   } else {
-    title <- "Estimated Means"
+    vis_title <- "Estimated Means"
   }
 
   out <- list(
     geom = "labs",
     x = x1,
     y = y,
-    title = paste0(title, " (", format(insight::find_formula(info$model)), ")")
+    title = paste0(vis_title, " (", format(insight::find_formula(info$model)), ")")
   )
   if (!is.null(labs)) out <- utils::modifyList(out, labs) # Update with additional args
   out
diff --git a/R/visualisation_recipe.estimate_slopes.R b/R/visualisation_recipe.estimate_slopes.R
index 028e981b3..0f03bc614 100644
--- a/R/visualisation_recipe.estimate_slopes.R
+++ b/R/visualisation_recipe.estimate_slopes.R
@@ -6,18 +6,18 @@
 #' # ==============================================
 #' if (require("ggplot2")) {
 #'   model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris)
-#'   x <- estimate_slopes(model, trend = "Petal.Length", at = "Species")
+#'   x <- estimate_slopes(model, trend = "Petal.Length", by ="Species")
 #'
 #'   layers <- visualisation_recipe(x)
 #'   layers
 #'   plot(layers)
 #'
 #'   model <- lm(Petal.Length ~ poly(Sepal.Width, 4), data = iris)
-#'   x <- estimate_slopes(model, at = "Sepal.Width", length = 20)
+#'   x <- estimate_slopes(model, by ="Sepal.Width", length = 20)
 #'   plot(visualisation_recipe(x))
 #'
 #'   model <- lm(Petal.Length ~ Species * poly(Sepal.Width, 3), data = iris)
-#'   x <- estimate_slopes(model, at = c("Sepal.Width", "Species"))
+#'   x <- estimate_slopes(model, by =c("Sepal.Width", "Species"))
 #'   plot(visualisation_recipe(x))
 #' }
 #' \donttest{
@@ -27,11 +27,11 @@
 #'   data$Petal.Length <- data$Petal.Length^2
 #'
 #'   model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length), data = data)
-#'   x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length"), length = 20)
+#'   x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length"), length = 20)
 #'   plot(visualisation_recipe(x))
 #'
 #'   model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length, by = Species), data = data)
-#'   x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length", "Species"), length = 10)
+#'   x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length", "Species"), length = 10)
 #'   plot(visualisation_recipe(x))
 #' }
 #' }
@@ -80,19 +80,17 @@ visualisation_recipe.estimate_slopes <- function(x,
       if (length(facs) > 0) {
         facet <- facs[1]
       }
+    } else if (length(facs) > 0) {
+      color <- facs[1]
+      fill <- facs[1]
+      group_ribbon <- facs[1]
+      group_line <- facs[1]
+      alpha <- "Confidence"
     } else {
-      if (length(facs) > 0) {
-        color <- facs[1]
-        fill <- facs[1]
-        group_ribbon <- facs[1]
-        group_line <- facs[1]
-        alpha <- "Confidence"
-      } else {
-        group_ribbon <- ".group"
-        group_line <- 1
-        fill <- "Confidence"
-        color <- NULL
-      }
+      group_ribbon <- ".group"
+      group_line <- 1
+      fill <- "Confidence"
+      color <- NULL
     }
   }
 
diff --git a/man/visualisation_matrix.Rd b/man/visualisation_matrix.Rd
index dec6f37d1..28c0850e2 100644
--- a/man/visualisation_matrix.Rd
+++ b/man/visualisation_matrix.Rd
@@ -11,8 +11,9 @@ visualisation_matrix(x, ...)
 
 \method{visualisation_matrix}{data.frame}(
   x,
-  at = "all",
+  by = "all",
   target = NULL,
+  at = NULL,
   factors = "reference",
   numerics = "mean",
   preserve_range = FALSE,
@@ -30,9 +31,57 @@ visualisation_matrix(x, ...)
 \item{...}{Arguments passed to or from other methods (for instance, \code{length}
 or \code{range} to control the spread of numeric variables.).}
 
-\item{at}{Deprecated. Use \code{by} instead.}
+\item{by}{Indicates the \emph{focal predictors} (variables) for the reference grid
+and at which values focal predictors should be represented. If not specified
+otherwise, representative values for numeric variables or predictors are
+evenly distributed from the minimum to the maximum, with a total number of
+\code{length} values covering that range (see 'Examples'). Possible options for
+\code{by} are:
+\itemize{
+\item \code{"all"}, which will include all variables or predictors.
+\item a character vector of one or more variable or predictor names, like
+\code{c("Species", "Sepal.Width")}, which will create a grid of all combinations
+of unique values. For factors, will use all levels, for numeric variables,
+will use a range of length \code{length} (evenly spread from minimum to maximum)
+and for character vectors, will use all unique values.
+\item a list of named elements, indicating focal predictors and their representative
+values, e.g. \code{by = list(Sepal.Length = c(2, 4), Species = "setosa")}.
+\item a string with assignments, e.g. \code{by = "Sepal.Length = 2"} or
+\code{by = c("Sepal.Length = 2", "Species = 'setosa'")} - note the usage of single
+and double quotes to assign strings within strings.
+}
+
+There is a special handling of assignments with \emph{brackets}, i.e. values
+defined inside \code{[} and \verb{]}.For \strong{numeric} variables, the value(s) inside
+the brackets should either be
+\itemize{
+\item two values, indicating minimum and maximum (e.g. \code{by = "Sepal.Length = [0, 5]"}),
+for which a range of length \code{length} (evenly spread from given minimum to
+maximum) is created.
+\item more than two numeric values \code{by = "Sepal.Length = [2,3,4,5]"}, in which
+case these values are used as representative values.
+\item a "token" that creates pre-defined representative values:
+\itemize{
+\item for mean and -/+ 1 SD around the mean: \code{"x = [sd]"}
+\item for median and -/+ 1 MAD around the median: \code{"x = [mad]"}
+\item for Tukey's five number summary (minimum, lower-hinge, median, upper-hinge, maximum): \code{"x = [fivenum]"}
+\item for terciles, including minimum and maximum: \code{"x = [terciles]"}
+\item for terciles, excluding minimum and maximum: \code{"x = [terciles2]"}
+\item for quartiles, including minimum and maximum: \code{"x = [quartiles]"}
+\item for quartiles, excluding minimum and maximum: \code{"x = [quartiles2]"}
+\item for minimum and maximum value: \code{"x = [minmax]"}
+\item for 0 and the maximum value: \code{"x = [zeromax]"}
+}
+}
+
+For \strong{factor} variables, the value(s) inside the brackets should indicate
+one or more factor levels, like \code{by = "Species = [setosa, versicolor]"}.
+\strong{Note}: the \code{length} argument will be ignored when using brackets-tokens.
+
+The remaining variables not specified in \code{by} will be fixed (see also arguments
+\code{factors} and \code{numerics}).}
 
-\item{target}{Deprecated name. Please use \code{at} instead.}
+\item{target, at}{Deprecated name. Please use \code{by} instead.}
 
 \item{factors}{Type of summary for factors. Can be \code{"reference"} (set at the
 reference level), \code{"mode"} (set at the most common level) or \code{"all"} to
@@ -66,23 +115,23 @@ library(modelbased)
 data <- rbind(iris, iris[149, ], make.row.names = FALSE)
 
 # Single variable is of interest; all others are "fixed"
-visualisation_matrix(data, at = "Sepal.Length")
-visualisation_matrix(data, at = "Sepal.Length", length = 3)
-visualisation_matrix(data, at = "Sepal.Length", range = "ci", ci = 0.90)
-visualisation_matrix(data, at = "Sepal.Length", factors = "mode")
+visualisation_matrix(data, by = "Sepal.Length")
+visualisation_matrix(data, by = "Sepal.Length", length = 3)
+visualisation_matrix(data, by = "Sepal.Length", range = "ci", ci = 0.90)
+visualisation_matrix(data, by = "Sepal.Length", factors = "mode")
 
 # Multiple variables are of interest, creating a combination
-visualisation_matrix(data, at = c("Sepal.Length", "Species"), length = 3)
-visualisation_matrix(data, at = c(1, 3), length = 3)
-visualisation_matrix(data, at = c("Sepal.Length", "Species"), preserve_range = TRUE)
-visualisation_matrix(data, at = c("Sepal.Length", "Species"), numerics = 0)
-visualisation_matrix(data, at = c("Sepal.Length = 3", "Species"))
-visualisation_matrix(data, at = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
+visualisation_matrix(data, by = c("Sepal.Length", "Species"), length = 3)
+visualisation_matrix(data, by = c(1, 3), length = 3)
+visualisation_matrix(data, by = c("Sepal.Length", "Species"), preserve_range = TRUE)
+visualisation_matrix(data, by = c("Sepal.Length", "Species"), numerics = 0)
+visualisation_matrix(data, by = c("Sepal.Length = 3", "Species"))
+visualisation_matrix(data, by = c("Sepal.Length = c(3, 1)", "Species = 'setosa'"))
 
 # with list-style at-argument
-visualisation_matrix(data, at = list(Sepal.Length = c(1, 3), Species = "setosa"))
+visualisation_matrix(data, by = list(Sepal.Length = c(1, 3), Species = "setosa"))
 
 # Standardize
-vizdata <- visualisation_matrix(data, at = "Sepal.Length")
+vizdata <- visualisation_matrix(data, by = "Sepal.Length")
 standardize(vizdata)
 }
diff --git a/man/visualisation_recipe.estimate_predicted.Rd b/man/visualisation_recipe.estimate_predicted.Rd
index 14190120f..152784e8c 100644
--- a/man/visualisation_recipe.estimate_predicted.Rd
+++ b/man/visualisation_recipe.estimate_predicted.Rd
@@ -126,14 +126,14 @@ data$cyl <- as.factor(data$cyl)
 data$new_factor <- as.factor(rep(c("A", "B"), length.out = nrow(mtcars)))
 
 model <- lm(mpg ~ new_factor * cyl * wt, data = data)
-x <- estimate_means(model, at = c("new_factor", "cyl"))
+x <- estimate_means(model, by =c("new_factor", "cyl"))
 plot(visualisation_recipe(x))
 
 # Modulations --------------
-x <- estimate_means(model, at = c("new_factor", "wt"))
+x <- estimate_means(model, by =c("new_factor", "wt"))
 plot(visualisation_recipe(x))
 
-# x <- estimate_means(model, at = c("new_factor", "cyl", "wt"))
+# x <- estimate_means(model, by =c("new_factor", "cyl", "wt"))
 # plot(visualisation_recipe(x))  # TODO: broken
 
 #'   # GLMs ---------------------
@@ -257,18 +257,18 @@ if (require("ggplot2") && require("rstanarm")) {
 # ==============================================
 if (require("ggplot2")) {
   model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris)
-  x <- estimate_slopes(model, trend = "Petal.Length", at = "Species")
+  x <- estimate_slopes(model, trend = "Petal.Length", by ="Species")
 
   layers <- visualisation_recipe(x)
   layers
   plot(layers)
 
   model <- lm(Petal.Length ~ poly(Sepal.Width, 4), data = iris)
-  x <- estimate_slopes(model, at = "Sepal.Width", length = 20)
+  x <- estimate_slopes(model, by ="Sepal.Width", length = 20)
   plot(visualisation_recipe(x))
 
   model <- lm(Petal.Length ~ Species * poly(Sepal.Width, 3), data = iris)
-  x <- estimate_slopes(model, at = c("Sepal.Width", "Species"))
+  x <- estimate_slopes(model, by =c("Sepal.Width", "Species"))
   plot(visualisation_recipe(x))
 }
 \donttest{
@@ -278,11 +278,11 @@ if (require("mgcv")) {
   data$Petal.Length <- data$Petal.Length^2
 
   model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length), data = data)
-  x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length"), length = 20)
+  x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length"), length = 20)
   plot(visualisation_recipe(x))
 
   model <- mgcv::gam(Sepal.Width ~ t2(Petal.Width, Petal.Length, by = Species), data = data)
-  x <- estimate_slopes(model, at = c("Petal.Width", "Petal.Length", "Species"), length = 10)
+  x <- estimate_slopes(model, by =c("Petal.Width", "Petal.Length", "Species"), length = 10)
   plot(visualisation_recipe(x))
 }
 }