diff --git a/NAMESPACE b/NAMESPACE index e5a58c80e..34d05af61 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,10 +65,14 @@ export(find_inversions) export(get_emcontrasts) export(get_emmeans) export(get_emtrends) +export(get_marginalcontrasts) export(get_marginaleffects) +export(get_marginalmeans) export(model_emcontrasts) export(model_emmeans) export(model_emtrends) +export(model_marginalcontrasts) +export(model_marginalmeans) export(print_md) export(reshape_grouplevel) export(smoothing) diff --git a/R/estimate_contrasts.R b/R/estimate_contrasts.R index d3635d89e..407f7a427 100644 --- a/R/estimate_contrasts.R +++ b/R/estimate_contrasts.R @@ -7,13 +7,13 @@ #' @inheritParams estimate_means #' @inheritParams get_emcontrasts #' @param p_adjust The p-values adjustment method for frequentist multiple -#' comparisons. Can be one of "holm" (default), "tukey", "hochberg", "hommel", -#' "bonferroni", "BH", "BY", "fdr" or "none". See the p-value adjustment -#' section in the `emmeans::test` documentation. +#' comparisons. Can be one of `"holm"` (default), `"tukey"`, `"hochberg"`, +#' `"hommel"`, `"bonferroni"`, `"BH"`, `"BY"`, `"fdr"` or `"none"`. See the +#' p-value adjustment section in the `emmeans::test` documentation. #' #' @inherit estimate_slopes details #' -#' @examplesIf require("lme4", quietly = TRUE) && require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE) +#' @examplesIf all(insight::check_if_installed(c("lme4", "emmeans", "rstanarm"), quietly = TRUE)) #' \dontrun{ #' # Basic usage #' model <- lm(Sepal.Width ~ Species, data = iris) @@ -34,9 +34,6 @@ #' # Or with custom specifications #' estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) #' -#' # Can fixate the numeric at a specific value -#' estimate_contrasts(model, fixed = "Petal.Width") -#' #' # Or modulate it #' estimate_contrasts(model, by = "Petal.Width", length = 4) #' @@ -51,22 +48,24 @@ #' model <- lme4::lmer(Sepal.Width ~ Species + (1 | Petal.Length_factor), data = data) #' estimate_contrasts(model) #' -#' library(rstanarm) -#' #' data <- mtcars #' data$cyl <- as.factor(data$cyl) #' data$am <- as.factor(data$am) #' -#' model <- stan_glm(mpg ~ cyl * am, data = data, refresh = 0) +#' model <- rstanarm::stan_glm(mpg ~ cyl * am, data = data, refresh = 0) #' estimate_contrasts(model) -#' estimate_contrasts(model, fixed = "am") +#' # fix `am` at value 1 +#' estimate_contrasts(model, by = c("cyl", "am='1'")) #' -#' model <- stan_glm(mpg ~ cyl * wt, data = data, refresh = 0) +#' model <- rstanarm::stan_glm(mpg ~ cyl * wt, data = data, refresh = 0) #' estimate_contrasts(model) -#' estimate_contrasts(model, fixed = "wt") #' estimate_contrasts(model, by = "wt", length = 4) #' -#' model <- stan_glm(Sepal.Width ~ Species + Petal.Width + Petal.Length, data = iris, refresh = 0) +#' model <- rstanarm::stan_glm( +#' Sepal.Width ~ Species + Petal.Width + Petal.Length, +#' data = iris, +#' refresh = 0 +#' ) #' estimate_contrasts(model, by = "Petal.Length", test = "bf") #' } #' @@ -75,7 +74,6 @@ estimate_contrasts <- function(model, contrast = NULL, by = NULL, - fixed = NULL, transform = "none", ci = 0.95, p_adjust = "holm", @@ -85,7 +83,6 @@ estimate_contrasts <- function(model, estimated <- get_emcontrasts(model, contrast = contrast, by = by, - fixed = fixed, transform = transform, method = method, adjust = p_adjust, @@ -142,7 +139,6 @@ estimate_contrasts <- function(model, attr(out, "transform") <- transform attr(out, "at") <- info$by attr(out, "by") <- info$by - attr(out, "fixed") <- info$fixed attr(out, "contrast") <- info$contrast attr(out, "p_adjust") <- p_adjust diff --git a/R/estimate_means.R b/R/estimate_means.R index 9916b3bbe..1b4ccd16b 100644 --- a/R/estimate_means.R +++ b/R/estimate_means.R @@ -19,7 +19,6 @@ #' model <- lm(Petal.Length ~ Sepal.Width * Species, data = iris) #' #' estimate_means(model) -#' estimate_means(model, fixed = "Sepal.Width") #' estimate_means(model, by = c("Species", "Sepal.Width"), length = 2) #' estimate_means(model, by = "Species=c('versicolor', 'setosa')") #' estimate_means(model, by = "Sepal.Width=c(2, 4)") @@ -28,7 +27,7 @@ #' estimate_means(model, by = "Sepal.Width=c(2, 4)") #' #' # Methods that can be applied to it: -#' means <- estimate_means(model, fixed = "Sepal.Width") +#' means <- estimate_means(model, by = c("Species", "Sepal.Width=0")) #' #' @examplesIf require("see") && require("emmeans", quietly = TRUE) #' plot(means) # which runs visualisation_recipe() @@ -48,19 +47,18 @@ #' @export estimate_means <- function(model, by = "auto", - fixed = NULL, transform = "response", ci = 0.95, backend = "emmeans", ...) { if (backend == "emmeans") { # Emmeans ------------------------------------------------------------------ - estimated <- get_emmeans(model, by, fixed, transform = transform, ...) + estimated <- get_emmeans(model, by, transform = transform, ...) means <- .format_emmeans_means(estimated, model, ci, transform, ...) } else { # Marginalmeans ------------------------------------------------------------ - estimated <- .get_marginalmeans(model, by, ci = ci, ...) - means <- .format_marginaleffects_means(estimated, model, ...) + estimated <- get_marginalmeans(model, by, transform = transform, ci = ci, ...) + means <- .format_marginaleffects_means(estimated, model, transform, ...) } diff --git a/R/get_emcontrasts.R b/R/get_emcontrasts.R index e4637b7f8..e4469d140 100644 --- a/R/get_emcontrasts.R +++ b/R/get_emcontrasts.R @@ -1,35 +1,30 @@ #' @rdname get_emmeans #' #' @param contrast A character vector indicating the name of the variable(s) -#' for which to compute the contrasts. +#' for which to compute the contrasts. #' @param method Contrast method. See same argument in [emmeans::contrast]. #' -#' @examples -#' if (require("emmeans", quietly = TRUE)) { -#' # Basic usage -#' model <- lm(Sepal.Width ~ Species, data = iris) -#' get_emcontrasts(model) +#' @examplesIf insight::check_if_installed("emmeans", quietly = TRUE) +#' # Basic usage +#' model <- lm(Sepal.Width ~ Species, data = iris) +#' get_emcontrasts(model) #' -#' # Dealing with interactions -#' model <- lm(Sepal.Width ~ Species * Petal.Width, data = iris) -#' # By default: selects first factor -#' get_emcontrasts(model) -#' # Can also run contrasts between points of numeric -#' get_emcontrasts(model, contrast = "Petal.Width", length = 3) -#' # Or both -#' get_emcontrasts(model, contrast = c("Species", "Petal.Width"), length = 2) -#' # Or with custom specifications -#' estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) -#' # Can fixate the numeric at a specific value -#' get_emcontrasts(model, fixed = "Petal.Width") -#' # Or modulate it -#' get_emcontrasts(model, by = "Petal.Width", length = 4) -#' } +#' # Dealing with interactions +#' model <- lm(Sepal.Width ~ Species * Petal.Width, data = iris) +#' # By default: selects first factor +#' get_emcontrasts(model) +#' # Can also run contrasts between points of numeric +#' get_emcontrasts(model, contrast = "Petal.Width", length = 3) +#' # Or both +#' get_emcontrasts(model, contrast = c("Species", "Petal.Width"), length = 2) +#' # Or with custom specifications +#' estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) +#' # Or modulate it +#' get_emcontrasts(model, by = "Petal.Width", length = 4) #' @export get_emcontrasts <- function(model, contrast = NULL, by = NULL, - fixed = NULL, transform = "none", method = "pairwise", ...) { @@ -37,7 +32,7 @@ get_emcontrasts <- function(model, insight::check_if_installed("emmeans") # Guess arguments - my_args <- .guess_emcontrasts_arguments(model, contrast, by, fixed, ...) + my_args <- .guess_emcontrasts_arguments(model, contrast, by, ...) # Run emmeans estimated <- emmeans::emmeans( @@ -57,7 +52,6 @@ get_emcontrasts <- function(model, attr(out, "contrast") <- my_args$contrast attr(out, "at") <- my_args$by attr(out, "by") <- my_args$by - attr(out, "fixed") <- my_args$fixed out } @@ -74,7 +68,6 @@ model_emcontrasts <- get_emcontrasts .guess_emcontrasts_arguments <- function(model, contrast = NULL, by = NULL, - fixed = NULL, ...) { # Gather info predictors <- insight::find_predictors(model, effects = "fixed", flatten = TRUE, ...) @@ -91,6 +84,6 @@ model_emcontrasts <- get_emcontrasts contrast <- predictors } - my_args <- list(contrast = contrast, by = by, fixed = fixed) + my_args <- list(contrast = contrast, by = by) .format_emmeans_arguments(model, args = my_args, data = model_data, ...) } diff --git a/R/get_emmeans.R b/R/get_emmeans.R index a6ca5c03e..df3a9aebc 100644 --- a/R/get_emmeans.R +++ b/R/get_emmeans.R @@ -8,22 +8,18 @@ #' functions. #' #' @param model A statistical model. -#' @param fixed A character vector indicating the names of the predictors to be -#' "fixed" (i.e., maintained), so that the estimation is made at these values. -#' @param transform Is passed to the `type` argument in -#' `emmeans::emmeans()`. See -#' [this vignette](https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html). -#' Can be `"none"` (default for contrasts), `"response"` -#' (default for means), `"mu"`, `"unlink"`, `"log"`. -#' `"none"` will leave the values on scale of the linear predictors. -#' `"response"` will transform them on scale of the response variable. -#' Thus for a logistic model, `"none"` will give estimations expressed in -#' log-odds (probabilities on logit scale) and `"response"` in terms of -#' probabilities. +#' @param transform Is passed to the `type` argument in `emmeans::emmeans()`. +#' See [this vignette](https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html). +#' Can be `"none"` (default for contrasts), `"response"` (default for means), +#' `"mu"`, `"unlink"`, `"log"`. `"none"` will leave the values on scale of the +#' linear predictors. `"response"` will transform them on scale of the response +#' variable. Thus for a logistic model, `"none"` will give estimations expressed +#' in log-odds (probabilities on logit scale) and `"response"` in terms of +#' probabilities. #' @param by The predictor variable(s) at which to evaluate the desired effect -#' / mean / contrasts. Other predictors of the model that are not included -#' here will be collapsed and "averaged" over (the effect will be estimated -#' across them). +#' / mean / contrasts. Other predictors of the model that are not included +#' here will be collapsed and "averaged" over (the effect will be estimated +#' across them). #' @param ... Other arguments passed for instance to [insight::get_datagrid()]. #' #' @examples @@ -49,14 +45,13 @@ #' @export get_emmeans <- function(model, by = "auto", - fixed = NULL, transform = "response", ...) { # check if available insight::check_if_installed("emmeans") # Guess arguments - my_args <- .guess_emmeans_arguments(model, by, fixed, ...) + my_args <- .guess_emmeans_arguments(model, by, ...) # Run emmeans @@ -78,7 +73,6 @@ get_emmeans <- function(model, attr(estimated, "at") <- my_args$by attr(estimated, "by") <- my_args$by - attr(estimated, "fixed") <- my_args$fixed estimated } @@ -91,7 +85,7 @@ model_emmeans <- get_emmeans # ========================================================================= # HELPERS ---------------------------------------------------------------- # ========================================================================= -# This function is the actual equivalent of .get_marginalmeans(); both being used +# This function is the actual equivalent of get_marginalmeans(); both being used # in estimate_means #' @keywords internal @@ -122,7 +116,6 @@ model_emmeans <- get_emmeans attr(means, "at") <- info$by attr(means, "by") <- info$by - attr(means, "fixed") <- info$fixed means } @@ -135,7 +128,6 @@ model_emmeans <- get_emmeans #' @keywords internal .guess_emmeans_arguments <- function(model, by = NULL, - fixed = NULL, ...) { # Gather info predictors <- insight::find_predictors(model, effects = "fixed", flatten = TRUE, ...) @@ -150,7 +142,7 @@ model_emmeans <- get_emmeans insight::format_alert(paste0("We selected `by = c(", toString(paste0('"', by, '"')), ")`.")) } - my_args <- list(by = by, fixed = fixed) + my_args <- list(by = by) .format_emmeans_arguments(model, args = my_args, data = my_data, ...) } @@ -177,7 +169,6 @@ model_emmeans <- get_emmeans } else { if (!is.null(args$by) && all(args$by == "all")) { target <- insight::find_predictors(model, effects = "fixed", flatten = TRUE) - target <- target[!target %in% args$fixed] } else { target <- args$by } @@ -200,16 +191,6 @@ model_emmeans <- get_emmeans } } - # Deal with 'fixed' - if (!is.null(args$fixed)) { - fixed <- insight::get_datagrid(data[args$fixed], by = NULL, ...) - if (is.null(args$data_matrix)) { - args$data_matrix <- fixed - } else { - args$data_matrix <- merge(args$data_matrix, fixed) - } - } - # Get 'specs' and 'at' # -------------------- if (is.null(args$data_matrix)) { diff --git a/R/get_emtrends.R b/R/get_emtrends.R index 305570ba7..e659e0ffd 100644 --- a/R/get_emtrends.R +++ b/R/get_emtrends.R @@ -20,13 +20,12 @@ get_emtrends <- function(model, trend = NULL, by = NULL, - fixed = NULL, ...) { # check if available insight::check_if_installed("emmeans") # Guess arguments - my_args <- .guess_emtrends_arguments(model, trend, by, fixed, ...) + my_args <- .guess_emtrends_arguments(model, trend, by, ...) # Run emtrends estimated <- emmeans::emtrends( @@ -40,7 +39,6 @@ get_emtrends <- function(model, attr(estimated, "trend") <- my_args$trend attr(estimated, "at") <- my_args$by attr(estimated, "by") <- my_args$by - attr(estimated, "fixed") <- my_args$fixed estimated } @@ -56,7 +54,6 @@ model_emtrends <- get_emtrends .guess_emtrends_arguments <- function(model, trend = NULL, by = NULL, - fixed = NULL, ...) { # Gather info predictors <- insight::find_predictors(model, effects = "fixed", flatten = TRUE, ...) @@ -75,6 +72,6 @@ model_emtrends <- get_emtrends trend <- trend[1] } - my_args <- list(trend = trend, by = by, fixed = fixed) + my_args <- list(trend = trend, by = by) .format_emmeans_arguments(model, args = my_args, data = model_data, ...) } diff --git a/R/get_marginalcontrasts.R b/R/get_marginalcontrasts.R index b57561b43..c1444750d 100644 --- a/R/get_marginalcontrasts.R +++ b/R/get_marginalcontrasts.R @@ -1,65 +1,27 @@ -# Wrapper around marginaleffects to get marginal means -# -# library(modelbased) -# if (require("marginaleffects")) { -# -# # Frequentist models -# # ------------------- -# model <- lm(Petal.Length ~ Sepal.Width * Species, data = iris) -# -# estimate_means(model) -# estimate_means(model, fixed = "Sepal.Width") -# estimate_means(model, by = c("Species", "Sepal.Width"), length = 2) -# estimate_means(model, by = "Species=c('versicolor', 'setosa')") -# estimate_means(model, by = "Sepal.Width=c(2, 4)") -# estimate_means(model, by = c("Species", "Sepal.Width=0")) -# estimate_means(model, by = "Sepal.Width", length = 5) -# estimate_means(model, by = "Sepal.Width=c(2, 4)") -# -# # Methods that can be applied to it: -# means <- estimate_means(model, fixed = "Sepal.Width") -# if (require("see")) { -# plot(means) # which runs visualisation_recipe() -# } -# standardize(means) -# -# if (require("lme4")) { -# data <- iris -# data$Petal.Length_factor <- ifelse(data$Petal.Length < 4.2, "A", "B") -# -# model <- lmer(Petal.Length ~ Sepal.Width + Species + (1 | Petal.Length_factor), data = data) -# estimate_means(model) -# estimate_means(model, by = "Sepal.Width", length = 3) -# } -# } -# } -# #' @keywords internal -# .get_marginalmeans <- function(model, -# by = "auto", -# fixed = NULL, -# transform = "response", -# ci = 0.95, -# ...) { -# -# # check if available -# insight::check_if_installed("marginaleffects") -# -# # Guess arguments -# args <- modelbased:::.guess_emmeans_arguments(model, at, fixed, ...) -# -# # Run emmeans -# means <- marginaleffects::marginalmeans(model, args$at, conf_level = ci) -# -# # Format names -# names(means)[names(means) %in% c("conf.low")] <- "CI_low" -# names(means)[names(means) %in% c("conf.high")] <- "CI_high" -# names(means)[names(means) %in% c("std.error")] <- "SE" -# names(means)[names(means) %in% c("marginalmean")] <- "Mean" -# names(means)[names(means) %in% c("p.value")] <- "p" -# names(means)[names(means) %in% c("statistic")] <- "p" -# -# ifelse(insight::find_statistic(model) == "t-statistic", "t", "statistic") -# -# out -# -# } +#' @rdname get_marginalmeans +#' +#' @param method Contrast method. +#' @export +get_marginalcontrasts <- function(model, + by = NULL, + ci = 0.95, + method = "pairwise", + ...) { + out <- estimate_means( + model = model, + by = by, + ci = ci, + hypothesis = method, + backend = "marginaleffects", + ... + ) + + attr(out, "table_title") <- c("Marginal Contrasts Analysis", "blue") + attr(out, "table_footer") <- .estimate_means_footer(out, by = by, type = "contrasts", p_adjust = NULL) + + out +} + +#' @rdname get_marginalmeans +#' @export +model_marginalcontrasts <- get_marginalcontrasts diff --git a/R/get_marginaleffects.R b/R/get_marginaleffects.R index 76f5ffd49..491716cb0 100644 --- a/R/get_marginaleffects.R +++ b/R/get_marginaleffects.R @@ -1,23 +1,17 @@ -#' Easy marginaleffects -#' -#' Modelbased-like API to create \pkg{marginaleffects} objects. This is -#' Work-in-progress. +#' @rdname get_marginalmeans #' #' @inheritParams get_emmeans #' -#' @examples -#' if (require("marginaleffects")) { -#' model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) +#' @examplesIf insight::check_if_installed("marginaleffects", quietly = TRUE) +#' model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) #' -#' get_marginaleffects(model, trend = "Petal.Length", by = "Species") -#' get_marginaleffects(model, trend = "Petal.Length", by = "Petal.Length") -#' get_marginaleffects(model, trend = "Petal.Length", by = c("Species", "Petal.Length")) -#' } +#' get_marginaleffects(model, trend = "Petal.Length", by = "Species") +#' get_marginaleffects(model, trend = "Petal.Length", by = "Petal.Length") +#' get_marginaleffects(model, trend = "Petal.Length", by = c("Species", "Petal.Length")) #' @export get_marginaleffects <- function(model, trend = NULL, by = NULL, - fixed = NULL, ...) { # check if available insight::check_if_installed("marginaleffects") @@ -35,17 +29,20 @@ get_marginaleffects <- function(model, by <- by[!by %in% trend] } - newdata <- insight::get_datagrid(model, by = by, ...) - - fixed <- names(newdata)[!names(newdata) %in% c(by, trend)] - if (length(fixed) == 0) fixed <- NULL + datagrid <- insight::get_datagrid(model, by = by, ...) + at_specs <- attributes(datagrid)$at_specs # Compute stuff - estimated <- marginaleffects::slopes(model, variables = trend, newdata = newdata, ...) + estimated <- marginaleffects::avg_slopes( + model, + variables = trend, + by = at_specs$varname, + newdata = datagrid, + ... + ) attr(estimated, "trend") <- trend attr(estimated, "at") <- by attr(estimated, "by") <- by - attr(estimated, "fixed") <- fixed estimated } diff --git a/R/get_marginaleffects_type.R b/R/get_marginaleffects_type.R new file mode 100644 index 000000000..8e48876ce --- /dev/null +++ b/R/get_marginaleffects_type.R @@ -0,0 +1,145 @@ +#' @keywords internal +.get_type_argument <- function(model, transform = NULL, ...) { + dots <- list(...) + model_class <- class(model)[1] + + # no transformation always returns link-scale + if (identical(transform, "none")) { + return("link") + } + + # for unrecognized model classes, return "response" + if (!model_class %in% .typedic$class) { + return("response") + } + + # extract all valid types for model class + valid_types <- .typedic$type[.typedic$class == model_class] + + # check if user supplied type-argument + if (!is.null(dots$type)) { + if (!dots$type %in% valid_types) { + insight::format_error(paste0( + "The option provided in the `type` argument is not recognized.", + " Valid options are: ", + datawizard::text_concatenate(valid_types, enclose = "`"), + "." + )) + } else { + type <- dots$type + } + } else { + type <- valid_types[1] + } + + # return default type + type +} + + +# all valid "type" arguments for each model class. +# Run "marginaleffects:::type_dictionary_build()" to update this list +.typedic <- data.frame( + class = c( + "bam", "bam", "bart", "bart", "betareg", "betareg", "betareg", + "betareg", "betareg", "bife", "bife", "bracl", "brglmFit", "brglmFit", + "brmsfit", "brmsfit", "brmsfit", "brmsfit", "brmultinom", "brmultinom", + "clm", "clm", "clm", "clogit", "clogit", "clogit", "clogit", + "coxph", "coxph", "coxph", "coxph", "coxph_weightit", "coxph_weightit", + "coxph_weightit", "coxph_weightit", "crch", "crch", "crch", "crch", + "hetprob", "hetprob", "hxlr", "hxlr", "hxlr", "hxlr", "ivpml", + "ivpml", "flexsurvreg", "flexsurvreg", "flexsurvreg", "flexsurvreg", + "flexsurvreg", "flexsurvreg", "flexsurvreg", "flexsurvreg", "flexsurvreg", + "fixest", "fixest", "fixest", "hurdle", "hurdle", "hurdle", "hurdle", + "iv_robust", "lm", "gam", "gam", "Gam", "Gam", "Gam", "geeglm", + "geeglm", "Gls", "glimML", "glimML", "glm", "glm", "glm", "glmerMod", + "glmerMod", "glmgee", "glmrob", "glmrob", "glmmTMB", "glmmTMB", + "glmmTMB", "glmmTMB", "glmmTMB", "glmmTMB", "glmmPQL", "glmmPQL", + "glmx", "glm_weightit", "glm_weightit", "glm_weightit", "glm_weightit", + "glm_weightit", "ivreg", "lmerMod", "lmerModLmerTest", "lmrob", + "lm_robust", "lrm", "lrm", "lrm", "mblogit", "mblogit", "mblogit", + "mclogit", "mclogit", "mclogit", "MCMCglmm", "model_fit", "model_fit", + "model_fit", "workflow", "workflow", "workflow", "multinom", + "multinom", "multinom_weightit", "multinom_weightit", "multinom_weightit", + "mhurdle", "mhurdle", "mhurdle", "mlogit", "mvgam", "mvgam", + "mvgam", "mvgam", "mvgam", "negbin", "negbin", "negbin", "ols", + "oohbchoice", "oohbchoice", "orm", "orm", "orm", "ordinal_weightit", + "ordinal_weightit", "ordinal_weightit", "ordinal_weightit", "ordinal_weightit", + "polr", "rendo.base", "rendo.base", "rlm", "selection", "selection", + "selection", "speedlm", "speedglm", "speedglm", "stanreg", "stanreg", + "survreg", "survreg", "survreg", "svyglm", "svyglm", "svyolr", + "tobit", "tobit1", "tobit1", "tobit1", "zeroinfl", "zeroinfl", + "zeroinfl", "zeroinfl" + ), + type = c( + "response", "link", "ev", "ppd", "response", "link", "precision", + "quantile", "variance", "response", "link", "probs", "response", + "link", "response", "link", "prediction", "average", "probs", + "class", "prob", "cum.prob", "linear.predictor", "expected", + "lp", "risk", "survival", "survival", "expected", "lp", "risk", + "survival", "expected", "lp", "risk", "response", "location", + "scale", "density", "pr", "xb", "location", "cumprob", "scale", + "density", "pr", "xb", "survival", "response", "mean", "link", + "lp", "linear", "rmst", "hazard", "cumhaz", "invlink(link)", + "response", "link", "response", "prob", "count", "zero", "response", + "response", "response", "link", "invlink(link)", "response", + "link", "response", "link", "lp", "response", "link", "invlink(link)", + "response", "link", "response", "link", "response", "response", + "link", "response", "link", "conditional", "zprob", "zlink", + "disp", "response", "link", "response", "invlink(link)", "probs", + "response", "lp", "link", "response", "response", "response", + "response", "response", "fitted", "lp", "mean", "response", "latent", + "link", "response", "latent", "link", "response", "numeric", + "prob", "class", "numeric", "prob", "class", "probs", "latent", + "probs", "response", "mean", "E", "Ep", "p", "response", "response", + "link", "expected", "detection", "latent_N", "invlink(link)", + "response", "link", "lp", "probability", "utility", "fitted", + "mean", "lp", "probs", "response", "link", "lp", "mean", "probs", + "response", "link", "response", "response", "link", "unconditional", + "response", "response", "link", "response", "link", "response", + "link", "quantile", "response", "link", "probs", "response", + "expvalue", "linpred", "prob", "response", "prob", "count", "zero" + ), + stringsAsFactors = FALSE +) + + +# the default "type" arguments for each model class. Used to set the +# default type in "ggaverage()" +# Run following code to update this list: +# x <- marginaleffects:::type_dictionary_build() +# x[!duplicated(x$class), ] +# Finally, add "other" as first element to "class" and "response" to "type" +.default_type <- data.frame( + class = c( + "other", + "bam", "bart", "betareg", "bife", "bracl", + "brglmFit", "brmsfit", "brmultinom", "clm", "clogit", "coxph", + "coxph_weightit", "crch", "hetprob", "hxlr", "ivpml", "flexsurvreg", + "fixest", "hurdle", "iv_robust", "lm", "gam", "Gam", "geeglm", + "Gls", "glimML", "glm", "glmerMod", "glmgee", "glmrob", "glmmTMB", + "glmmPQL", "glmx", "glm_weightit", "ivreg", "lmerMod", "lmerModLmerTest", + "lmrob", "lm_robust", "lrm", "mblogit", "mclogit", "MCMCglmm", + "model_fit", "workflow", "multinom", "multinom_weightit", "mhurdle", + "mlogit", "mvgam", "negbin", "ols", "oohbchoice", "orm", "ordinal_weightit", + "polr", "rendo.base", "rlm", "selection", "speedlm", "speedglm", + "stanreg", "survreg", "svyglm", "svyolr", "tobit", "tobit1", + "zeroinfl" + ), + type = c( + "response", + "response", "ev", "response", "response", + "probs", "response", "response", "probs", "prob", "expected", + "survival", "survival", "response", "pr", "location", "pr", "survival", + "invlink(link)", "response", "response", "response", "response", + "invlink(link)", "response", "lp", "response", "invlink(link)", + "response", "response", "response", "response", "response", "response", + "invlink(link)", "response", "response", "response", "response", + "response", "fitted", "response", "response", "response", "numeric", + "numeric", "probs", "probs", "E", "response", "response", "invlink(link)", + "lp", "probability", "fitted", "probs", "probs", "response", + "response", "response", "response", "response", "response", "response", + "response", "probs", "response", "expvalue", "response" + ), + stringsAsFactors = FALSE +) diff --git a/R/get_marginalmeans.R b/R/get_marginalmeans.R index 9085e639e..fc62be3fd 100644 --- a/R/get_marginalmeans.R +++ b/R/get_marginalmeans.R @@ -1,61 +1,138 @@ -#' @keywords internal -.get_marginalmeans <- function(model, - by = "auto", - ci = 0.95, - marginal = FALSE, - ...) { +#' Easy 'avg_predictions' and 'avg_slopes' +#' +#' The `get_marginalmeans()` function is a wrapper to facilitate the usage of +#' `marginaleffects::avg_predictions()` and `marginaleffects::avg_slopes()`, +#' providing a somewhat simpler and intuitive API to find the specifications and +#' variables of interest. It is meanly made to for the developers to facilitate +#' the organization and debugging, and end-users should rather use the +#' `estimate_*()` series of functions. +#' +#' @param model A statistical model. +#' @param transform Can be used to easily modulate the `type` argument in +#' `marginaleffects::avg_predictions()`. Can be `"none"` or `"response"`. +#' `"none"` will leave the values on scale of the linear predictors. +#' `"response"` will transform them on scale of the response variable. Thus for +#' a logistic model, `"none"` will give estimations expressed in log-odds +#' (probabilities on logit scale) and `"response"` in terms of probabilities. +#' @param by The predictor variable(s) at which to evaluate the desired effect +#' / mean / contrasts. Other predictors of the model that are not included +#' here will be collapsed and "averaged" over (the effect will be estimated +#' across them). +#' @param ci Level for confidence intervals. +#' @param ... Other arguments passed, for instance, to [insight::get_datagrid()] +#' or [marginaleffects::avg_predictions()]. +#' +#' @examplesIf insight::check_if_installed("marginaleffects", quietly = TRUE) +#' model <- lm(Sepal.Length ~ Species + Petal.Width, data = iris) +#' +#' # By default, 'by' is set to "Species" +#' get_marginalmeans(model) +#' +#' # Overall mean (close to 'mean(iris$Sepal.Length)') +#' get_marginalmeans(model, by = NULL) +#' +#' # One can estimate marginal means at several values of a 'modulate' variable +#' get_marginalmeans(model, by = "Petal.Width", length = 3) +#' +#' # Interactions +#' model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) +#' +#' get_marginalmeans(model) +#' get_marginalmeans(model, by = c("Species", "Petal.Length"), length = 2) +#' get_marginalmeans(model, by = c("Species", "Petal.Length = c(1, 3, 5)"), length = 2) +#' @export +get_marginalmeans <- function(model, + by = "auto", + transform = NULL, + ci = 0.95, + ...) { # check if available insight::check_if_installed("marginaleffects") + dots <- list(...) # Guess arguments my_args <- .guess_arguments_means(model, by, ...) + # find default response-type + type <- .get_type_argument(model, transform, ...) + + # setup arguments + dg_args <- list( + model, + by = my_args$by, + factors = "all", + include_random = TRUE + ) + # always show all theoretical values by default + if (is.null(dots$preserve_range)) { + dg_args$preserve_range <- FALSE + } + # add user-arguments from "...", but remove those arguments that are already set + dots[c("by", "factors", "include_random")] <- NULL + dg_args <- insight::compact_list(c(dg_args, dots)) + # Get corresponding datagrid (and deal with particular ats) - datagrid <- insight::get_datagrid(model, by = my_args$by, ...) - # Drop random effects - datagrid <- datagrid[insight::find_predictors(model, effects = "fixed", flatten = TRUE)] + datagrid <- do.call(insight::get_datagrid, dg_args) at_specs <- attributes(datagrid)$at_specs - if (marginal) { - means <- marginaleffects::predictions(model, - newdata = insight::get_data(model), - by = at_specs$varname, - conf_level = ci - ) - } else if (insight::is_mixed_model(model)) { - means <- marginaleffects::predictions(model, - newdata = datagrid, - by = at_specs$varname, - conf_level = ci, - re.form = NA - ) - } else { - means <- marginaleffects::predictions(model, - newdata = datagrid, - by = at_specs$varname, - conf_level = ci - ) - } + # model df + dof <- insight::get_df(model, verbose = FALSE) + + # setup arguments + fun_args <- list( + model, + by = at_specs$varname, + newdata = as.data.frame(datagrid), + conf_level = ci, + df = dof, + type = type + ) + # add user-arguments from "...", but remove those arguments that are already set + dots[c("by", "newdata", "conf_level", "df", "type", "verbose")] <- NULL + fun_args <- insight::compact_list(c(fun_args, dots)) + + ## TODO: need to check against different mixed models results from other packages + # set to NULL + fun_args$re.form <- NULL + + # we can use this function for contrasts as well, + # just need to add "hypothesis" argument + means <- suppressWarnings(do.call(marginaleffects::avg_predictions, fun_args)) + attr(means, "at") <- my_args$by attr(means, "by") <- my_args$by + attr(means, "focal_terms") <- at_specs$varname means } +#' @rdname get_marginalmeans +#' @export +model_marginalmeans <- get_marginalmeans + # Format ------------------------------------------------------------------ #' @keywords internal -.format_marginaleffects_means <- function(means, model, ...) { - # check if available - insight::check_if_installed("datawizard") +.format_marginaleffects_means <- function(means, model, transform = NULL, ...) { + # model information + model_data <- insight::get_data(model) + info <- insight::model_info(model, verbose = FALSE) + non_focal <- setdiff(colnames(model_data), attr(means, "focal_terms")) + + # estimate name + if (!identical(transform, "none") && (info$is_binomial || info$is_bernoulli)) { + estimate_name <- "Probability" + } else { + estimate_name <- "Mean" + } # Format - params <- parameters::parameters(means) - params <- datawizard::data_relocate(params, c("Predicted", "SE", "CI_low", "CI_high"), after = -1) - params <- datawizard::data_rename(params, "Predicted", "Mean") - params <- datawizard::data_remove(params, c("p", "Statistic", "s.value", "S", "CI")) - params <- datawizard::data_restoretype(params, insight::get_data(model)) + params <- suppressWarnings(parameters::model_parameters(means, verbose = FALSE)) + params <- datawizard::data_relocate(params, c("Predicted", "SE", "CI_low", "CI_high"), after = -1, verbose = FALSE) # nolint + params <- datawizard::data_rename(params, "Predicted", estimate_name) + params <- datawizard::data_remove(params, c("p", "Statistic", "s.value", "S", "CI", "df", "rowid_dedup", non_focal), verbose = FALSE) # nolint + params <- datawizard::data_restoretype(params, model_data) # Store info attr(params, "at") <- attr(means, "by") @@ -71,15 +148,14 @@ predictors <- insight::find_predictors(model, flatten = TRUE, ...) model_data <- insight::get_data(model) - # Guess arguments ('by' and 'fixed') + # Guess arguments 'by' if (identical(by, "auto")) { # Find categorical predictors by <- predictors[!vapply(model_data[predictors], is.numeric, logical(1))] if (!length(by) || all(is.na(by))) { - insight::format_error("Model contains no categorical factor. Please specify 'by'.") + insight::format_error("Model contains no categorical predictor. Please specify `by`.") } insight::format_alert(paste0("We selected `by = c(", toString(paste0('"', by, '"')), ")`.")) } - list(by = by) } diff --git a/man/estimate_contrasts.Rd b/man/estimate_contrasts.Rd index e797b2a7c..105aa50ca 100644 --- a/man/estimate_contrasts.Rd +++ b/man/estimate_contrasts.Rd @@ -8,7 +8,6 @@ estimate_contrasts( model, contrast = NULL, by = NULL, - fixed = NULL, transform = "none", ci = 0.95, p_adjust = "holm", @@ -27,26 +26,21 @@ for which to compute the contrasts.} here will be collapsed and "averaged" over (the effect will be estimated across them).} -\item{fixed}{A character vector indicating the names of the predictors to be -"fixed" (i.e., maintained), so that the estimation is made at these values.} - -\item{transform}{Is passed to the \code{type} argument in -\code{emmeans::emmeans()}. See -\href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. -Can be \code{"none"} (default for contrasts), \code{"response"} -(default for means), \code{"mu"}, \code{"unlink"}, \code{"log"}. -\code{"none"} will leave the values on scale of the linear predictors. -\code{"response"} will transform them on scale of the response variable. -Thus for a logistic model, \code{"none"} will give estimations expressed in -log-odds (probabilities on logit scale) and \code{"response"} in terms of +\item{transform}{Is passed to the \code{type} argument in \code{emmeans::emmeans()}. +See \href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. +Can be \code{"none"} (default for contrasts), \code{"response"} (default for means), +\code{"mu"}, \code{"unlink"}, \code{"log"}. \code{"none"} will leave the values on scale of the +linear predictors. \code{"response"} will transform them on scale of the response +variable. Thus for a logistic model, \code{"none"} will give estimations expressed +in log-odds (probabilities on logit scale) and \code{"response"} in terms of probabilities.} \item{ci}{Confidence Interval (CI) level. Default to \code{0.95} (\verb{95\%}).} \item{p_adjust}{The p-values adjustment method for frequentist multiple -comparisons. Can be one of "holm" (default), "tukey", "hochberg", "hommel", -"bonferroni", "BH", "BY", "fdr" or "none". See the p-value adjustment -section in the \code{emmeans::test} documentation.} +comparisons. Can be one of \code{"holm"} (default), \code{"tukey"}, \code{"hochberg"}, +\code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"} or \code{"none"}. See the +p-value adjustment section in the \code{emmeans::test} documentation.} \item{method}{Contrast method. See same argument in \link[emmeans:contrast]{emmeans::contrast}.} @@ -117,7 +111,7 @@ the effect of x averaged over all conditions, or instead within each condition (\code{using [estimate_slopes]}). } \examples{ -\dontshow{if (require("lme4", quietly = TRUE) && require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("lme4", "emmeans", "rstanarm"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ # Basic usage model <- lm(Sepal.Width ~ Species, data = iris) @@ -138,9 +132,6 @@ estimate_contrasts(model, contrast = c("Species", "Petal.Width"), length = 2) # Or with custom specifications estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) -# Can fixate the numeric at a specific value -estimate_contrasts(model, fixed = "Petal.Width") - # Or modulate it estimate_contrasts(model, by = "Petal.Width", length = 4) @@ -155,22 +146,24 @@ data$Petal.Length_factor <- ifelse(data$Petal.Length < 4.2, "A", "B") model <- lme4::lmer(Sepal.Width ~ Species + (1 | Petal.Length_factor), data = data) estimate_contrasts(model) -library(rstanarm) - data <- mtcars data$cyl <- as.factor(data$cyl) data$am <- as.factor(data$am) -model <- stan_glm(mpg ~ cyl * am, data = data, refresh = 0) +model <- rstanarm::stan_glm(mpg ~ cyl * am, data = data, refresh = 0) estimate_contrasts(model) -estimate_contrasts(model, fixed = "am") +# fix `am` at value 1 +estimate_contrasts(model, by = c("cyl", "am='1'")) -model <- stan_glm(mpg ~ cyl * wt, data = data, refresh = 0) +model <- rstanarm::stan_glm(mpg ~ cyl * wt, data = data, refresh = 0) estimate_contrasts(model) -estimate_contrasts(model, fixed = "wt") estimate_contrasts(model, by = "wt", length = 4) -model <- stan_glm(Sepal.Width ~ Species + Petal.Width + Petal.Length, data = iris, refresh = 0) +model <- rstanarm::stan_glm( + Sepal.Width ~ Species + Petal.Width + Petal.Length, + data = iris, + refresh = 0 +) estimate_contrasts(model, by = "Petal.Length", test = "bf") } \dontshow{\}) # examplesIf} diff --git a/man/estimate_means.Rd b/man/estimate_means.Rd index 0422edd0e..67c364fe0 100644 --- a/man/estimate_means.Rd +++ b/man/estimate_means.Rd @@ -7,7 +7,6 @@ estimate_means( model, by = "auto", - fixed = NULL, transform = "response", ci = 0.95, backend = "emmeans", @@ -22,18 +21,13 @@ estimate_means( here will be collapsed and "averaged" over (the effect will be estimated across them).} -\item{fixed}{A character vector indicating the names of the predictors to be -"fixed" (i.e., maintained), so that the estimation is made at these values.} - -\item{transform}{Is passed to the \code{type} argument in -\code{emmeans::emmeans()}. See -\href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. -Can be \code{"none"} (default for contrasts), \code{"response"} -(default for means), \code{"mu"}, \code{"unlink"}, \code{"log"}. -\code{"none"} will leave the values on scale of the linear predictors. -\code{"response"} will transform them on scale of the response variable. -Thus for a logistic model, \code{"none"} will give estimations expressed in -log-odds (probabilities on logit scale) and \code{"response"} in terms of +\item{transform}{Is passed to the \code{type} argument in \code{emmeans::emmeans()}. +See \href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. +Can be \code{"none"} (default for contrasts), \code{"response"} (default for means), +\code{"mu"}, \code{"unlink"}, \code{"log"}. \code{"none"} will leave the values on scale of the +linear predictors. \code{"response"} will transform them on scale of the response +variable. Thus for a logistic model, \code{"none"} will give estimations expressed +in log-odds (probabilities on logit scale) and \code{"response"} in terms of probabilities.} \item{ci}{Confidence Interval (CI) level. Default to \code{0.95} (\verb{95\%}).} @@ -117,7 +111,6 @@ library(modelbased) model <- lm(Petal.Length ~ Sepal.Width * Species, data = iris) estimate_means(model) -estimate_means(model, fixed = "Sepal.Width") estimate_means(model, by = c("Species", "Sepal.Width"), length = 2) estimate_means(model, by = "Species=c('versicolor', 'setosa')") estimate_means(model, by = "Sepal.Width=c(2, 4)") @@ -126,7 +119,7 @@ estimate_means(model, by = "Sepal.Width", length = 5) estimate_means(model, by = "Sepal.Width=c(2, 4)") # Methods that can be applied to it: -means <- estimate_means(model, fixed = "Sepal.Width") +means <- estimate_means(model, by = c("Species", "Sepal.Width=0")) \dontshow{\}) # examplesIf} \dontshow{if (require("see") && require("emmeans", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} plot(means) # which runs visualisation_recipe() diff --git a/man/get_emmeans.Rd b/man/get_emmeans.Rd index 8fb9e7cbb..262d57ca8 100644 --- a/man/get_emmeans.Rd +++ b/man/get_emmeans.Rd @@ -14,7 +14,6 @@ get_emcontrasts( model, contrast = NULL, by = NULL, - fixed = NULL, transform = "none", method = "pairwise", ... @@ -24,19 +23,18 @@ model_emcontrasts( model, contrast = NULL, by = NULL, - fixed = NULL, transform = "none", method = "pairwise", ... ) -get_emmeans(model, by = "auto", fixed = NULL, transform = "response", ...) +get_emmeans(model, by = "auto", transform = "response", ...) -model_emmeans(model, by = "auto", fixed = NULL, transform = "response", ...) +model_emmeans(model, by = "auto", transform = "response", ...) -get_emtrends(model, trend = NULL, by = NULL, fixed = NULL, ...) +get_emtrends(model, trend = NULL, by = NULL, ...) -model_emtrends(model, trend = NULL, by = NULL, fixed = NULL, ...) +model_emtrends(model, trend = NULL, by = NULL, ...) } \arguments{ \item{model}{A statistical model.} @@ -49,18 +47,13 @@ for which to compute the contrasts.} here will be collapsed and "averaged" over (the effect will be estimated across them).} -\item{fixed}{A character vector indicating the names of the predictors to be -"fixed" (i.e., maintained), so that the estimation is made at these values.} - -\item{transform}{Is passed to the \code{type} argument in -\code{emmeans::emmeans()}. See -\href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. -Can be \code{"none"} (default for contrasts), \code{"response"} -(default for means), \code{"mu"}, \code{"unlink"}, \code{"log"}. -\code{"none"} will leave the values on scale of the linear predictors. -\code{"response"} will transform them on scale of the response variable. -Thus for a logistic model, \code{"none"} will give estimations expressed in -log-odds (probabilities on logit scale) and \code{"response"} in terms of +\item{transform}{Is passed to the \code{type} argument in \code{emmeans::emmeans()}. +See \href{https://CRAN.R-project.org/package=emmeans/vignettes/transformations.html}{this vignette}. +Can be \code{"none"} (default for contrasts), \code{"response"} (default for means), +\code{"mu"}, \code{"unlink"}, \code{"log"}. \code{"none"} will leave the values on scale of the +linear predictors. \code{"response"} will transform them on scale of the response +variable. Thus for a logistic model, \code{"none"} will give estimations expressed +in log-odds (probabilities on logit scale) and \code{"response"} in terms of probabilities.} \item{method}{Contrast method. See same argument in \link[emmeans:contrast]{emmeans::contrast}.} @@ -79,26 +72,24 @@ debugging, and end-users should rather use the \verb{estimate_*()} series of functions. } \examples{ -if (require("emmeans", quietly = TRUE)) { - # Basic usage - model <- lm(Sepal.Width ~ Species, data = iris) - get_emcontrasts(model) - - # Dealing with interactions - model <- lm(Sepal.Width ~ Species * Petal.Width, data = iris) - # By default: selects first factor - get_emcontrasts(model) - # Can also run contrasts between points of numeric - get_emcontrasts(model, contrast = "Petal.Width", length = 3) - # Or both - get_emcontrasts(model, contrast = c("Species", "Petal.Width"), length = 2) - # Or with custom specifications - estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) - # Can fixate the numeric at a specific value - get_emcontrasts(model, fixed = "Petal.Width") - # Or modulate it - get_emcontrasts(model, by = "Petal.Width", length = 4) -} +\dontshow{if (insight::check_if_installed("emmeans", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Basic usage +model <- lm(Sepal.Width ~ Species, data = iris) +get_emcontrasts(model) + +# Dealing with interactions +model <- lm(Sepal.Width ~ Species * Petal.Width, data = iris) +# By default: selects first factor +get_emcontrasts(model) +# Can also run contrasts between points of numeric +get_emcontrasts(model, contrast = "Petal.Width", length = 3) +# Or both +get_emcontrasts(model, contrast = c("Species", "Petal.Width"), length = 2) +# Or with custom specifications +estimate_contrasts(model, contrast = c("Species", "Petal.Width=c(1, 2)")) +# Or modulate it +get_emcontrasts(model, by = "Petal.Width", length = 4) +\dontshow{\}) # examplesIf} model <- lm(Sepal.Length ~ Species + Petal.Width, data = iris) if (require("emmeans", quietly = TRUE)) { diff --git a/man/get_marginaleffects.Rd b/man/get_marginaleffects.Rd deleted file mode 100644 index 0b60c02bb..000000000 --- a/man/get_marginaleffects.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_marginaleffects.R -\name{get_marginaleffects} -\alias{get_marginaleffects} -\title{Easy marginaleffects} -\usage{ -get_marginaleffects(model, trend = NULL, by = NULL, fixed = NULL, ...) -} -\arguments{ -\item{model}{A statistical model.} - -\item{trend}{A character indicating the name of the variable -for which to compute the slopes.} - -\item{by}{The predictor variable(s) at which to evaluate the desired effect -/ mean / contrasts. Other predictors of the model that are not included -here will be collapsed and "averaged" over (the effect will be estimated -across them).} - -\item{fixed}{A character vector indicating the names of the predictors to be -"fixed" (i.e., maintained), so that the estimation is made at these values.} - -\item{...}{Other arguments passed for instance to \code{\link[insight:get_datagrid]{insight::get_datagrid()}}.} -} -\description{ -Modelbased-like API to create \pkg{marginaleffects} objects. This is -Work-in-progress. -} -\examples{ -if (require("marginaleffects")) { - model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) - - get_marginaleffects(model, trend = "Petal.Length", by = "Species") - get_marginaleffects(model, trend = "Petal.Length", by = "Petal.Length") - get_marginaleffects(model, trend = "Petal.Length", by = c("Species", "Petal.Length")) -} -} diff --git a/man/get_marginalmeans.Rd b/man/get_marginalmeans.Rd new file mode 100644 index 000000000..3fb162503 --- /dev/null +++ b/man/get_marginalmeans.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_marginalcontrasts.R, +% R/get_marginaleffects.R, R/get_marginalmeans.R +\name{get_marginalcontrasts} +\alias{get_marginalcontrasts} +\alias{model_marginalcontrasts} +\alias{get_marginaleffects} +\alias{get_marginalmeans} +\alias{model_marginalmeans} +\title{Easy 'avg_predictions' and 'avg_slopes'} +\usage{ +get_marginalcontrasts(model, by = NULL, ci = 0.95, method = "pairwise", ...) + +model_marginalcontrasts(model, by = NULL, ci = 0.95, method = "pairwise", ...) + +get_marginaleffects(model, trend = NULL, by = NULL, ...) + +get_marginalmeans(model, by = "auto", transform = NULL, ci = 0.95, ...) + +model_marginalmeans(model, by = "auto", transform = NULL, ci = 0.95, ...) +} +\arguments{ +\item{model}{A statistical model.} + +\item{by}{The predictor variable(s) at which to evaluate the desired effect +/ mean / contrasts. Other predictors of the model that are not included +here will be collapsed and "averaged" over (the effect will be estimated +across them).} + +\item{ci}{Level for confidence intervals.} + +\item{method}{Contrast method.} + +\item{...}{Other arguments passed, for instance, to \code{\link[insight:get_datagrid]{insight::get_datagrid()}} +or \code{\link[marginaleffects:predictions]{marginaleffects::avg_predictions()}}.} + +\item{trend}{A character indicating the name of the variable +for which to compute the slopes.} + +\item{transform}{Can be used to easily modulate the \code{type} argument in +\code{marginaleffects::avg_predictions()}. Can be \code{"none"} or \code{"response"}. +\code{"none"} will leave the values on scale of the linear predictors. +\code{"response"} will transform them on scale of the response variable. Thus for +a logistic model, \code{"none"} will give estimations expressed in log-odds +(probabilities on logit scale) and \code{"response"} in terms of probabilities.} +} +\description{ +The \code{get_marginalmeans()} function is a wrapper to facilitate the usage of +\code{marginaleffects::avg_predictions()} and \code{marginaleffects::avg_slopes()}, +providing a somewhat simpler and intuitive API to find the specifications and +variables of interest. It is meanly made to for the developers to facilitate +the organization and debugging, and end-users should rather use the +\verb{estimate_*()} series of functions. +} +\examples{ +\dontshow{if (insight::check_if_installed("marginaleffects", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) + +get_marginaleffects(model, trend = "Petal.Length", by = "Species") +get_marginaleffects(model, trend = "Petal.Length", by = "Petal.Length") +get_marginaleffects(model, trend = "Petal.Length", by = c("Species", "Petal.Length")) +\dontshow{\}) # examplesIf} +\dontshow{if (insight::check_if_installed("marginaleffects", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +model <- lm(Sepal.Length ~ Species + Petal.Width, data = iris) + +# By default, 'by' is set to "Species" +get_marginalmeans(model) + +# Overall mean (close to 'mean(iris$Sepal.Length)') +get_marginalmeans(model, by = NULL) + +# One can estimate marginal means at several values of a 'modulate' variable +get_marginalmeans(model, by = "Petal.Width", length = 3) + +# Interactions +model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) + +get_marginalmeans(model) +get_marginalmeans(model, by = c("Species", "Petal.Length"), length = 2) +get_marginalmeans(model, by = c("Species", "Petal.Length = c(1, 3, 5)"), length = 2) +\dontshow{\}) # examplesIf} +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index c2c6770dd..646af42b3 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -15,7 +15,7 @@ reference: contents: - describe_nonlinear - get_emmeans - - get_marginaleffects + - get_marginalmeans - find_inversions - zero_crossings - smoothing diff --git a/tests/testthat/_snaps/estimate_means.md b/tests/testthat/_snaps/estimate_means.md index 7502b9e1a..e8633d81c 100644 --- a/tests/testthat/_snaps/estimate_means.md +++ b/tests/testthat/_snaps/estimate_means.md @@ -8,19 +8,19 @@ mined | spp | Mean | SE | 95% CI ------------------------------------------- yes | GP | 0.04 | 0.03 | [-0.02, 0.11] - no | GP | 1.94 | 0.29 | [ 1.38, 2.50] - yes | PR | 0.11 | 0.06 | [ 0.00, 0.22] - no | PR | 0.47 | 0.12 | [ 0.23, 0.70] - yes | DM | 0.44 | 0.14 | [ 0.16, 0.71] - no | DM | 2.34 | 0.34 | [ 1.68, 3.00] - yes | EC-A | 0.08 | 0.05 | [-0.01, 0.18] - no | EC-A | 1.10 | 0.23 | [ 0.65, 1.55] - yes | EC-L | 0.20 | 0.07 | [ 0.06, 0.35] - no | EC-L | 3.78 | 0.50 | [ 2.80, 4.75] - yes | DES-L | 0.45 | 0.13 | [ 0.19, 0.72] - no | DES-L | 3.46 | 0.45 | [ 2.57, 4.35] - yes | DF | 0.44 | 0.12 | [ 0.20, 0.68] - no | DF | 1.87 | 0.29 | [ 1.30, 2.44] + yes | PR | 0.11 | 0.06 | [ 0.00, 0.23] + yes | DM | 0.45 | 0.15 | [ 0.17, 0.74] + yes | EC-A | 0.09 | 0.05 | [-0.01, 0.18] + yes | EC-L | 0.21 | 0.08 | [ 0.06, 0.36] + yes | DES-L | 0.47 | 0.14 | [ 0.20, 0.75] + yes | DF | 0.46 | 0.13 | [ 0.21, 0.71] + no | GP | 2.03 | 0.30 | [ 1.44, 2.61] + no | PR | 0.49 | 0.12 | [ 0.24, 0.73] + no | DM | 2.44 | 0.35 | [ 1.75, 3.14] + no | EC-A | 1.15 | 0.24 | [ 0.68, 1.62] + no | EC-L | 3.95 | 0.52 | [ 2.93, 4.97] + no | DES-L | 3.61 | 0.47 | [ 2.68, 4.55] + no | DF | 1.95 | 0.30 | [ 1.36, 2.55] Marginal means estimated at mined @@ -31,22 +31,22 @@ Output Estimated Marginal Means - mined | spp | Mean | SE | 95% CI - ------------------------------------------ - yes | GP | 0.26 | 0.04 | [0.19, 0.33] - no | GP | 2.01 | 0.19 | [1.63, 2.39] - yes | PR | 0.07 | 0.01 | [0.04, 0.09] - no | PR | 0.50 | 0.10 | [0.31, 0.69] - yes | DM | 0.33 | 0.04 | [0.25, 0.41] - no | DM | 2.53 | 0.22 | [2.10, 2.96] - yes | EC-A | 0.12 | 0.02 | [0.08, 0.16] - no | EC-A | 0.93 | 0.13 | [0.67, 1.19] - yes | EC-L | 0.49 | 0.06 | [0.37, 0.60] - no | EC-L | 3.74 | 0.27 | [3.22, 4.26] - yes | DES-L | 0.52 | 0.06 | [0.39, 0.64] - no | DES-L | 3.96 | 0.28 | [3.42, 4.50] - yes | DF | 0.28 | 0.04 | [0.21, 0.36] - no | DF | 2.18 | 0.20 | [1.78, 2.57] + mined | spp | Mean | 95% CI + ----------------------------------- + yes | GP | 0.26 | [0.20, 0.34] + yes | PR | 0.07 | [0.04, 0.10] + yes | DM | 0.33 | [0.26, 0.43] + yes | EC-A | 0.12 | [0.09, 0.17] + yes | EC-L | 0.49 | [0.38, 0.62] + yes | DES-L | 0.52 | [0.41, 0.66] + yes | DF | 0.28 | [0.22, 0.37] + no | GP | 2.01 | [1.66, 2.43] + no | PR | 0.50 | [0.34, 0.73] + no | DM | 2.53 | [2.14, 3.00] + no | EC-A | 0.93 | [0.70, 1.23] + no | EC-L | 3.74 | [3.25, 4.30] + no | DES-L | 3.96 | [3.46, 4.54] + no | DF | 2.18 | [1.81, 2.61] Marginal means estimated at mined diff --git a/tests/testthat/test-attributes_visualisation.R b/tests/testthat/test-attributes_visualisation.R index 6751932ee..936eb22cd 100644 --- a/tests/testthat/test-attributes_visualisation.R +++ b/tests/testthat/test-attributes_visualisation.R @@ -4,11 +4,6 @@ test_that("attributes_means", { estim <- suppressMessages(estimate_means(model)) expect_identical(attributes(estim)$by, "Species") - expect_null(attributes(estim)$fixed) - - estim <- suppressMessages(estimate_means(model, fixed = "Sepal.Width")) - expect_identical(attributes(estim)$by, "Species") - expect_identical(attributes(estim)$fixed, "Sepal.Width") estim <- suppressMessages(estimate_means(model, by = "all")) expect_identical(attributes(estim)$by, c("Species", "Sepal.Width")) @@ -23,12 +18,6 @@ test_that("attributes_contrasts", { estim <- suppressMessages(estimate_contrasts(model)) expect_identical(attributes(estim)$contrast, "Species") expect_null(attributes(estim)$by) - expect_null(attributes(estim)$fixed) - - estim <- suppressMessages(estimate_contrasts(model, fixed = "Sepal.Width")) - expect_identical(attributes(estim)$contrast, "Species") - expect_identical(attributes(estim)$fixed, "Sepal.Width") - expect_null(attributes(estim)$modulate) }) diff --git a/tests/testthat/test-estimate_contrasts.R b/tests/testthat/test-estimate_contrasts.R index 643eb28e4..a652df6c5 100644 --- a/tests/testthat/test-estimate_contrasts.R +++ b/tests/testthat/test-estimate_contrasts.R @@ -24,14 +24,14 @@ test_that("estimate_contrasts - Frequentist", { expect_identical(dim(estim), c(3L, 9L)) estim <- suppressMessages(estimate_contrasts(model, levels = "Species")) expect_identical(dim(estim), c(3L, 9L)) - estim <- suppressMessages(estimate_contrasts(model, fixed = "fac")) + estim <- suppressMessages(estimate_contrasts(model, by = c("Species", "fac='A'"))) expect_identical(dim(estim), c(3L, 10L)) # One factor and one continuous model <- lm(Sepal.Width ~ Species * Petal.Width, data = iris) estim <- suppressMessages(estimate_contrasts(model)) expect_identical(dim(estim), c(3L, 9L)) - estim <- suppressMessages(estimate_contrasts(model, fixed = "Petal.Width")) + estim <- suppressMessages(estimate_contrasts(model, by = c("Species", "Petal.Width=0"))) expect_identical(dim(estim), c(3L, 10L)) estim <- suppressMessages(estimate_contrasts(model, by = "Petal.Width", length = 4)) expect_identical(dim(estim), c(12L, 10L)) @@ -54,8 +54,6 @@ test_that("estimate_contrasts - Frequentist", { estim <- suppressMessages(estimate_contrasts(model, by = "all")) expect_identical(dim(estim), c(12L, 11L)) - estim <- suppressMessages(estimate_contrasts(model, contrast = c("vs", "am"), fixed = "gear")) - expect_identical(dim(estim), c(6L, 10L)) estim <- suppressMessages(estimate_contrasts(model, contrast = c("vs", "am"), by = "gear='5'")) expect_identical(dim(estim), c(1L, 10L)) @@ -70,8 +68,6 @@ test_that("estimate_contrasts - Frequentist", { estim <- suppressMessages(estimate_contrasts(model, contrast = c("factor1", "factor2", "factor3"), by = "all")) expect_identical(dim(estim), c(28L, 9L)) - estim <- suppressMessages(estimate_contrasts(model, contrast = c("factor1", "factor2"), fixed = "factor3")) - expect_identical(dim(estim), c(6L, 10L)) estim <- suppressMessages(estimate_contrasts(model, contrast = c("factor1", "factor2"), by = "factor3='F'")) expect_identical(dim(estim), c(6L, 10L)) estim <- suppressMessages(estimate_contrasts(model, contrast = c("factor1", "factor2"), by = "factor3")) @@ -133,7 +129,7 @@ test_that("estimate_contrasts - Bayesian", { ) estim <- suppressMessages(estimate_contrasts(model, contrast = "all")) expect_identical(dim(estim), c(15L, 7L)) - estim <- suppressMessages(estimate_contrasts(model, fixed = "Petal.Length_factor")) + estim <- suppressMessages(estimate_contrasts(model, by = c("Species", "Petal.Length_factor='A'"))) expect_identical(dim(estim), c(3L, 8L)) model <- suppressWarnings( @@ -147,7 +143,7 @@ test_that("estimate_contrasts - Bayesian", { ) estim <- suppressMessages(estimate_contrasts(model)) expect_identical(dim(estim), c(3L, 7L)) - estim <- suppressMessages(estimate_contrasts(model, fixed = "Petal.Width")) + estim <- suppressMessages(estimate_contrasts(model, by = c("Species", "Petal.Width=0"))) expect_identical(dim(estim), c(3L, 8L)) estim <- suppressMessages(estimate_contrasts(model, by = "Petal.Width", length = 4)) expect_identical(dim(estim), c(12L, 8L)) diff --git a/tests/testthat/test-estimate_means.R b/tests/testthat/test-estimate_means.R index 7c6aaafda..bbce3949e 100644 --- a/tests/testthat/test-estimate_means.R +++ b/tests/testthat/test-estimate_means.R @@ -1,9 +1,9 @@ -test_that("estimate_means() - core", { - skip_if_not_installed("rstanarm") - skip_if_not_installed("emmeans") - skip_if_not_installed("marginaleffects") +skip_if_not_installed("emmeans") +skip_if_not_installed("marginaleffects") - # library(testthat) +test_that("estimate_means() - lm", { + data(mtcars) + data(iris) dat <- mtcars dat$gear <- as.factor(dat$gear) @@ -13,26 +13,33 @@ test_that("estimate_means() - core", { # Simple model <- lm(vs ~ cyl, data = dat) estim1 <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim1), c(3L, 5L)) estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) expect_identical(dim(estim2), c(3L, 5L)) - expect_lt(max(estim1$Mean - estim2$Mean), 1e-10) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) # Interaction (factor * continuous) model <- lm(mpg ~ wt * gear, data = dat) estim1 <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim1), c(3L, 5L)) estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) - expect_identical(dim(estim2), c(3L, 6L)) - expect_lt(max(estim1$Mean - estim2$Mean), 1e-10) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_named(estim1, c("gear", "Mean", "SE", "CI_low", "CI_high")) + expect_named(estim2, c("gear", "Mean", "SE", "CI_low", "CI_high")) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) # At specific levels model <- lm(Sepal.Width ~ Species, data = iris) estim1 <- suppressMessages(estimate_means(model, by = "Species=c('versicolor', 'virginica')")) - expect_identical(dim(estim1), c(2L, 5L)) estim2 <- suppressMessages(estimate_means(model, by = "Species=c('versicolor', 'virginica')", backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 5L)) expect_identical(dim(estim2), c(2L, 5L)) - expect_lt(max(estim1$Mean - estim2$Mean), 1e-10) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_named(estim1, c("Species", "Mean", "SE", "CI_low", "CI_high")) + expect_named(estim2, c("Species", "Mean", "SE", "CI_low", "CI_high")) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) # Interactions between factors dat <- iris @@ -41,46 +48,46 @@ test_that("estimate_means() - core", { model <- lm(Sepal.Width ~ Species * Petal.Length_factor, data = dat) estim1 <- suppressMessages(estimate_means(model, by = "all")) - expect_identical(dim(estim1), c(6L, 6L)) estim2 <- suppressWarnings(suppressMessages(estimate_means(model, by = "all", backend = "marginaleffects"))) + expect_identical(dim(estim1), c(6L, 6L)) expect_identical(dim(estim2), c(6L, 6L)) + expect_equal(estim2$Mean, c(3.428, 3.79557, 2.54211, 2.90968, 2.60643, 2.974), tolerance = 1e-4) + expect_named(estim1, c("Species", "Petal.Length_factor", "Mean", "SE", "CI_low", "CI_high")) + expect_named(estim2, c("Species", "Petal.Length_factor", "Mean", "SE", "CI_low", "CI_high")) # No interaction (two factors) model <- lm(Petal.Length ~ Sepal.Width + Species, data = iris) estim1 <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim1), c(3L, 5L)) estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) - expect_identical(dim(estim2), c(3L, 6L)) - expect_lt(max(estim1$Mean - estim2$Mean), 1e-10) - + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + expect_named(estim2, c("Species", "Mean", "SE", "CI_low", "CI_high")) # At specific levels of continuous estim1 <- suppressMessages(estimate_means(model, by = "Sepal.Width")) - expect_equal(dim(estim1), c(10, 5)) estim2 <- suppressMessages(estimate_means(model, by = "Sepal.Width", backend = "marginaleffects")) - expect_equal(dim(estim2), c(10, 6)) + expect_identical(dim(estim1), c(10L, 5L)) + expect_identical(dim(estim2), c(10L, 5L)) # Note that the absolute values are different here... for unclear reasons expect_true(max(diff(estim1$Mean) - diff(estim2$Mean)) < 1e-10) - - # TODO: add the marginaleffects comparison for the remaining tests - dat <- iris - dat$y <- as.numeric(as.factor(ifelse(dat$Sepal.Width > 3, "A", "B"))) - 1 - dat <<- dat - model <- glm(y ~ Species, family = "binomial", data = dat) - - estim <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim), c(3L, 5L)) - estim <- suppressMessages(estimate_means(model, transform = "response")) - expect_identical(dim(estim), c(3L, 5L)) - expect_true(all(estim$Probability >= 0) & all(estim$Probability <= 1)) - + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) model <- lm(Petal.Length ~ Sepal.Width + Species, data = iris) - estim <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim), c(3L, 5L)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) - estim <- suppressMessages(estimate_means(model, by = "all")) - expect_equal(dim(estim), c(30, 6)) + estim1 <- suppressMessages(estimate_means(model, by = "all")) + estim2 <- suppressMessages(estimate_means(model, by = "all", backend = "marginaleffects")) + expect_identical(dim(estim1), c(30L, 6L)) + expect_identical(dim(estim2), c(30L, 6L)) + expect_equal(estim1$Mean[order(estim1$Sepal.Width)], estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low[order(estim1$Sepal.Width)], estim2$CI_low, tolerance = 1e-3) # In formula modification # FIXME: this got broken but it seems just to tedious to fix. Don't use in formula transforms. @@ -91,22 +98,61 @@ test_that("estimate_means() - core", { # One continuous and one factor model <- lm(Petal.Length ~ Species * Sepal.Width, data = iris) - estim <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim), c(3L, 5L)) - estim <- suppressMessages(estimate_means(model, fixed = "Sepal.Width")) - expect_identical(dim(estim), c(3L, 6L)) - estim <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width"), length = 2)) - expect_identical(dim(estim), c(6L, 6L)) - estim <- suppressMessages(estimate_means(model, by = "Species=c('versicolor', 'setosa')")) - expect_identical(dim(estim), c(2L, 5L)) - estim <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)")) - expect_identical(dim(estim), c(2L, 5L)) - estim <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width=0"))) - expect_identical(dim(estim), c(3L, 6L)) - estim <- suppressMessages(estimate_means(model, by = "Sepal.Width", length = 5)) - expect_equal(dim(estim), c(5, 5)) - estim <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)")) - expect_identical(dim(estim), c(2L, 5L)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width"), length = 2)) + estim2 <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width"), length = 2, backend = "marginaleffects")) + expect_identical(dim(estim1), c(6L, 6L)) + expect_identical(dim(estim2), c(6L, 6L)) + expect_equal(estim1$Mean[order(estim1$Species)], estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low[order(estim1$Species)], estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = "Species=c('versicolor', 'setosa')")) + estim2 <- suppressMessages(estimate_means(model, by = "Species=c('versicolor', 'setosa')", backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 5L)) + expect_identical(dim(estim2), c(2L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)")) + estim2 <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)", backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 5L)) + expect_identical(dim(estim2), c(2L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width=0"))) + estim2 <- suppressMessages(estimate_means(model, by = c("Species", "Sepal.Width=0"), backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 6L)) + expect_identical(dim(estim2), c(3L, 6L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = "Sepal.Width", length = 5)) + estim2 <- suppressMessages(estimate_means(model, by = "Sepal.Width", length = 5, backend = "marginaleffects")) + expect_identical(dim(estim1), c(5L, 5L)) + expect_identical(dim(estim2), c(5L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)")) + estim2 <- suppressMessages(estimate_means(model, by = "Sepal.Width=c(2, 4)", backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 5L)) + expect_identical(dim(estim2), c(2L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-3) + + estim1 <- suppressMessages(estimate_means(model, by = c("Species=c('versicolor', 'setosa')", "Sepal.Width=c(2, 4)"))) + estim2 <- suppressMessages(estimate_means(model, by = c("Species=c('versicolor', 'setosa')", "Sepal.Width=c(2, 4)"), backend = "marginalmeans")) + expect_identical(dim(estim1), c(4L, 6L)) + expect_identical(dim(estim2), c(4L, 6L)) + expect_equal(estim1$Mean, estim2$Mean[order(estim2$Sepal.Width)], tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low[order(estim2$Sepal.Width)], tolerance = 1e-3) # Two factors dat <- iris @@ -114,26 +160,55 @@ test_that("estimate_means() - core", { dat <<- dat model <- lm(Petal.Length ~ Species * Petal.Length_factor, data = dat) - estim <- suppressMessages(estimate_means(model, by = "all")) - expect_identical(dim(estim), c(6L, 6L)) - estim <- suppressMessages(estimate_means(model, by = "Petal.Length_factor")) - expect_identical(dim(estim), c(2L, 5L)) - estim <- suppressMessages(estimate_means(model, by = "Petal.Length_factor='B'")) - expect_identical(as.character(unique(estim$Petal.Length_factor)), "B") + estim1 <- suppressMessages(estimate_means(model, by = "all")) + estim2 <- suppressMessages(estimate_means(model, by = "all", backend = "marginaleffects")) + expect_identical(dim(estim1), c(6L, 6L)) + expect_identical(dim(estim2), c(6L, 6L)) + expect_equal(estim2$Mean, c(1.462, 2.24638, 3.77368, 4.55806, 4.76762, 5.552), tolerance = 1e-4) + + estim1 <- suppressMessages(estimate_means(model, by = "Petal.Length_factor")) + estim2 <- suppressMessages(estimate_means(model, by = "Petal.Length_factor", backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 5L)) + expect_identical(dim(estim2), c(2L, 5L)) + expect_equal(estim2$Mean, c(3.33443, 4.11881), tolerance = 1e-4) + estim <- suppressMessages(estimate_means(model, by = "Petal.Length_factor='B'", backend = "marginaleffects")) + expect_equal(estim$Mean, 4.11882, tolerance = 1e-4) # Three factors dat <- mtcars - dat[c("gear", "vs", "am")] <- sapply(dat[c("gear", "vs", "am")], as.factor) + dat[c("gear", "vs", "am")] <- lapply(dat[c("gear", "vs", "am")], as.factor) dat <<- dat model <- lm(mpg ~ gear * vs * am, data = dat) - estim <- suppressMessages(estimate_means(model)) - expect_equal(dim(estim), c(12, 7)) - estim <- suppressMessages(estimate_means(model, fixed = "am")) - expect_equal(dim(estim), c(6, 7)) - estim <- suppressMessages(estimate_means(model, by = c("gear='5'", "vs"))) - expect_equal(dim(estim), c(2, 7)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(12L, 7L)) + expect_identical(dim(estim2), c(12L, 7L)) + expect_equal( + estim2$Mean, + c( + 15.05, 22.03333, 20.33333, 27.31667, 14.01667, 21, 21.05, 28.03333, + 12.14167, 19.125, 23.41667, 30.4 + ), + tolerance = 1e-4 + ) + + estim1 <- suppressMessages(estimate_means(model, by = c("gear", "vs", "am='1'"))) + estim2 <- suppressMessages(estimate_means(model, by = c("gear", "vs", "am='1'"), backend = "marginaleffects")) + expect_identical(dim(estim1), c(6L, 7L)) + expect_identical(dim(estim2), c(6L, 7L)) + expect_equal( + estim2$Mean, + c(22.03333, 27.31667, 21, 28.03333, 19.125, 30.4), + tolerance = 1e-4 + ) + + estim1 <- suppressMessages(estimate_means(model, by = c("gear='5'", "vs"))) + estim2 <- suppressMessages(estimate_means(model, by = c("gear='5'", "vs"), backend = "marginaleffects")) + expect_identical(dim(estim1), c(2L, 7L)) + expect_identical(dim(estim2), c(2L, 6L)) + expect_equal(estim2$Mean, c(15.63333, 26.90833), tolerance = 1e-4) dat <- iris dat$factor1 <- ifelse(dat$Sepal.Width > 3, "A", "B") @@ -142,34 +217,78 @@ test_that("estimate_means() - core", { dat <<- dat model <- lm(Petal.Width ~ factor1 * factor2 * factor3, data = dat) - estim <- suppressMessages(estimate_means(model)) - expect_equal(dim(estim), c(8, 7)) - estim <- suppressMessages(estimate_means(model, fixed = "factor3")) - expect_equal(dim(estim), c(4, 7)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(8L, 7L)) + expect_identical(dim(estim2), c(8L, 7L)) + expect_equal( + estim2$Mean, + c(0.27727, 0.23, 2.004, 2.6842, 1.05, 0.41818, 1.60435, 1.7), + tolerance = 1e-4 + ) + estim1 <- suppressMessages(estimate_means(model, by = c("factor1", "factor2", "factor3='E'"))) + estim2 <- suppressMessages(estimate_means(model, by = c("factor1", "factor2", "factor3='E'"), backend = "marginaleffects")) + expect_identical(dim(estim1), c(4L, 7L)) + expect_identical(dim(estim2), c(4L, 7L)) + expect_equal(estim1$Mean, estim2$Mean[order(estim2$factor2, decreasing = TRUE)], tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low[order(estim2$factor2, decreasing = TRUE)], tolerance = 1e-3) +}) +test_that("estimate_means() - glm", { + data(iris) # GLM + dat <- iris + dat$y <- as.numeric(as.factor(ifelse(dat$Sepal.Width > 3, "A", "B"))) - 1 + dat <<- dat + model <- glm(y ~ Species, family = "binomial", data = dat) + + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 4L)) + expect_true(all(estim1$Probability >= 0) & all(estim1$Probability <= 1)) + expect_true(all(estim2$Probability >= 0) & all(estim2$Probability <= 1)) + expect_equal(estim1$Probability, estim2$Probability, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-2) + expect_named(estim2, c("Species", "Probability", "CI_low", "CI_high")) + dat <- iris dat$Petal.Length_factor <- as.factor(ifelse(dat$Petal.Length < 4.2, "A", "B")) dat <<- dat model <- glm(Petal.Length_factor ~ Species, data = dat, family = "binomial") - estim <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim), c(3L, 5L)) - estim <- suppressMessages(estimate_means(model, transform = "none")) - expect_identical(dim(estim), c(3L, 5L)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects", verbose = FALSE)) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 4L)) + expect_equal(estim1$Probability, estim2$Probability, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-2) + + estim1 <- suppressMessages(estimate_means(model, transform = "none")) + estim2 <- suppressMessages(estimate_means(model, transform = "none", verbose = FALSE, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-2) model <- glm(Petal.Length ~ Species, data = iris, family = "Gamma") - estim <- suppressMessages(estimate_means(model)) - expect_identical(dim(estim), c(3L, 5L)) + estim1 <- suppressMessages(estimate_means(model)) + estim2 <- suppressMessages(estimate_means(model, backend = "marginaleffects")) + expect_identical(dim(estim1), c(3L, 5L)) + expect_identical(dim(estim2), c(3L, 4L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-2) }) + test_that("estimate_means() - mixed models", { - skip_if_not_installed("rstanarm") skip_if_not_installed("emmeans") skip_if_not_installed("lme4") skip_if_not_installed("glmmTMB") + + data(iris) dat <- iris dat$Petal.Length_factor <- as.factor(ifelse(dat$Petal.Length < 4.2, "A", "B")) dat <<- dat @@ -191,15 +310,44 @@ test_that("estimate_means() - mixed models", { data(Salamanders, package = "glmmTMB") m <- glmmTMB::glmmTMB( - count ~ mined * spp + (1|site), + count ~ mined * spp + (1 | site), zi = ~mined, family = poisson(), data = Salamanders ) out <- estimate_means(m, c("mined", "spp"), backend = "marginaleffects") expect_snapshot(print(out)) + out1 <- estimate_means(m, c("mined", "spp"), type = "conditional", backend = "marginaleffects") + out2 <- estimate_means(m, c("mined", "spp")) + expect_equal(out1$Mean[order(out1$spp)], out2$rate, tolerance = 1e-1) m <- glm(count ~ mined + spp, family = poisson(), data = Salamanders) out <- estimate_means(m, c("mined", "spp"), backend = "marginaleffects") expect_snapshot(print(out)) + out1 <- estimate_means(m, c("mined", "spp"), backend = "marginaleffects") + out2 <- estimate_means(m, c("mined", "spp")) + expect_equal(out1$Mean[order(out1$spp)], out2$rate, tolerance = 1e-3) + + data(sleepstudy, package = "lme4") + model <- lme4::lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) + + estim1 <- suppressMessages(estimate_means(model, by = "Days")) + estim2 <- suppressMessages(estimate_means(model, by = "Days", backend = "marginaleffects")) + expect_identical(dim(estim1), c(10L, 5L)) + expect_identical(dim(estim2), c(10L, 5L)) + expect_equal(estim1$Mean, estim2$Mean, tolerance = 1e-4) + expect_equal(estim1$CI_low, estim2$CI_low, tolerance = 1e-1) + + data(cbpp, package = "lme4") + gm1 <- lme4::glmer( + cbind(incidence, size - incidence) ~ period + (1 | herd), + data = cbpp, + family = "binomial" + ) + estim1 <- suppressMessages(estimate_means(gm1)) + estim2 <- suppressMessages(estimate_means(gm1, backend = "marginaleffects")) + expect_identical(dim(estim1), c(4L, 5L)) + expect_identical(dim(estim2), c(4L, 5L)) + expect_equal(estim2$Probability, c(0.21521, 0.0954, 0.08453, 0.05599), tolerance = 1e-3) + expect_equal(estim2$CI_low, c(0.14056, 0.04352, 0.0349, 0.0116), tolerance = 1e-3) }) diff --git a/tests/testthat/test-get_marginaleffects.R b/tests/testthat/test-get_marginaleffects.R index 15dbbc9a3..8ddaf5dbb 100644 --- a/tests/testthat/test-get_marginaleffects.R +++ b/tests/testthat/test-get_marginaleffects.R @@ -3,8 +3,6 @@ test_that("get_marginaleffects", { skip_if_not_installed("lme4") model <- lm(Sepal.Width ~ Species * Petal.Length, data = iris) - - expect_equal(nrow(get_marginaleffects(model, trend = "Petal.Length", by = "Species")), 3L) - - # get_marginaleffects(model, trend = "Petal.Length", by = "Species", length = 10) + out <- get_marginaleffects(model, trend = "Petal.Length", by = "Species") + expect_identical(nrow(out), 3L) }) diff --git a/vignettes/derivatives.Rmd b/vignettes/derivatives.Rmd index af66bd344..b2dd5676d 100644 --- a/vignettes/derivatives.Rmd +++ b/vignettes/derivatives.Rmd @@ -1,16 +1,16 @@ --- title: "Interpret simple and complex models using the power of Effect Derivatives" -output: +output: rmarkdown::html_vignette: toc: true fig_width: 10.08 fig_height: 6 tags: [r, estimate, estimate response, predictions, non linear, polynomial] vignette: > - %\VignetteIndexEntry{Interpret simple and complex models using the power of derivatives} + %\VignetteIndexEntry{Interpret simple and complex models using the power of Effect Derivatives} \usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console bibliography: bibliography.bib --- @@ -77,7 +77,7 @@ report::report(rez) print(report::report(cor.test(data$y, data$x)), width = 80) ``` -Great, we know there is a significant correlation between the two variables! 🥳 +Great, we know there is a significant correlation between the two variables! 🥳 But what we would like to know is **for every increase of 1 on *x*, how much does *y* increase?** In other words, what is the **slope** of the relationship? @@ -93,7 +93,7 @@ plot(modelbased::estimate_relation(model_lm)) parameters::parameters(model_lm) ``` -The parameters table shows us that the **effect of x** is `12.75`. This means that for every increase of 1 on `x`, `y` increases by `12.75`. +The parameters table shows us that the **effect of x** is `12.75`. This means that for every increase of 1 on `x`, `y` increases by `12.75`. Congrats, we've answered our question! @@ -136,7 +136,7 @@ In the figure above, the above plot shows a non-linear relationship between the You can see that the derivative peaks when the slope of the relationship is at its highest (when it is the steepest), and then decrease again until reaching 0. A zero-crossing of the derivative means an inversion of the the trend; it is when the relationship starts to be negative. -Derivatives can be computed for statistical models, including simple ones such as linear regressions. Before you look at the answer below, try to think and imagine what would the derivative plot of our previously computed linear model would look like? +Derivatives can be computed for statistical models, including simple ones such as linear regressions. Before you look at the answer below, try to think and imagine what would the derivative plot of our previously computed linear model would look like? We know that the slope is `12.75` (from our parameters analysis). Does it change across the course of the relationship? No, because it is a straight line, so the slope is constant. If the slope is constant, then the derivative should be... a constant line too, right? Let's verify this. @@ -256,6 +256,6 @@ plot(deriv, line = list(color = "blue")) + geom_hline(yintercept = 0, linetype = "dashed") ``` -Again, the GAM nicely recovered the shape of the relationship. +Again, the GAM nicely recovered the shape of the relationship. **In summary, effects derivatives can be used to easily leverage the power of GAMs.** diff --git a/vignettes/describe_nonlinear.Rmd b/vignettes/describe_nonlinear.Rmd index 9b7ee988b..09726ec74 100644 --- a/vignettes/describe_nonlinear.Rmd +++ b/vignettes/describe_nonlinear.Rmd @@ -1,6 +1,6 @@ --- title: "Model and describe non-linear relationships" -output: +output: rmarkdown::html_vignette: toc: true fig_width: 10.08 @@ -10,7 +10,7 @@ vignette: > %\VignetteIndexEntry{Model and describe non-linear relationships} \usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console bibliography: bibliography.bib --- @@ -129,7 +129,7 @@ df <- data_filter(df, Delay <= 14 & Memory >= 20) # Plot the density of the point and a loess smooth line ggplot(df, aes(x = Delay, y = Memory)) + - stat_density_2d(geom = "raster", aes(fill = ..density..), contour = FALSE) + + stat_density_2d(geom = "raster", aes(fill = after_stat(density)), contour = FALSE) + geom_jitter(width = 0.2, height = 0.2) + scale_fill_viridis_c() + geom_smooth(formula = "y ~ x", method = "loess", color = "red", se = FALSE) + diff --git a/vignettes/estimate_contrasts.Rmd b/vignettes/estimate_contrasts.Rmd index 785e66ce1..2ac24637d 100644 --- a/vignettes/estimate_contrasts.Rmd +++ b/vignettes/estimate_contrasts.Rmd @@ -1,6 +1,6 @@ --- title: "Contrast analysis" -output: +output: rmarkdown::html_vignette: toc: true fig_width: 10.08 @@ -10,7 +10,7 @@ vignette: > %\VignetteIndexEntry{Contrast analysis} \usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console bibliography: bibliography.bib --- @@ -119,14 +119,14 @@ contrasts$Contrast <- paste(contrasts$Level1, "-", contrasts$Level2) # Visualise the changes in the differences ggplot(contrasts, aes(x = Petal.Width, y = Difference)) + geom_ribbon(aes(fill = Contrast, ymin = CI_low, ymax = CI_high), alpha = 0.2) + - geom_line(aes(colour = Contrast), size = 1) + + geom_line(aes(colour = Contrast), linewidth = 1) + geom_hline(yintercept = 0, linetype = "dashed") + theme_modern() + ylab("Difference") ``` As we can see, the difference between *versicolor* and *virginica* increases as -`Petal.Width` increases. +`Petal.Width` increases. # Conclusion diff --git a/vignettes/estimate_means.Rmd b/vignettes/estimate_means.Rmd index f05bb53bb..9440ffc51 100644 --- a/vignettes/estimate_means.Rmd +++ b/vignettes/estimate_means.Rmd @@ -1,6 +1,6 @@ --- title: "What are, why use and how to get marginal means" -output: +output: rmarkdown::html_vignette: toc: true fig_width: 10.08 @@ -10,7 +10,7 @@ vignette: > %\VignetteIndexEntry{What are, why use and how to get marginal means} \usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console bibliography: bibliography.bib --- @@ -37,7 +37,7 @@ if (!requireNamespace("ggplot2", quietly = TRUE) || set.seed(333) ``` -This vignette will introduce the concept of **marginal means**. +This vignette will introduce the concept of **marginal means**. # Raw Means @@ -79,7 +79,7 @@ ggplot(iris, aes(x = Species, y = Sepal.Width, fill = Species)) + However, these **raw means** might be biased, as the number of observations in each group might be different. Moreover, there might be some hidden covariance or mediation with other variables in the dataset, creating a "spurious" influence -on the means. +on the means. **How can we take these influences into account while calculating means?** @@ -116,7 +116,7 @@ plot: ggplot(iris, aes(x = Species, y = Sepal.Width, fill = Species)) + geom_violin() + geom_jitter2(width = 0.05, alpha = 0.5) + - geom_line(data = means, aes(y = Mean, group = 1), size = 1) + + geom_line(data = means, aes(y = Mean, group = 1), linewidth = 1) + geom_pointrange( data = means, aes(y = Mean, ymin = CI_low, ymax = CI_high), @@ -172,7 +172,7 @@ ggplot(iris, aes(x = Species, y = Sepal.Width, fill = Species)) + That's interesting! It seems that after adjusting the model for petal characteristics, the differences between `Species` seems to be even bigger! -**But are these differences "significant"?** +**But are these differences "significant"?** That's where the contrast analysis comes into play! Click [here to read the tutorial on **contrast analysis**](https://easystats.github.io/modelbased/articles/estimate_contrasts.html).