diff --git a/01 path analysis/Appendix - advanced plotting.R b/01 path analysis/Appendix - advanced plotting.R new file mode 100644 index 0000000..f0dacff --- /dev/null +++ b/01 path analysis/Appendix - advanced plotting.R @@ -0,0 +1,129 @@ +library(lavaan) +library(tidySEM) +library(dplyr) + +income_psych <- read.csv("income_psych.csv") + +mediation_model <- ' + mood_neg ~ anxiety + income ~ anxiety + mood_neg +' +fit <- sem(mediation_model, data = income_psych) + +## See below for how "plot1.png" and "plot2.png" were made. + + + +# Changing Nodes (Vars) --------------------------------------------------- + +(nods <- get_nodes(fit)) + +nods <- nods %>% + mutate(label = c("Negative\nMood", "Income", "Anxiety")) # \n is a break line + +graph_sem(fit, + nodes = nods) + + +# Changing Layout --------------------------------------------------------- + +lay <- get_layout( + NA, "mood_neg", NA, + "anxiety", NA, "income", + rows = 2 +) + +graph_sem(fit, + nodes = nods, + layout = lay, angle = 90) + + + +# Changing Edges (Paths) -------------------------------------------------- + + +(edgs <- get_edges(fit, + columns = c("est_std", "confint_std", "pval_std"), + digits = 3)) + +edgs <- edgs %>% + mutate( + label = paste0(est_std, "\n", confint_std), + color = "black", + color = replace(color, as.numeric(est_std) > 0 & to!=from, "green"), + color = replace(color, as.numeric(est_std) < 0, "red"), + linetype = "solid", + linetype = replace(linetype, as.numeric(pval_std) > 0.05, "dashed") + ) + +graph_sem(fit, + nodes = nods, + layout = lay, angle = 90, + edges = edgs) + + +# Making "plot1" ---------------------------------------------------------- + +mediation_model <- ' + mood_neg ~ anxiety + income ~ anxiety + mood_neg +' + +fit <- sem(mediation_model, data = income_psych) + + + + +nods <- get_nodes(fit) %>% + mutate(label = c("Negative\nMood", "Income", "Anxiety")) # \n is a break line + +lay <- get_layout( + NA, "mood_neg", NA, + "anxiety", NA, "income", + rows = 2 +) + +edgs <- get_edges(fit) %>% + filter(to != from) %>% + mutate(label = "") + +graph_sem(fit, + nodes = nods, edges = edgs, + layout = lay, angle = 90) + +ggplot2::ggsave("plot1.png", height = 3, width = 6) + + + + + +# Making "plot2" ---------------------------------------------------------- + + +mediation_model <- ' + mood_neg ~ a * anxiety + anxiety ~ b * shyness + income ~ c * anxiety + d * mood_neg + e * shyness +' + +fit <- sem(mediation_model, data = income_psych) + +nods <- get_nodes(fit) %>% + mutate(label = c("Negative\nMood", "Anxiety", "Income", "Shyness")) # \n is a break line + +lay <- get_layout( + NA, NA, "mood_neg", NA, + NA, "anxiety", NA, "income", + "shyness", NA, NA, NA, + rows = 3 +) + +edgs <- get_edges(fit) %>% + filter(to != from) %>% + mutate(label = "") + +graph_sem(fit, + nodes = nods, edges = edgs, + layout = lay, angle = 90) + +ggplot2::ggsave("plot2.png", height = 3, width = 6) diff --git a/01 path analysis/Exercise 1 - solution.R b/01 path analysis/Exercise 1 - solution.R deleted file mode 100644 index 88c6680..0000000 --- a/01 path analysis/Exercise 1 - solution.R +++ /dev/null @@ -1,74 +0,0 @@ -# Exercise ---------------------------------------------------------------- - -income_psych <- read.csv("income_psych.csv") -library(lavaan) - -## 1. Look at the plot in "plot2.png". Fit this model with `lavaan`. - -model <- " -# We need to build a regression equation for each endogenous variable: - anxiety ~ shyness -mood_neg ~ anxiety - income ~ anxiety + mood_neg + shyness -" - -fit <- sem(model, data = income_psych) -summary(fit, standardize = TRUE) - - -library(semPlot) -semPaths(fit) -# See `advanced plotting` for making "plot2.png" - - - - - - -## 2. Compute all the paths in this model from *anxiety* to *income*, and -## the total of these paths. -model <- " - anxiety ~ a * shyness -mood_neg ~ b * anxiety - income ~ c * anxiety + d * mood_neg + e * shyness - - # Define paths: - direct := c - ind_shyness := a * e - ind_mood := b * d - total := direct + ind_shyness + ind_mood -" - -fit <- sem(model, data = income_psych) -summary(fit, standardize = TRUE) -# Seems like the total path is not significant. -# Looks like a case of supression... Some paths are +, some are -... -# They LOOK like they cancel each other out! - -# However... -# The total is significant in the standerdized solution!! * -standardizedsolution(fit, output = "text") - -## - Why is the std total not equal exactly to the real correlation? -cor(income_psych$anxiety, income_psych$income) -# Theres a diff of (-0.001). Because the model isn't *just-identified*. -# We are missing the arrow from `shyness` to `mood_neg`. - - -## - is it very different? -# The diff is of (-0.001). Is this a lot? Is it significant? -# We will see next time how to answer these questions! - - -# * ----------------------------------------------------------------------- - -# Why does this happen? -# In SEM, standard errors (SE) are NOT invariant - that is, data -# transformation do not affect SEs the same way they do estimates (the -# parameters). -# However, Chis-square atatistics ARE invariant - so this is not an issue -# in model comparison (which we will learn next week). -# -# You can read more (mathy), here: -# https://doi.org/10.1007/BF01065882 -# (Thanks to Michael Neale for his explanation) \ No newline at end of file diff --git a/01 path analysis/advanced plotting.R b/01 path analysis/advanced plotting.R deleted file mode 100644 index 3fa540f..0000000 --- a/01 path analysis/advanced plotting.R +++ /dev/null @@ -1,184 +0,0 @@ -library(lavaan) -library(tidySEM) -library(dplyr) - -income_psych <- read.csv("income_psych.csv") - -mediation_model <- ' - income ~ anxiety + mood_neg - mood_neg ~ anxiety -' -fit <- sem(mediation_model, data = income_psych) - -## See below for how "plot1.png" and "plot2.png" were made. - -## Based on: -## https://cjvanlissa.github.io/tidySEM/articles/Plotting_graphs.html - - -# Changing the layout ----------------------------------------------------- - -lay <- get_layout(NA, "mood_neg", NA, - NA, NA, NA, - "anxiety", NA, "income", - rows = 3) - - - - -graph_data <- prepare_graph(fit, layout = lay, label = "est_std") - -plot(graph_data) - - - - -# Adding var labels ------------------------------------------------------- - -# We can add labels to our variables (our "nodes") by chaging the "label" column -# in the "nodes" part of the plot: - -(N <- nodes(graph_data)) - -nodes(graph_data) <- N %>% - mutate(label = c("Anxiety", "Income", "Negative Mood")) - -plot(graph_data) - - - -# Adding / changing arrow labels ------------------------------------------ - - -(tab <- table_results(fit, columns = c("label", "est_std", "confint_std", "pval"), digits = 2)) - -(E <- edges(graph_data)) - -edges(graph_data) <- E %>% - mutate(est_std = tab$est_std, - label = paste0(round(est_std, 2), " ",tab$confint)) - -plot(graph_data) - - - - - -# Even more --------------------------------------------------------------- - -E <- edges(graph_data) - -edges(graph_data) <- E %>% - mutate( - # color lines by sign: - colour = case_when( - arrow == "both" ~ "black", - est_std < 0 ~ "red", - est_std > 0 ~ "green" - ), - # mark which is sig - linetype = c("dashed","dashed","solid", "dashed", "solid", "solid") - ) - - -N <- nodes(graph_data) - -nodes(graph_data) <- N %>% - mutate(size = c(1,3,1)) - -plot(graph_data) - - - - - -# Making "plot1" ---------------------------------------------------------- - -mediation_model <- ' - income ~ anxiety + mood_neg - mood_neg ~ anxiety -' - -fit <- sem(mediation_model, data = income_psych) - - - -lay <- get_layout(NA, "mood_neg", NA, - "anxiety", NA, "income", - rows = 2) - -graph_data <- prepare_graph(fit, layout = lay) - -E <- edges(graph_data) -E$label <- "" -E$connect_from[3] <- "top" -E$connect_to[3] <- "left" -E$connect_from[2] <- "right" -E <- E[1:3,] -edges(graph_data) <- E - - - -(N <- nodes(graph_data)) -N$label <- c("Anxiety", "Income", "Negative Mood") -nodes(graph_data) <- N - - - - - -(p1 <- plot(graph_data)) - -# because the result it a ggplot, we will use ggsave to save it -ggplot2::ggsave(p1, filename = "plot1.png", width = 6, height = 3) - - - - - -# Making "plot2" ---------------------------------------------------------- - - -mediation_model <- ' - income ~ anxiety + mood_neg + shyness - mood_neg ~ anxiety - anxiety ~ shyness -' - -fit <- sem(mediation_model, data = income_psych) - - - - - - -lay <- get_layout(NA, NA, "mood_neg", NA, - NA, "anxiety", NA, "income", - "shyness", NA, NA, NA, - rows = 3) - -graph_data <- prepare_graph(fit, layout = lay) - -E <- edges(graph_data) -E$label <- "" -E$connect_from[5] <- "top" -E$connect_to[5] <- "left" -E$connect_from[4] <- "top" -E$connect_to[4] <- "left" -E$connect_to[3] <- "bottom" -E <- E[1:5,] -edges(graph_data) <- E - -plot(graph_data) - -(N <- nodes(graph_data)) -N$label <- c("Anxiety", "Income", "Negative Mood", "Shyness") -N$node_xmin[3] <- N$node_xmin[3]-0.5 -N$node_xmax[3] <- N$node_xmax[3]+0.5 -nodes(graph_data) <- N - - -(p2 <- plot(graph_data)) - -# because the result it a ggplot, we will use ggsave to save it -ggplot2::ggsave(p2, filename = "plot2.png", width = 6, height = 3) diff --git a/01 path analysis/path analysis.R b/01 path analysis/path analysis.R index 87f02fa..3476ed7 100644 --- a/01 path analysis/path analysis.R +++ b/01 path analysis/path analysis.R @@ -1,29 +1,40 @@ +# We will be using a simple mediation model to demonstrate the principals of +# path analysis. + income_psych <- read.csv("income_psych.csv") head(income_psych) -# Testing mediation: +# Our question: # Does negative mood mediate the relationship between anxiety and income? # Graphically, it looks like "plot1.png" + + + + # Manual path analysis ---------------------------------------------------- m1 <- lm(income ~ anxiety + mood_neg, income_psych) m2 <- lm(mood_neg ~ anxiety, income_psych) +# We can "build" the paths by multiplying the correct coefficiants: (coef1 <- coef(m1)) (coef2 <- coef(m2)) -direct <- unname(coef1[2]) -indirect <- unname(coef1[3] * coef2[2]) +direct <- unname(coef1['anxiety']) +indirect <- unname(coef1['mood_neg'] * coef2['anxiety']) + +(mediation_by_hand <- c(direct = direct, + indirect = indirect, + total = direct + indirect)) + +# Is the total correct? +lm(income ~ anxiety, data = income_psych) + -mediation_by_hand <- c(direct = direct, - indirect = indirect, - total = direct + indirect) -summary(lm(income ~ anxiety, income_psych)) -mediation_by_hand # With lavaan ------------------------------------------------------------- @@ -32,27 +43,50 @@ mediation_by_hand library(lavaan) -# modelss are written as a multi-line charachter object: +# model specification are written as a multi-line character: mediation_model <- ' - income ~ anxiety + mood_neg mood_neg ~ anxiety + income ~ anxiety + mood_neg ' # fit the model to the data: fit <- sem(mediation_model, data = income_psych) -summary(fit) # (why is the Test Statistic 0?) + +## Model summary +summary(fit) # get estimates + tests statistics +# Note that by DEFAULT, lavaan estimates all residual errors. +# (why is the Test Statistic 0?) summary(fit, standardize = TRUE) # look at Std.all for beta summary(fit, rsquare = TRUE) # See regression R2 -# Note that by DEFAULT, lavaan estimates all residual errors. -# Modifiers! -------------------------------------------------------------- -# how to compute the paths? With *modifiers*! -# - We can mark parameters with a "stand-ins" followed by "*" -# - We can have lavaan compute effects with ":=" +## Confidence intervals: +parameterestimates(fit, ci = TRUE, output = "text") # ci for the raw estimates +standardizedSolution(fit, ci = TRUE, output = "text") # ci for the std estimates +# Note that the tests from these 2 functions CAN BE DIFFERENT. Why? + + +## Bootstrap +fit_with_boot <- sem(mediation_model, data = income_psych, + se = "bootstrap", bootstrap = 200) +summary(fit_with_boot, standardize = TRUE) + + + + + + +## Modifiers! ------------ + +# - We can mark parameters with a "stand-ins" followed by "*" - these stand-ins +# are called *modifiers*. +# - We can have lavaan compute different estimates with ":=" +# +# This allows up to estimate paths in lavaan: + mediation_model <- ' # regressions mood_neg ~ a * anxiety @@ -66,23 +100,22 @@ mediation_model <- ' ' fit <- sem(mediation_model, data = income_psych) + summary(fit, standardize = TRUE) # look at "Defined Parameters" mediation_by_hand cor(income_psych$anxiety, income_psych$income) -## Confidence intervals: -summary(fit, standardize = TRUE, ci = TRUE) # ci for the raw estimates -standardizedSolution(fit, ci = TRUE, output = "text") # ci for the std estimates -## Bootstrap -fit_with_boot <- sem(mediation_model, data = income_psych, - se = "bootstrap", bootstrap = 200) -summary(fit_with_boot, standardize = TRUE) -# Compare paths ----------------------------------------------------------- -# We can also compare paths directly: + +## Compare paths -------- + +# With the ":=" operator, we can estimate many things! +# E.g., we can compare paths: + + mediation_model <- ' # regressions mood_neg ~ a * anxiety @@ -96,39 +129,57 @@ mediation_model <- ' path_diff := direct - indirect ' + fit <- sem(mediation_model, data = income_psych) # look at "Defined Parameters" parameterEstimates(fit, output = "text") # summary gives these test values standardizedSolution(fit, output = "text") -# Note that the tests from these 2 functions CAN BE DIFFERENT. Why? + + + fit <- sem(mediation_model, data = income_psych, likelihood = "wishart") # likelihood = "wishart" used an unbiased cov-matrix, and gives # similar results to AMOS (SPSS). -# In large samples this heardly has any effects... -# Read more about the varouls likelihoods and estimators: +# In large samples this hardly has any effects... +# Read more about the various likelihoods and estimators: # http://lavaan.ugent.be/tutorial/est.html -# Plotting ---------------------------------------------------------------- + + + + + + + + +## Plotting -------- library(tidySEM) graph_sem(fit) - +graph_sem(fit, label = "est") graph_sem(fit, label = "est_std") # See also advanced plotting.R + + + # Exercise ---------------------------------------------------------------- -# 1. Look at the plot in "plot2.png". Fit this model with `lavaan`. +# 1. Look at the plot in "plot2.png". +# - Explain the causal relationship in this model (in words). +# - Fit this model with `lavaan`. # 2. Compute all the paths in this model from *anxiety* to *income*, and # the total of these paths. -# - Why is the std total not equal exactly to the real correlation? -# - is it very different? +# 3. Why is the std total not *equal* exactly to the real correlation? +# - Is it very different? What does this mean? +# 4. Compute the difference between the two indirect paths. +# - How big is it? Is it significant? diff --git a/01 path analysis/plot1.png b/01 path analysis/plot1.png index 1580245..decc58d 100644 Binary files a/01 path analysis/plot1.png and b/01 path analysis/plot1.png differ diff --git a/01 path analysis/plot2.png b/01 path analysis/plot2.png index d1f0dd3..3e2e4de 100644 Binary files a/01 path analysis/plot2.png and b/01 path analysis/plot2.png differ