problem set 4 solutions

pluplu68 · Dec 10, 2021 · 8afa088 · 8afa088
1 parent 6362a16
commit 8afa088
Show file tree

Hide file tree

Showing 6 changed files with 1,451 additions and 1 deletion.
diff --git a/problem_sets/problem_set_4/problem_set_4_solutions.R b/problem_sets/problem_set_4/problem_set_4_solutions.R
@@ -0,0 +1,261 @@
+########################################################
+##### ResEcon 703: Topics in Advanced Econometrics #####
+#####         Problem set 4 solution code          #####
+#####         Matt Woerman, UMass Amherst          #####
+########################################################
+
+### Load packages for problem set
+library(tidyverse)
+
+
+### Problem 1 solutions --------------------------------------------------------
+
+### Part a
+## Load dataset
+data_camping <- read_csv('camping.csv')
+## Set seed for replication
+set.seed(703)
+## Draw standard normal random variables and split into list
+draws_camper <- map(1:1000, ~ tibble(time_draw = rnorm(100),
+                                     mountain_draw = rnorm(100)))
+## Split data into list by camper
+data_camper <- data_camping %>% 
+  group_by(camper_id) %>% 
+  group_split()
+## Function to simulate choice probabilities for one individual
+sim_probs_ind <- function(params, draws_ind, data_ind){
+  ## Select relevant variables and convert into a matrix
+  data_matrix <- data_ind %>% 
+    select(cost, time, mountain) %>% 
+    as.matrix()
+  ## Transform random draws into coefficients based on parameters
+  coef_matrix <- draws_ind %>% 
+    mutate(cost_coef = params[1],
+           time_coef = params[2] + params[4] * time_draw,
+           mountain_coef = params[3] + params[5] * mountain_draw) %>% 
+    select(cost_coef, time_coef, mountain_coef) %>% 
+    as.matrix()
+  ## Calculate representative utility for each alternative in each draw
+  utility <- (coef_matrix %*% t(data_matrix)) %>% 
+    pmin(700) %>% 
+    pmax(-700)
+  ## Sum the exponential of utility over alternatives
+  prob_denom <- utility %>% 
+    exp() %>% 
+    rowSums()
+  ## Calculate the conditional probability for each alternative in each draw
+  cond_prob <- exp(utility) / prob_denom
+  ## Calculate simulated choice probabilities as the means over all draws
+  sim_prob <- colMeans(cond_prob)
+  ## Add simulated choice probability to initial dataset
+  data_ind_out <- data_ind %>% 
+    mutate(prob = sim_prob)
+  ## Return initial dataset with simulated probability variable
+  return(data_ind_out)
+}
+## Function to calculate simulated log-likelihood
+sim_ll_fn <- function(params, draws_list, data_list){
+  ## Simulate probabilities for each individual
+  data_sim_ind <- map2(.x = draws_list, .y = data_list,
+                       .f = ~ sim_probs_ind(params = params, 
+                                            draws_ind = .x, 
+                                            data_ind = .y))
+  ## Combine individual datasets into one
+  data_sim <- data_sim_ind %>% 
+    bind_rows()
+  ## Calculate the log of simulated probability for the chosen alternative
+  data_sim <- data_sim %>% 
+    filter(visit == 1) %>% 
+    mutate(log_prob = log(prob))
+  ## Calculate the simulated log-likelihood
+  sim_ll <- sum(data_sim$log_prob)
+  ## Return the negative of simulated log-likelihood
+  return(-sim_ll)
+}
+## Maximize the log-likelihood function
+model_1a <- optim(par = c(-0.02, -0.005, -0.8, 0.002, 5), fn = sim_ll_fn, 
+                  draws_list = draws_camper, data_list = data_camper, 
+                  method = 'BFGS', hessian = TRUE)
+## Function to summarize MLE model results
+summarize_mle <- function(model, names){
+  ## Extract model parameter estimates
+  parameters <- model$par
+  ## Calculate parameters standard errors
+  std_errors <- model$hessian %>% 
+    solve() %>% 
+    diag() %>% 
+    sqrt()
+  ## Calculate parameter z-stats
+  z_stats <- parameters / std_errors
+  ## Calculate parameter p-values
+  p_values <- 2 * pnorm(-abs(z_stats))
+  ## Summarize results in a list
+  model_summary <- tibble(names = names,
+                          parameters = parameters,
+                          std_errors = std_errors,
+                          z_stats = z_stats,
+                          p_values = p_values)
+  ## Return model_summary object
+  return(model_summary)
+}
+## Summarize model results
+summarize_mle(model_1a, 
+              c('cost', 'time', 'mountain', 'sd.time', 'sd.mountain'))
+
+### Part b
+## Function to simulate elasticities for one individual
+sim_elas_ind <- function(params, draws_ind, data_ind){
+  ## Select relevant variables and convert into a matrix
+  data_matrix <- data_ind %>% 
+    select(cost, time, mountain) %>% 
+    as.matrix()
+  ## Transform random draws into coefficients based on parameters
+  coef_matrix <- draws_ind %>% 
+    mutate(cost_coef = params[1],
+           time_coef = params[2] + params[4] * time_draw,
+           mountain_coef = params[3] + params[5] * mountain_draw) %>% 
+    select(cost_coef, time_coef, mountain_coef) %>% 
+    as.matrix()
+  ## Calculate representative utility for each alternative in each draw
+  utility <- (coef_matrix %*% t(data_matrix)) %>% 
+    pmin(700) %>% 
+    pmax(-700)
+  ## Sum the exponential of utility over alternatives
+  prob_denom <- utility %>% 
+    exp() %>% 
+    rowSums()
+  ## Calculate the conditional probability for each alternative in each draw
+  cond_prob <- exp(utility) / prob_denom
+  ## Calculate simulated choice probabilities as the means over all draws
+  sim_prob <- colMeans(cond_prob)
+  ## Calculate simulated integral for own elasticity
+  sim_int_own_elas <- params[1] * 
+    mean(cond_prob[, 1] * (1 - cond_prob[, 1]))
+  ## Calculate simulated integral for cross elasticities
+  sim_int_cross_elas <- params[1] * 
+    colMeans(cond_prob[, 1] * cond_prob[, -1])
+  ## Combine elasticity simulated integrals into one vector
+  sim_int_elas <- c(sim_int_own_elas, sim_int_cross_elas)
+  ## Calculate own-price and cross-price simulated elasticities
+  sim_elas <- c(1, rep(-1, 4)) * data_ind$cost[1] / sim_prob * sim_int_elas
+  ## Add simulated elasticities to initial dataset
+  data_ind_out <- data_ind %>% 
+    mutate(elasticity = sim_elas)
+  ## Return initial dataset with simulated elasticity variable
+  return(data_ind_out)
+}
+## Simulate elasticities for each individual
+data_1b_ind <- map2(.x = draws_camper, .y = data_camper,
+                    .f = ~ sim_elas_ind(params = model_1a$par, 
+                                        draws_ind = .x, 
+                                        data_ind = .y))
+## Combine list of data into one tibble
+data_1b <- data_1b_ind %>% 
+  bind_rows()
+## Calculate average elasticity with respect to cost of alternative 1
+data_1b %>% 
+  group_by(park_id, park) %>% 
+  summarize(elasticity = mean(elasticity), .groups = 'drop') %>% 
+  arrange(park_id)
+
+
+### Problem 2 solutions --------------------------------------------------------
+
+### Part a
+## Function to simulate individual coefficients for one individual
+calc_mean_coefs <- function(params, draws_ind, data_ind){
+  ## Select relevant variables and convert into a matrix
+  data_matrix <- data_ind %>% 
+    select(cost, time, mountain) %>% 
+    as.matrix()
+  ## Transform random draws into coefficients based on parameters
+  coef <- draws_ind %>% 
+    mutate(cost_coef = params[1],
+           time_coef = params[2] + params[4] * time_draw,
+           mountain_coef = params[3] + params[5] * mountain_draw) %>% 
+    select(cost_coef, time_coef, mountain_coef)
+  ## Convert coefficients tibble to a matrix
+  coef_matrix <- as.matrix(coef)
+  ## Calculate representative utility for each alternative in each draw
+  utility <- (coef_matrix %*% t(data_matrix)) %>% 
+    pmin(700) %>% 
+    pmax(-700)
+  ## Sum the exponential of utility over alternatives
+  prob_denom <- utility %>% 
+    exp() %>% 
+    rowSums()
+  ## Calculate the conditional probability for each alternative in each draw
+  cond_prob <- exp(utility) / prob_denom
+  ## Calculate the numerator of the draw weights as probability of chosen alt
+  weights_num <- c(cond_prob %*% data_ind$visit)
+  ## Calculate the draw weights
+  weights <- weights_num / sum(weights_num)
+  ## Add draw weights to dataset of coefficients
+  coef <- coef %>%
+    mutate(weight = weights)
+  ## Calculate weighted mean for each coefficient
+  coef_means <- coef %>%
+    summarize(cost_coef = sum(cost_coef * weight),
+              time_coef_mean = sum(time_coef * weight),
+              mountain_coef_mean = sum(mountain_coef * weight))
+  ## Add individual coefficient means to initial dataset
+  data_ind_out <- data_ind %>% 
+    bind_cols(coef_means)
+  ## Return initial dataset with simulated probability variable
+  return(data_ind_out)
+}
+## Calculate mean coefficients for each individual
+data_2a_ind <- map2(.x = draws_camper, .y = data_camper,
+                    .f = ~ calc_mean_coefs(params = model_1a$par, 
+                                           draws_ind = .x, 
+                                           data_ind = .y))
+## Combine list of data into one tibble
+data_2a <- data_2a_ind %>% 
+  bind_rows()
+## Calculate average coefficients for each camping park
+coef_park_2a <- data_2a %>% 
+  filter(visit == 1) %>% 
+  group_by(park_id, park) %>% 
+  summarize(cost_coef = mean(cost_coef), 
+            time_coef_mean = mean(time_coef_mean), 
+            mountain_coef_mean = mean(mountain_coef_mean),
+            .groups = 'drop')
+coef_park_2a
+## Calculate mean values for each camping park
+coef_park_2a %>% 
+  mutate(time_value = time_coef_mean / cost_coef * 60,
+         mountain_value = mountain_coef_mean / -cost_coef) %>% 
+  select(park_id, park, time_value, mountain_value)
+
+### Part a canned solutions ----------------------------------------------------
+## Load mlogit package
+library(mlogit)
+## Convert dataset to dfidx format
+data_dfidx <- dfidx(data = data_camping, shape = 'long', 
+                    choice = 'visit', idx = c('camper_id', 'park_id'))
+## Model camping park visit as a mixed logit with fixed cost coefficient
+model_2a_alt <- mlogit(formula = visit ~ cost + time + mountain | 0 | 0, 
+                       data = data_dfidx, 
+                       rpar = c(time = 'n', mountain = 'n'), 
+                       R = 100, seed = 703)
+## Calculate mean coefficients for each individual
+model_2a_alt_params_ind <- fitted(model_2a_alt, type = 'parameters')
+## Add mean coefficients to dataset
+data_2a_alt <- data_camping %>% 
+  filter(visit == 1) %>% 
+  mutate(cost_coef = coef(model_2a_alt)[1],
+         time_coef_mean = model_2a_alt_params_ind[, 1],
+         mountain_coef_mean = model_2a_alt_params_ind[, 2])
+## Calculate average coefficients for each camping park
+coef_park_2a_alt <- data_2a_alt %>%
+  group_by(park_id, park) %>% 
+  summarize(cost_coef = mean(cost_coef), 
+            time_coef_mean = mean(time_coef_mean), 
+            mountain_coef_mean = mean(mountain_coef_mean),
+            .groups = 'drop')
+coef_park_2a_alt
+## Calculate mean values for each camping park
+coef_park_2a_alt %>% 
+  mutate(time_value = time_coef_mean / cost_coef * 60,
+         mountain_value = mountain_coef_mean / -cost_coef) %>% 
+  select(park_id, park, time_value, mountain_value)