Skip to content

Commit

Permalink
problem set 4 solutions
Browse files Browse the repository at this point in the history
  • Loading branch information
woerman committed Dec 10, 2021
1 parent 6362a16 commit 8afa088
Show file tree
Hide file tree
Showing 6 changed files with 1,451 additions and 1 deletion.
261 changes: 261 additions & 0 deletions problem_sets/problem_set_4/problem_set_4_solutions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
########################################################
##### ResEcon 703: Topics in Advanced Econometrics #####
##### Problem set 4 solution code #####
##### Matt Woerman, UMass Amherst #####
########################################################

### Load packages for problem set
library(tidyverse)


### Problem 1 solutions --------------------------------------------------------

### Part a
## Load dataset
data_camping <- read_csv('camping.csv')
## Set seed for replication
set.seed(703)
## Draw standard normal random variables and split into list
draws_camper <- map(1:1000, ~ tibble(time_draw = rnorm(100),
mountain_draw = rnorm(100)))
## Split data into list by camper
data_camper <- data_camping %>%
group_by(camper_id) %>%
group_split()
## Function to simulate choice probabilities for one individual
sim_probs_ind <- function(params, draws_ind, data_ind){
## Select relevant variables and convert into a matrix
data_matrix <- data_ind %>%
select(cost, time, mountain) %>%
as.matrix()
## Transform random draws into coefficients based on parameters
coef_matrix <- draws_ind %>%
mutate(cost_coef = params[1],
time_coef = params[2] + params[4] * time_draw,
mountain_coef = params[3] + params[5] * mountain_draw) %>%
select(cost_coef, time_coef, mountain_coef) %>%
as.matrix()
## Calculate representative utility for each alternative in each draw
utility <- (coef_matrix %*% t(data_matrix)) %>%
pmin(700) %>%
pmax(-700)
## Sum the exponential of utility over alternatives
prob_denom <- utility %>%
exp() %>%
rowSums()
## Calculate the conditional probability for each alternative in each draw
cond_prob <- exp(utility) / prob_denom
## Calculate simulated choice probabilities as the means over all draws
sim_prob <- colMeans(cond_prob)
## Add simulated choice probability to initial dataset
data_ind_out <- data_ind %>%
mutate(prob = sim_prob)
## Return initial dataset with simulated probability variable
return(data_ind_out)
}
## Function to calculate simulated log-likelihood
sim_ll_fn <- function(params, draws_list, data_list){
## Simulate probabilities for each individual
data_sim_ind <- map2(.x = draws_list, .y = data_list,
.f = ~ sim_probs_ind(params = params,
draws_ind = .x,
data_ind = .y))
## Combine individual datasets into one
data_sim <- data_sim_ind %>%
bind_rows()
## Calculate the log of simulated probability for the chosen alternative
data_sim <- data_sim %>%
filter(visit == 1) %>%
mutate(log_prob = log(prob))
## Calculate the simulated log-likelihood
sim_ll <- sum(data_sim$log_prob)
## Return the negative of simulated log-likelihood
return(-sim_ll)
}
## Maximize the log-likelihood function
model_1a <- optim(par = c(-0.02, -0.005, -0.8, 0.002, 5), fn = sim_ll_fn,
draws_list = draws_camper, data_list = data_camper,
method = 'BFGS', hessian = TRUE)
## Function to summarize MLE model results
summarize_mle <- function(model, names){
## Extract model parameter estimates
parameters <- model$par
## Calculate parameters standard errors
std_errors <- model$hessian %>%
solve() %>%
diag() %>%
sqrt()
## Calculate parameter z-stats
z_stats <- parameters / std_errors
## Calculate parameter p-values
p_values <- 2 * pnorm(-abs(z_stats))
## Summarize results in a list
model_summary <- tibble(names = names,
parameters = parameters,
std_errors = std_errors,
z_stats = z_stats,
p_values = p_values)
## Return model_summary object
return(model_summary)
}
## Summarize model results
summarize_mle(model_1a,
c('cost', 'time', 'mountain', 'sd.time', 'sd.mountain'))

### Part b
## Function to simulate elasticities for one individual
sim_elas_ind <- function(params, draws_ind, data_ind){
## Select relevant variables and convert into a matrix
data_matrix <- data_ind %>%
select(cost, time, mountain) %>%
as.matrix()
## Transform random draws into coefficients based on parameters
coef_matrix <- draws_ind %>%
mutate(cost_coef = params[1],
time_coef = params[2] + params[4] * time_draw,
mountain_coef = params[3] + params[5] * mountain_draw) %>%
select(cost_coef, time_coef, mountain_coef) %>%
as.matrix()
## Calculate representative utility for each alternative in each draw
utility <- (coef_matrix %*% t(data_matrix)) %>%
pmin(700) %>%
pmax(-700)
## Sum the exponential of utility over alternatives
prob_denom <- utility %>%
exp() %>%
rowSums()
## Calculate the conditional probability for each alternative in each draw
cond_prob <- exp(utility) / prob_denom
## Calculate simulated choice probabilities as the means over all draws
sim_prob <- colMeans(cond_prob)
## Calculate simulated integral for own elasticity
sim_int_own_elas <- params[1] *
mean(cond_prob[, 1] * (1 - cond_prob[, 1]))
## Calculate simulated integral for cross elasticities
sim_int_cross_elas <- params[1] *
colMeans(cond_prob[, 1] * cond_prob[, -1])
## Combine elasticity simulated integrals into one vector
sim_int_elas <- c(sim_int_own_elas, sim_int_cross_elas)
## Calculate own-price and cross-price simulated elasticities
sim_elas <- c(1, rep(-1, 4)) * data_ind$cost[1] / sim_prob * sim_int_elas
## Add simulated elasticities to initial dataset
data_ind_out <- data_ind %>%
mutate(elasticity = sim_elas)
## Return initial dataset with simulated elasticity variable
return(data_ind_out)
}
## Simulate elasticities for each individual
data_1b_ind <- map2(.x = draws_camper, .y = data_camper,
.f = ~ sim_elas_ind(params = model_1a$par,
draws_ind = .x,
data_ind = .y))
## Combine list of data into one tibble
data_1b <- data_1b_ind %>%
bind_rows()
## Calculate average elasticity with respect to cost of alternative 1
data_1b %>%
group_by(park_id, park) %>%
summarize(elasticity = mean(elasticity), .groups = 'drop') %>%
arrange(park_id)


### Problem 2 solutions --------------------------------------------------------

### Part a
## Function to simulate individual coefficients for one individual
calc_mean_coefs <- function(params, draws_ind, data_ind){
## Select relevant variables and convert into a matrix
data_matrix <- data_ind %>%
select(cost, time, mountain) %>%
as.matrix()
## Transform random draws into coefficients based on parameters
coef <- draws_ind %>%
mutate(cost_coef = params[1],
time_coef = params[2] + params[4] * time_draw,
mountain_coef = params[3] + params[5] * mountain_draw) %>%
select(cost_coef, time_coef, mountain_coef)
## Convert coefficients tibble to a matrix
coef_matrix <- as.matrix(coef)
## Calculate representative utility for each alternative in each draw
utility <- (coef_matrix %*% t(data_matrix)) %>%
pmin(700) %>%
pmax(-700)
## Sum the exponential of utility over alternatives
prob_denom <- utility %>%
exp() %>%
rowSums()
## Calculate the conditional probability for each alternative in each draw
cond_prob <- exp(utility) / prob_denom
## Calculate the numerator of the draw weights as probability of chosen alt
weights_num <- c(cond_prob %*% data_ind$visit)
## Calculate the draw weights
weights <- weights_num / sum(weights_num)
## Add draw weights to dataset of coefficients
coef <- coef %>%
mutate(weight = weights)
## Calculate weighted mean for each coefficient
coef_means <- coef %>%
summarize(cost_coef = sum(cost_coef * weight),
time_coef_mean = sum(time_coef * weight),
mountain_coef_mean = sum(mountain_coef * weight))
## Add individual coefficient means to initial dataset
data_ind_out <- data_ind %>%
bind_cols(coef_means)
## Return initial dataset with simulated probability variable
return(data_ind_out)
}
## Calculate mean coefficients for each individual
data_2a_ind <- map2(.x = draws_camper, .y = data_camper,
.f = ~ calc_mean_coefs(params = model_1a$par,
draws_ind = .x,
data_ind = .y))
## Combine list of data into one tibble
data_2a <- data_2a_ind %>%
bind_rows()
## Calculate average coefficients for each camping park
coef_park_2a <- data_2a %>%
filter(visit == 1) %>%
group_by(park_id, park) %>%
summarize(cost_coef = mean(cost_coef),
time_coef_mean = mean(time_coef_mean),
mountain_coef_mean = mean(mountain_coef_mean),
.groups = 'drop')
coef_park_2a
## Calculate mean values for each camping park
coef_park_2a %>%
mutate(time_value = time_coef_mean / cost_coef * 60,
mountain_value = mountain_coef_mean / -cost_coef) %>%
select(park_id, park, time_value, mountain_value)

### Part a canned solutions ----------------------------------------------------
## Load mlogit package
library(mlogit)
## Convert dataset to dfidx format
data_dfidx <- dfidx(data = data_camping, shape = 'long',
choice = 'visit', idx = c('camper_id', 'park_id'))
## Model camping park visit as a mixed logit with fixed cost coefficient
model_2a_alt <- mlogit(formula = visit ~ cost + time + mountain | 0 | 0,
data = data_dfidx,
rpar = c(time = 'n', mountain = 'n'),
R = 100, seed = 703)
## Calculate mean coefficients for each individual
model_2a_alt_params_ind <- fitted(model_2a_alt, type = 'parameters')
## Add mean coefficients to dataset
data_2a_alt <- data_camping %>%
filter(visit == 1) %>%
mutate(cost_coef = coef(model_2a_alt)[1],
time_coef_mean = model_2a_alt_params_ind[, 1],
mountain_coef_mean = model_2a_alt_params_ind[, 2])
## Calculate average coefficients for each camping park
coef_park_2a_alt <- data_2a_alt %>%
group_by(park_id, park) %>%
summarize(cost_coef = mean(cost_coef),
time_coef_mean = mean(time_coef_mean),
mountain_coef_mean = mean(mountain_coef_mean),
.groups = 'drop')
coef_park_2a_alt
## Calculate mean values for each camping park
coef_park_2a_alt %>%
mutate(time_value = time_coef_mean / cost_coef * 60,
mountain_value = mountain_coef_mean / -cost_coef) %>%
select(park_id, park, time_value, mountain_value)
Loading

0 comments on commit 8afa088

Please sign in to comment.