forked from woerman/ResEcon703
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
1,451 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
######################################################## | ||
##### ResEcon 703: Topics in Advanced Econometrics ##### | ||
##### Problem set 4 solution code ##### | ||
##### Matt Woerman, UMass Amherst ##### | ||
######################################################## | ||
|
||
### Load packages for problem set | ||
library(tidyverse) | ||
|
||
|
||
### Problem 1 solutions -------------------------------------------------------- | ||
|
||
### Part a | ||
## Load dataset | ||
data_camping <- read_csv('camping.csv') | ||
## Set seed for replication | ||
set.seed(703) | ||
## Draw standard normal random variables and split into list | ||
draws_camper <- map(1:1000, ~ tibble(time_draw = rnorm(100), | ||
mountain_draw = rnorm(100))) | ||
## Split data into list by camper | ||
data_camper <- data_camping %>% | ||
group_by(camper_id) %>% | ||
group_split() | ||
## Function to simulate choice probabilities for one individual | ||
sim_probs_ind <- function(params, draws_ind, data_ind){ | ||
## Select relevant variables and convert into a matrix | ||
data_matrix <- data_ind %>% | ||
select(cost, time, mountain) %>% | ||
as.matrix() | ||
## Transform random draws into coefficients based on parameters | ||
coef_matrix <- draws_ind %>% | ||
mutate(cost_coef = params[1], | ||
time_coef = params[2] + params[4] * time_draw, | ||
mountain_coef = params[3] + params[5] * mountain_draw) %>% | ||
select(cost_coef, time_coef, mountain_coef) %>% | ||
as.matrix() | ||
## Calculate representative utility for each alternative in each draw | ||
utility <- (coef_matrix %*% t(data_matrix)) %>% | ||
pmin(700) %>% | ||
pmax(-700) | ||
## Sum the exponential of utility over alternatives | ||
prob_denom <- utility %>% | ||
exp() %>% | ||
rowSums() | ||
## Calculate the conditional probability for each alternative in each draw | ||
cond_prob <- exp(utility) / prob_denom | ||
## Calculate simulated choice probabilities as the means over all draws | ||
sim_prob <- colMeans(cond_prob) | ||
## Add simulated choice probability to initial dataset | ||
data_ind_out <- data_ind %>% | ||
mutate(prob = sim_prob) | ||
## Return initial dataset with simulated probability variable | ||
return(data_ind_out) | ||
} | ||
## Function to calculate simulated log-likelihood | ||
sim_ll_fn <- function(params, draws_list, data_list){ | ||
## Simulate probabilities for each individual | ||
data_sim_ind <- map2(.x = draws_list, .y = data_list, | ||
.f = ~ sim_probs_ind(params = params, | ||
draws_ind = .x, | ||
data_ind = .y)) | ||
## Combine individual datasets into one | ||
data_sim <- data_sim_ind %>% | ||
bind_rows() | ||
## Calculate the log of simulated probability for the chosen alternative | ||
data_sim <- data_sim %>% | ||
filter(visit == 1) %>% | ||
mutate(log_prob = log(prob)) | ||
## Calculate the simulated log-likelihood | ||
sim_ll <- sum(data_sim$log_prob) | ||
## Return the negative of simulated log-likelihood | ||
return(-sim_ll) | ||
} | ||
## Maximize the log-likelihood function | ||
model_1a <- optim(par = c(-0.02, -0.005, -0.8, 0.002, 5), fn = sim_ll_fn, | ||
draws_list = draws_camper, data_list = data_camper, | ||
method = 'BFGS', hessian = TRUE) | ||
## Function to summarize MLE model results | ||
summarize_mle <- function(model, names){ | ||
## Extract model parameter estimates | ||
parameters <- model$par | ||
## Calculate parameters standard errors | ||
std_errors <- model$hessian %>% | ||
solve() %>% | ||
diag() %>% | ||
sqrt() | ||
## Calculate parameter z-stats | ||
z_stats <- parameters / std_errors | ||
## Calculate parameter p-values | ||
p_values <- 2 * pnorm(-abs(z_stats)) | ||
## Summarize results in a list | ||
model_summary <- tibble(names = names, | ||
parameters = parameters, | ||
std_errors = std_errors, | ||
z_stats = z_stats, | ||
p_values = p_values) | ||
## Return model_summary object | ||
return(model_summary) | ||
} | ||
## Summarize model results | ||
summarize_mle(model_1a, | ||
c('cost', 'time', 'mountain', 'sd.time', 'sd.mountain')) | ||
|
||
### Part b | ||
## Function to simulate elasticities for one individual | ||
sim_elas_ind <- function(params, draws_ind, data_ind){ | ||
## Select relevant variables and convert into a matrix | ||
data_matrix <- data_ind %>% | ||
select(cost, time, mountain) %>% | ||
as.matrix() | ||
## Transform random draws into coefficients based on parameters | ||
coef_matrix <- draws_ind %>% | ||
mutate(cost_coef = params[1], | ||
time_coef = params[2] + params[4] * time_draw, | ||
mountain_coef = params[3] + params[5] * mountain_draw) %>% | ||
select(cost_coef, time_coef, mountain_coef) %>% | ||
as.matrix() | ||
## Calculate representative utility for each alternative in each draw | ||
utility <- (coef_matrix %*% t(data_matrix)) %>% | ||
pmin(700) %>% | ||
pmax(-700) | ||
## Sum the exponential of utility over alternatives | ||
prob_denom <- utility %>% | ||
exp() %>% | ||
rowSums() | ||
## Calculate the conditional probability for each alternative in each draw | ||
cond_prob <- exp(utility) / prob_denom | ||
## Calculate simulated choice probabilities as the means over all draws | ||
sim_prob <- colMeans(cond_prob) | ||
## Calculate simulated integral for own elasticity | ||
sim_int_own_elas <- params[1] * | ||
mean(cond_prob[, 1] * (1 - cond_prob[, 1])) | ||
## Calculate simulated integral for cross elasticities | ||
sim_int_cross_elas <- params[1] * | ||
colMeans(cond_prob[, 1] * cond_prob[, -1]) | ||
## Combine elasticity simulated integrals into one vector | ||
sim_int_elas <- c(sim_int_own_elas, sim_int_cross_elas) | ||
## Calculate own-price and cross-price simulated elasticities | ||
sim_elas <- c(1, rep(-1, 4)) * data_ind$cost[1] / sim_prob * sim_int_elas | ||
## Add simulated elasticities to initial dataset | ||
data_ind_out <- data_ind %>% | ||
mutate(elasticity = sim_elas) | ||
## Return initial dataset with simulated elasticity variable | ||
return(data_ind_out) | ||
} | ||
## Simulate elasticities for each individual | ||
data_1b_ind <- map2(.x = draws_camper, .y = data_camper, | ||
.f = ~ sim_elas_ind(params = model_1a$par, | ||
draws_ind = .x, | ||
data_ind = .y)) | ||
## Combine list of data into one tibble | ||
data_1b <- data_1b_ind %>% | ||
bind_rows() | ||
## Calculate average elasticity with respect to cost of alternative 1 | ||
data_1b %>% | ||
group_by(park_id, park) %>% | ||
summarize(elasticity = mean(elasticity), .groups = 'drop') %>% | ||
arrange(park_id) | ||
|
||
|
||
### Problem 2 solutions -------------------------------------------------------- | ||
|
||
### Part a | ||
## Function to simulate individual coefficients for one individual | ||
calc_mean_coefs <- function(params, draws_ind, data_ind){ | ||
## Select relevant variables and convert into a matrix | ||
data_matrix <- data_ind %>% | ||
select(cost, time, mountain) %>% | ||
as.matrix() | ||
## Transform random draws into coefficients based on parameters | ||
coef <- draws_ind %>% | ||
mutate(cost_coef = params[1], | ||
time_coef = params[2] + params[4] * time_draw, | ||
mountain_coef = params[3] + params[5] * mountain_draw) %>% | ||
select(cost_coef, time_coef, mountain_coef) | ||
## Convert coefficients tibble to a matrix | ||
coef_matrix <- as.matrix(coef) | ||
## Calculate representative utility for each alternative in each draw | ||
utility <- (coef_matrix %*% t(data_matrix)) %>% | ||
pmin(700) %>% | ||
pmax(-700) | ||
## Sum the exponential of utility over alternatives | ||
prob_denom <- utility %>% | ||
exp() %>% | ||
rowSums() | ||
## Calculate the conditional probability for each alternative in each draw | ||
cond_prob <- exp(utility) / prob_denom | ||
## Calculate the numerator of the draw weights as probability of chosen alt | ||
weights_num <- c(cond_prob %*% data_ind$visit) | ||
## Calculate the draw weights | ||
weights <- weights_num / sum(weights_num) | ||
## Add draw weights to dataset of coefficients | ||
coef <- coef %>% | ||
mutate(weight = weights) | ||
## Calculate weighted mean for each coefficient | ||
coef_means <- coef %>% | ||
summarize(cost_coef = sum(cost_coef * weight), | ||
time_coef_mean = sum(time_coef * weight), | ||
mountain_coef_mean = sum(mountain_coef * weight)) | ||
## Add individual coefficient means to initial dataset | ||
data_ind_out <- data_ind %>% | ||
bind_cols(coef_means) | ||
## Return initial dataset with simulated probability variable | ||
return(data_ind_out) | ||
} | ||
## Calculate mean coefficients for each individual | ||
data_2a_ind <- map2(.x = draws_camper, .y = data_camper, | ||
.f = ~ calc_mean_coefs(params = model_1a$par, | ||
draws_ind = .x, | ||
data_ind = .y)) | ||
## Combine list of data into one tibble | ||
data_2a <- data_2a_ind %>% | ||
bind_rows() | ||
## Calculate average coefficients for each camping park | ||
coef_park_2a <- data_2a %>% | ||
filter(visit == 1) %>% | ||
group_by(park_id, park) %>% | ||
summarize(cost_coef = mean(cost_coef), | ||
time_coef_mean = mean(time_coef_mean), | ||
mountain_coef_mean = mean(mountain_coef_mean), | ||
.groups = 'drop') | ||
coef_park_2a | ||
## Calculate mean values for each camping park | ||
coef_park_2a %>% | ||
mutate(time_value = time_coef_mean / cost_coef * 60, | ||
mountain_value = mountain_coef_mean / -cost_coef) %>% | ||
select(park_id, park, time_value, mountain_value) | ||
|
||
### Part a canned solutions ---------------------------------------------------- | ||
## Load mlogit package | ||
library(mlogit) | ||
## Convert dataset to dfidx format | ||
data_dfidx <- dfidx(data = data_camping, shape = 'long', | ||
choice = 'visit', idx = c('camper_id', 'park_id')) | ||
## Model camping park visit as a mixed logit with fixed cost coefficient | ||
model_2a_alt <- mlogit(formula = visit ~ cost + time + mountain | 0 | 0, | ||
data = data_dfidx, | ||
rpar = c(time = 'n', mountain = 'n'), | ||
R = 100, seed = 703) | ||
## Calculate mean coefficients for each individual | ||
model_2a_alt_params_ind <- fitted(model_2a_alt, type = 'parameters') | ||
## Add mean coefficients to dataset | ||
data_2a_alt <- data_camping %>% | ||
filter(visit == 1) %>% | ||
mutate(cost_coef = coef(model_2a_alt)[1], | ||
time_coef_mean = model_2a_alt_params_ind[, 1], | ||
mountain_coef_mean = model_2a_alt_params_ind[, 2]) | ||
## Calculate average coefficients for each camping park | ||
coef_park_2a_alt <- data_2a_alt %>% | ||
group_by(park_id, park) %>% | ||
summarize(cost_coef = mean(cost_coef), | ||
time_coef_mean = mean(time_coef_mean), | ||
mountain_coef_mean = mean(mountain_coef_mean), | ||
.groups = 'drop') | ||
coef_park_2a_alt | ||
## Calculate mean values for each camping park | ||
coef_park_2a_alt %>% | ||
mutate(time_value = time_coef_mean / cost_coef * 60, | ||
mountain_value = mountain_coef_mean / -cost_coef) %>% | ||
select(park_id, park, time_value, mountain_value) |
Oops, something went wrong.