forked from woerman/ResEcon703
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
1,582 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
######################################################## | ||
##### ResEcon 703: Topics in Advanced Econometrics ##### | ||
##### Problem set 2 solution code ##### | ||
##### Matt Woerman, UMass Amherst ##### | ||
######################################################## | ||
|
||
### Load packages for problem set | ||
library(tidyverse) | ||
library(gmm) | ||
|
||
|
||
### Problem 1 solutions -------------------------------------------------------- | ||
|
||
### Create functions for use with maximum likelihood | ||
## Function to summarize MLE model results | ||
summarize_mle <- function(model, names){ | ||
## Extract model parameter estimates | ||
parameters <- model$par | ||
## Calculate parameters standard errors | ||
std_errors <- model$hessian %>% | ||
solve() %>% | ||
diag() %>% | ||
sqrt() | ||
## Calculate parameter z-stats | ||
z_stats <- parameters / std_errors | ||
## Calculate parameter p-values | ||
p_values <- 2 * pnorm(-abs(z_stats)) | ||
## Summarize results in a list | ||
model_summary <- tibble(names = names, | ||
parameters = parameters, | ||
std_errors = std_errors, | ||
z_stats = z_stats, | ||
p_values = p_values) | ||
## Return model_summary object | ||
return(model_summary) | ||
} | ||
## Function to conduct likelihood ratio test | ||
test_likelihood_ratio <- function(model_rest, model_unrest){ | ||
## Calculate likelihood ratio test statistic | ||
test_stat <- 2 * (model_rest$value - model_unrest$value) | ||
## Calculate the number of restrictions | ||
df <- length(model_unrest$par) - length(model_rest$par) | ||
## Test if likelihood ratio test statistic is greater than critical value | ||
test <- test_stat > qchisq(0.95, df) | ||
## Calculate p-value of test | ||
p_value <- 1 - pchisq(test_stat, df) | ||
## Return test result and p-value | ||
return(list(reject = test, p_value = p_value)) | ||
} | ||
|
||
### Part a | ||
## Load dataset | ||
data_multi <- read_csv('commute_multinomial.csv') | ||
## Function to calculate log-likelihood for heating choice | ||
ll_fn_1a <- function(params, data){ | ||
## Extract individual parameters with descriptive names | ||
beta_1 <- params[1] | ||
beta_2 <- params[2] | ||
## Calculate representative utility for each alternative given the parameters | ||
model_data <- data %>% | ||
mutate(utility_bike = beta_1 * cost.bike + beta_2 * time.bike, | ||
utility_bus = beta_1 * cost.bus + beta_2 * time.bus, | ||
utility_car = beta_1 * cost.car + beta_2 * time.car, | ||
utility_walk = beta_1 * cost.walk + beta_2 * time.walk) | ||
## Calculate logit choice probability denominator given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_denom = exp(utility_bike) + exp(utility_bus) + | ||
exp(utility_car) + exp(utility_walk)) | ||
## Calculate logit choice probability for each alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_bike = exp(utility_bike) / prob_denom, | ||
prob_bus = exp(utility_bus) / prob_denom, | ||
prob_car = exp(utility_car) / prob_denom, | ||
prob_walk = exp(utility_walk) / prob_denom) | ||
## Calculate logit choice probability for chosen alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_choice = prob_bike * (mode == 'bike') + | ||
prob_bus * (mode == 'bus') + prob_car * (mode == 'car') + | ||
prob_walk * (mode == 'walk')) | ||
## Calculate log of logit choice probability for chosen alt given the params | ||
model_data <- model_data %>% | ||
mutate(log_prob = log(prob_choice)) | ||
## Calculate the log-likelihood for these parameters | ||
ll <- sum(model_data$log_prob) | ||
return(-ll) | ||
} | ||
## Maximize the log-likelihood function | ||
model_1a <- optim(par = rep(0, 2), fn = ll_fn_1a, data = data_multi, | ||
method = 'BFGS', hessian = TRUE) | ||
## Summarize model results | ||
model_1a %>% | ||
summarize_mle(c('cost', 'time')) | ||
|
||
### Part b | ||
## Function to calculate log-likelihood for heating choice | ||
ll_fn_1b <- function(params, data){ | ||
## Extract individual parameters with descriptive names | ||
alpha_bus <- params[1] | ||
alpha_car <- params[2] | ||
alpha_walk <- params[3] | ||
beta_1 <- params[4] | ||
beta_2 <- params[5] | ||
## Calculate representative utility for each alternative given the parameters | ||
model_data <- data %>% | ||
mutate(utility_bike = beta_1 * cost.bike + beta_2 * time.bike, | ||
utility_bus = alpha_bus + beta_1 * cost.bus + beta_2 * time.bus, | ||
utility_car = alpha_car + beta_1 * cost.car + beta_2 * time.car, | ||
utility_walk = alpha_walk + beta_1 * cost.walk + beta_2 * time.walk) | ||
## Calculate logit choice probability denominator given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_denom = exp(utility_bike) + exp(utility_bus) + | ||
exp(utility_car) + exp(utility_walk)) | ||
## Calculate logit choice probability for each alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_bike = exp(utility_bike) / prob_denom, | ||
prob_bus = exp(utility_bus) / prob_denom, | ||
prob_car = exp(utility_car) / prob_denom, | ||
prob_walk = exp(utility_walk) / prob_denom) | ||
## Calculate logit choice probability for chosen alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_choice = prob_bike * (mode == 'bike') + | ||
prob_bus * (mode == 'bus') + prob_car * (mode == 'car') + | ||
prob_walk * (mode == 'walk')) | ||
## Calculate log of logit choice probability for chosen alt given the params | ||
model_data <- model_data %>% | ||
mutate(log_prob = log(prob_choice)) | ||
## Calculate the log-likelihood for these parameters | ||
ll <- sum(model_data$log_prob) | ||
return(-ll) | ||
} | ||
## Maximize the log-likelihood function | ||
model_1b <- optim(par = rep(0, 5), fn = ll_fn_1b, data = data_multi, | ||
method = 'BFGS', hessian = TRUE) | ||
## Summarize model results | ||
model_1b %>% | ||
summarize_mle(c('bus_int', 'car_int', 'walk_int', 'cost', 'time')) | ||
## Conduct likelihood ratio test of models 1a and 1b | ||
test_1b <- test_likelihood_ratio(model_1a, model_1b) | ||
## Display test results | ||
test_1b | ||
|
||
### Part c | ||
## Function to calculate log-likelihood for heating choice | ||
ll_fn_1c <- function(params, data){ | ||
## Extract individual parameters with descriptive names | ||
alpha_bus <- params[1] | ||
alpha_car <- params[2] | ||
alpha_walk <- params[3] | ||
beta <- params[4] | ||
gamma_bike <- params[5] | ||
gamma_bus <- params[6] | ||
gamma_car <- params[7] | ||
gamma_walk <- params[8] | ||
## Calculate representative utility for each alternative given the parameters | ||
model_data <- data %>% | ||
mutate(utility_bike = beta * cost.bike + gamma_bike * time.bike, | ||
utility_bus = alpha_bus + beta * cost.bus + gamma_bus * time.bus, | ||
utility_car = alpha_car + beta * cost.car + gamma_car * time.car, | ||
utility_walk = alpha_walk + beta * cost.walk + | ||
gamma_walk * time.walk) | ||
## Calculate logit choice probability denominator given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_denom = exp(utility_bike) + exp(utility_bus) + | ||
exp(utility_car) + exp(utility_walk)) | ||
## Calculate logit choice probability for each alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_bike = exp(utility_bike) / prob_denom, | ||
prob_bus = exp(utility_bus) / prob_denom, | ||
prob_car = exp(utility_car) / prob_denom, | ||
prob_walk = exp(utility_walk) / prob_denom) | ||
## Calculate logit choice probability for chosen alt given the parameters | ||
model_data <- model_data %>% | ||
mutate(prob_choice = prob_bike * (mode == 'bike') + | ||
prob_bus * (mode == 'bus') + prob_car * (mode == 'car') + | ||
prob_walk * (mode == 'walk')) | ||
## Calculate log of logit choice probability for chosen alt given the params | ||
model_data <- model_data %>% | ||
mutate(log_prob = log(prob_choice)) | ||
## Calculate the log-likelihood for these parameters | ||
ll <- sum(model_data$log_prob) | ||
return(-ll) | ||
} | ||
## Maximize the log-likelihood function | ||
model_1c <- optim(par = rep(0, 8), fn = ll_fn_1c, data = data_multi, | ||
method = 'BFGS', hessian = TRUE) | ||
## Summarize model results | ||
model_1c %>% | ||
summarize_mle(c('bus_int', 'car_int', 'walk_int', 'cost', | ||
'time_bike', 'time_bus', 'time_car', 'time_walk')) | ||
## Conduct likelihood ratio test of models 1a and 1b | ||
test_1c <- test_likelihood_ratio(model_1b, model_1c) | ||
## Display test results | ||
test_1c | ||
|
||
|
||
### Problem 2 solutions -------------------------------------------------------- | ||
|
||
### Part a | ||
## Load dataset | ||
data_binary <- read_csv('commute_binary.csv') | ||
## Create dataset for use in MM moment function | ||
data_2a <- data_binary %>% | ||
mutate(choice = 1 * (mode == 'car'), | ||
constant = 1, | ||
time.bus = -time.bus) %>% | ||
select(choice, constant, cost.car, time.car, time.bus) %>% | ||
as.matrix() | ||
## Function to calculate moments for commute choice | ||
mm_fn_2a <- function(params, data){ | ||
## Select data for X [N x K] | ||
X <- data[, -1] | ||
## Select data for y [N x 1] | ||
y <- data[, 1] | ||
## Calculate representative utility of driving [N x 1] | ||
utility <- X %*% params | ||
## Calculate logit choice probability of driving [N x 1] | ||
prob <- 1 / (1 + exp(-utility)) | ||
## Calculate econometric residuals [N x 1] | ||
residuals <- y - prob | ||
## Create moment matrix [N x K] | ||
moments <- c(residuals) * X | ||
return(moments) | ||
} | ||
## Use GMM to estimate model | ||
model_2a <- gmm(g = mm_fn_2a, x = data_2a, t0 = rep(0, 4), | ||
vcov = 'iid', method = 'Nelder-Mead', | ||
control = list(reltol = 1e-25, maxit = 10000)) | ||
## Summarize model results | ||
summary(model_2a) | ||
|
||
### Part b | ||
## Create dataset for use in MM moment function | ||
data_2b <- data_binary %>% | ||
mutate(choice = 1 * (mode == 'car'), | ||
constant = 1, | ||
time.bus = -time.bus) %>% | ||
select(choice, constant, cost.car, time.car, time.bus, | ||
price_gas, snowfall, construction, bus_detour) %>% | ||
as.matrix() | ||
## Function to calculate moments for commute choice | ||
gmm_fn_2b <- function(params, data){ | ||
## Select data for X [N x K] | ||
X <- data[, 2:5] | ||
## Select data for y [N x 1] | ||
y <- data[, 1] | ||
## select data for Z [N x L] | ||
Z <- data[, c(2, 6:9)] | ||
## Calculate representative utility of driving [N x 1] | ||
utility <- X %*% params | ||
## Calculate logit choice probability of driving [N x 1] | ||
prob <- 1 / (1 + exp(-utility)) | ||
## Calculate econometric residuals [N x 1] | ||
residuals <- y - prob | ||
## Create moment matrix [N x K] | ||
moments <- c(residuals) * Z | ||
return(moments) | ||
} | ||
## Use GMM to estimate model | ||
model_2b <- gmm(g = gmm_fn_2b, x = data_2b, t0 = c(0, 0, -0.3, -0.1), | ||
vcov = 'iid', method = 'Nelder-Mead', | ||
control = list(reltol = 1e-25, maxit = 10000)) | ||
## Summarize model results | ||
summary(model_2b) | ||
## Test overidentifying restrictions | ||
specTest(model_2b) |
Oops, something went wrong.