refactor: Make likelihood computation more robust
fabern committed Sep 24, 2024
1 parent 8b55702 commit 99b053a
Showing 2 changed files with 48 additions and 52 deletions.
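The substance of the refactor: the per-target error standard deviation is now looked up in the calibration parameter vector by name (err_<target>) instead of by position. A minimal, self-contained sketch of the two access patterns, with hypothetical parameter names and values (not taken from rsofun):

# Sketch only: positional vs. named access to per-target error parameters.
targets <- c("gpp", "vcmax25")                   # hypothetical calibration targets
par <- c(kphio = 0.05, soilm_betao = 0.4,        # hypothetical model parameters
         err_gpp = 0.9, err_vcmax25 = 3.0)       # per-target error SDs

# old pattern: assumes the error SDs are the last length(targets) elements of
# `par`, in the same order as `targets`
sd_old <- sapply(seq_along(targets), function(i) par[[length(par) - length(targets) + i]])

# new pattern: position-independent lookup by name
sd_new <- sapply(targets, function(target) par[[paste0("err_", target)]])

identical(unname(sd_old), unname(sd_new))  # TRUE here, but only the named
                                           # lookup survives reordering `par`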
57 changes: 28 additions & 29 deletions R/cost_likelihood_phydro.R
@@ -89,8 +89,8 @@ cost_likelihood_phydromodel <- function(
} else {
using_phydro = drivers$params_siml[[1]]$use_phydro
}
## define required parameter set based on calibrated parameters

## define required parameter set based on model parameters
if (!using_phydro){
required_param_names <- rsofun:::required_param_names$p_model
} else {
@@ -235,40 +235,38 @@ cost_likelihood_phydromodel <- function(
df_trait <- data.frame()
}

# loop over targets
ll <- lapply(seq(length(targets)), function(i){
target <- targets[i]
# loop over targets to compute log-likelihood ll
ll_df <- data.frame(target = targets,
ll = NaN)
for (target in targets){
# check (needed?):
if(target %in% colnames(df_flux) & target %in% colnames(df_trait)) {stop(
sprintf("Target '%s' cannot be simultaneously in df_flux and df_trait.", target))
}

# get observations and predicted target values, without NA
if(target %in% colnames(df_flux)){
df_target <- df_flux[, c(paste0(target, '_mod'), target)] |>
tidyr::drop_na()
df_target <- if(target %in% colnames(df_flux)){
df_flux[, c(paste0(target, '_mod'), target)] |> tidyr::drop_na()
}else{
df_target <- data.frame()
}
if(target %in% colnames(df_trait)){
df_target <- rbind(df_target,
df_trait[, c(paste0(target, '_mod'), target)] |>
tidyr::drop_na())
df_trait[, c(paste0(target, '_mod'), target)] |> tidyr::drop_na()
}

# calculate normal log-likelihood
ll <- sum(stats::dnorm(
df_target[[paste0(target, '_mod')]],
mean = df_target[[target]],
sd = par[length(par)-length(targets) + i],
log = TRUE
))
}) |>
unlist() |>
sum()

ll_df[ll_df$target == target, 'll'] <-
sum(stats::dnorm(
x = df_target[[paste0(target, '_mod')]], # model
mean = df_target[[target]], # obs
sd = par[[paste0('err_', target)]], # error model
log = TRUE))
}
ll <- sum(ll_df$ll)

# compute ll for dpsi using a Gaussian prior with mean 1 and sd 0.33
ll_dpsi = sum(stats::dnorm(
df_flux[['dpsi_int_mod']],
mean = df_flux[['dpsi_int']],
sd = 0.33,
log = TRUE
))
x = df_flux[['dpsi_int_mod']], # model
mean = df_flux[['dpsi_int']], # obs
sd = 0.33, # error model
log = TRUE))

ll <- ll + ll_dpsi

@@ -277,3 +275,4 @@ cost_likelihood_phydromodel <- function(

return(ll)
}

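Both cost functions now share the same per-target loop. A self-contained sketch of that loop with synthetic data and a hypothetical err_gpp error parameter (base R plus tidyr; not the package's own code):

# Sketch of the refactored per-target log-likelihood loop, using synthetic
# "observed" and "modelled" values and a hypothetical error parameter err_gpp.
set.seed(42)
df_flux <- data.frame(gpp     = rnorm(100, mean = 5, sd = 1),   # observations
                      gpp_mod = rnorm(100, mean = 5, sd = 1))   # model output
targets <- c("gpp")
par     <- c(err_gpp = 1.0)                                     # error SD per target

ll_df <- data.frame(target = targets, ll = NaN)
for (target in targets){
  df_target <- df_flux[, c(paste0(target, "_mod"), target)] |> tidyr::drop_na()
  ll_df[ll_df$target == target, "ll"] <- sum(stats::dnorm(
    x    = df_target[[paste0(target, "_mod")]],  # model
    mean = df_target[[target]],                  # obs
    sd   = par[[paste0("err_", target)]],        # error model
    log  = TRUE))
}
ll <- sum(ll_df$ll)   # total log-likelihood over all targets
ll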
43 changes: 20 additions & 23 deletions R/cost_likelihood_pmodel.R
@@ -193,34 +193,31 @@ cost_likelihood_pmodel <- function(
df_trait <- data.frame()
}

# loop over targets
ll <- lapply(seq(length(targets)), function(i){
target <- targets[i]
# loop over targets to compute log-likelihood ll
ll_df <- data.frame(target = targets,
ll = NaN)
for (target in targets){
# check (needed?):
if(target %in% colnames(df_flux) & target %in% colnames(df_trait)) {stop(
sprintf("Target '%s' cannot be simultaneously in df_flux and df_trait.", target))
}

# get observations and predicted target values, without NA
if(target %in% colnames(df_flux)){
df_target <- df_flux[, c(paste0(target, '_mod'), target)] |>
tidyr::drop_na()
df_target <- if(target %in% colnames(df_flux)){
df_flux[, c(paste0(target, '_mod'), target)] |> tidyr::drop_na()
}else{
df_target <- data.frame()
}
if(target %in% colnames(df_trait)){
df_target <- rbind(df_target,
df_trait[, c(paste0(target, '_mod'), target)] |>
tidyr::drop_na())
df_trait[, c(paste0(target, '_mod'), target)] |> tidyr::drop_na()
}

# calculate normal log-likelihood
ll <- sum(stats::dnorm(
df_target[[paste0(target, '_mod')]],
mean = df_target[[target]],
sd = par[length(par)-length(targets) + i],
log = TRUE
))
}) |>
unlist() |>
sum()
# TODO(fabian): make above ll more robust by using advantages of named vector `par`
# instead of relying on an expected order
ll_df[ll_df$target == target, 'll'] <-
sum(stats::dnorm(
x = df_target[[paste0(target, '_mod')]], # model
mean = df_target[[target]], # obs
sd = par[[paste0('err_', target)]], # error model
log = TRUE))
}
ll <- sum(ll_df$ll)

# trap boundary conditions
if(is.nan(ll) | is.na(ll) | ll == 0){ll <- -Inf}

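The guard near the end of cost_likelihood_pmodel.R maps a non-finite or zero log-likelihood to -Inf, so a likelihood-based optimiser or MCMC sampler can reject that parameter set. A small illustration with made-up numbers, assuming an invalid (negative) error SD from a bad proposal:

# Illustration only: a negative error SD makes dnorm return NaN (with a
# warning), so the summed log-likelihood is NaN; the guard converts it to -Inf.
ll <- sum(stats::dnorm(c(1.2, 0.8), mean = 1, sd = -0.5, log = TRUE))  # NaN
if (is.nan(ll) | is.na(ll) | ll == 0) {ll <- -Inf}
ll  # -Inf: the proposal can be rejected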