Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phydro fix le output #221

Merged
merged 16 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

export(calib_sofun)
export(cost_likelihood_biomee)
export(cost_likelihood_pmodel)
export(cost_likelihood_phydromodel)
export(cost_rmse_biomee)
export(cost_rmse_pmodel)
export(init_dates_dataframe)
Expand Down
44 changes: 20 additions & 24 deletions R/calib_sofun.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' \item{\code{par}}{A list of model parameters. For each parameter, an initial value
#' and lower and upper bounds should be provided. The calibratable parameters
#' include model parameters 'kphio', 'kphio_par_a', 'kphio_par_b', 'soilm_thetastar',
#' 'soilm_betao', 'beta_costunitratio', 'rd_to_vcmax', 'tau_acclim', 'kc_jmax'
#' 'beta_costunitratio', 'rd_to_vcmax', 'tau_acclim', 'kc_jmax'
#' and 'rootzone_whc' , and (if
#' doing Bayesian calibration) error parameters
#' for each target variable, named for example 'err_gpp'. This list must match
Expand Down Expand Up @@ -49,7 +49,6 @@
#' kphio_par_a = 0,
#' kphio_par_b = 1.0,
#' soilm_thetastar = 0.6*240,
#' soilm_betao = 0.01,
#' beta_unitcostratio = 146,
#' rd_to_vcmax = 0.014,
#' tau_acclim = 30,
Expand Down Expand Up @@ -120,7 +119,7 @@ calib_sofun <- function(
# create bounds
lower <- unlist(lapply(settings$par, function(x) x$lower))
upper <- unlist(lapply(settings$par, function(x) x$upper))
pars <- unlist(lapply( settings$par, function(x) x$init))
pars <- unlist(lapply(settings$par, function(x) x$init))

out <- GenSA::GenSA(
par = pars,
Expand Down Expand Up @@ -149,15 +148,20 @@ calib_sofun <- function(
eval(settings$metric)(par = par,
obs = obs,
drivers = drivers,
...)
...) # the dots contain: par_fixed, targets, parallel, ncores
}

# reformat parameters
pars <- as.data.frame(do.call("rbind", settings$par), row.names = FALSE)

priors <- BayesianTools::createTruncatedNormalPrior(
unlist(pars$mean),
unlist(pars$sd),
pars <- as.data.frame(do.call("rbind", settings$par)) # use rownames later on

# priors <- BayesianTools::createTruncatedNormalPrior(
# unlist(pars$mean), # NOTE: This needs a value otherwise: Error in `parallelSampler(1000)`: sampler provided doesn't work
# unlist(pars$sd), # NOTE: This needs a value otherwise: Error in `parallelSampler(1000)`: sampler provided doesn't work
# unlist(pars$lower), # As a workaround BayesianTools::createUniformPrior could be used
# unlist(pars$upper)
# # unlist(pars$init)
# )
priors <- BayesianTools::createUniformPrior( # workaround for TruncatedNormalPrior, this does not require mean and sd
unlist(pars$lower),
unlist(pars$upper)
# unlist(pars$init)
Expand All @@ -166,24 +170,16 @@ calib_sofun <- function(
# setup the bayes run, no message forwarding is provided
# so wrap the function in a do.call
setup <- BayesianTools::createBayesianSetup(
likelihood = function(
random_par) {
# cost(
# par = random_par,
# obs = obs,
# drivers = drivers,
# ...
# )
likelihood = function(random_par) {
do.call("cost",
list(
par = random_par,
par = setNames(random_par, rownames(pars)),
obs = obs,
drivers = drivers
))
},
prior = priors,
names = names(settings$par)
)
))},
prior = priors,
names = rownames(pars)
)

# set bt control parameters
bt_settings <- settings$control$settings
Expand All @@ -194,7 +190,7 @@ calib_sofun <- function(
sampler = settings$control$sampler,
settings = bt_settings
)

# drop last value
bt_par <- BayesianTools::MAP(out)$parametersMAP
bt_par <- bt_par[1:(length(bt_par))]
Expand Down
140 changes: 102 additions & 38 deletions R/cost_likelihood_phydro.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,75 @@
cost_likelihood_phydro <- function(
#' Cost function computing a log-likelihood for calibration of Phydro-model
#' parameters
#'
#' The cost function performs a Phydro-model run for the input drivers and model parameter
#' values, and computes the outcome's normal log-likelihood centered at the input
#' observed values and with standard deviation given as an input parameter
#' (calibratable).
#'
#' @param par A vector of values for the parameters to be calibrated, including
#' a subset of model parameters (described in \code{\link{runread_pmodel_f}}),
#' in order, and error terms
#' for each target variable (for example \code{'gpp_err'}), in the same order as
#' the targets appear in \code{targets}.
#' @param obs A nested data.frame of observations, with columns \code{'sitename'}
#' and \code{'data'} (see \code{\link{p_model_validation}} or \code{\link{p_model_validation_vcmax25}}
#' to check their structure).
#' @param drivers A nested data.frame of driver data. See \code{\link{p_model_drivers}}
#' for a description of the data structure.
#' @param targets A character vector indicating the target variables for which the
#' optimization will be done and the RMSE computed. This string must be a column
#' name of the \code{data} data.frame belonging to the validation nested data.frame
#' (for example 'gpp').
#' @param par_fixed A named list of model parameter values to keep fixed during the
#' calibration. These should complement the input \code{par} such that all model
#' parameters are passed on to \code{\link{runread_pmodel_f}}.
#' @param parallel A logical specifying whether simulations are to be parallelised
#' (sending data from a certain number of sites to each core). Defaults to
#' \code{FALSE}.
#' @param ncores An integer specifying the number of cores used for parallel
#' computing. Defaults to 2.
#'
#' @return The log-likelihood of the observed target values, assuming that they
#' are independent, normally distributed and centered on the predictions
#' made by the P-model run with standard deviation given as input (via `par` because
#' the error terms are estimated through the calibration with `BayesianTools`,
#' as shown in the "Parameter calibration and cost functions" vignette).
#'
#' @details To run the P-model, all model parameters must be given. The cost
#' function uses arguments \code{par} and \code{par_fixed} such that, in the
#' calibration routine, \code{par} can be updated by the optimizer and
#' \code{par_fixed} are kept unchanged throughout calibration.
#'
#' If the validation data contains a "date" column (fluxes), the simulated target time series
#' is compared to the observed values on those same dates (e.g. for GPP). Otherwise,
#' there should only be one observed value per site (leaf traits), and the outputs
#' (averaged over the growing season, weighted by predicted GPP) will be
#' compared to this single value representative of the site (e.g. Vcmax25). As an exception,
#' when the date of a trait measurement is available, it will be compared to the
#' trait value predicted on that date.
#'
#' @export
#'
#' @examples
#' # Compute the likelihood for a set of
#' # model parameter values involved in the
#' # temperature dependence of kphio
#' # and example data
#' cost_likelihood_phydromodel(
#' par = c(0.05, -0.01, 1, # model parameters
#' 2), # err_gpp
#' obs = p_model_validation,
#' drivers = p_model_drivers,
#' targets = c('gpp'),
#' par_fixed = list(
#' soilm_thetastar = 0.6 * 240, # old setup with soil moisture stress
#' beta_unitcostratio = 146.0,
#' rd_to_vcmax = 0.014, # from Atkin et al. 2015 for C3 herbaceous
#' tau_acclim = 30.0,
#' kc_jmax = 0.41
#' )
#' )
cost_likelihood_phydromodel <- function(
par, # model parameters & error terms for each target
obs,
drivers,
Expand All @@ -7,53 +78,46 @@ cost_likelihood_phydro <- function(
parallel = FALSE,
ncores = 2
){
# NOTE(fabian): These different cost functions share a LOT of code in common. Consider consolidation for maintainability?

# predefine variables for CRAN check compliance
sitename <- data <- gpp_mod <- NULL

using_phydro = drivers$params_siml[[1]]$use_phydro

# FIXME Jaideep: Instead of checking the number of params,
# it might be better to check for presence of each param in par and par_fixed
## check input parameters
expected_params = ifelse(using_phydro, yes=14, no=10)
if( (length(par) + length(par_fixed)) != (expected_params + length(targets)) ){
stop(paste0('Error: Input calibratable and fixed parameters (par = ',length(par),' and par_fixed = ',length(par_fixed),')
do not match length of the required P-model parameters (',expected_params + length(targets),').'))
if (!("use_phydro" %in% colnames(drivers$params_siml[[1]]))){
warning("Parameter use_phydro not set. Assuming FALSE")
using_phydro = FALSE
} else {
using_phydro = drivers$params_siml[[1]]$use_phydro
}


## define parameter set based on calibrated parameters
if (using_phydro){
calib_param_names <- c('kphio', 'kphio_par_a', 'kphio_par_b',
'rd_to_vcmax', 'tau_acclim', 'kc_jmax',
'phydro_K_plant', 'phydro_p50_plant', 'phydro_b_plant',
'phydro_alpha', 'phydro_gamma',
'bsoil', 'Ssoil', 'whc')
## define required parameter set based on calibrated parameters
if (!using_phydro){
required_param_names <- rsofun:::required_param_names$p_model
} else {
calib_param_names <- c('kphio', 'kphio_par_a', 'kphio_par_b',
'soilm_thetastar', 'soilm_betao',
'beta_unitcostratio', 'rd_to_vcmax',
'tau_acclim', 'kc_jmax', 'whc')
required_param_names <- rsofun:::required_param_names$phydro_model
}

# FIXME Jaideep: Here it is assumed that the params in par will appear in exactly the same order in settings as in the list above. Better to do this in an order-independent way.
if(!is.null(par_fixed)){
params_modl <- list()
# complete with calibrated values
i <- 1 # start counter
for(par_name in calib_param_names){
if(is.null(par_fixed[[par_name]])){
params_modl[[par_name]] <- par[i] # use calibrated par value
i <- i + 1 # counter of calibrated params
}else{
params_modl[[par_name]] <- par_fixed[[par_name]] # use fixed par value
}
}
}else{
params_modl <- as.list(par[1:expected_params]) # all parameters calibrated
names(params_modl) <- calib_param_names
## split calibrated parameters into model and error parameters
par_calibrated_model <- par[ ! names(par) %in% c("err_gpp") ] # consider only model parameters for the check
# par_calibrated_errormodel <- par[ names(par) %in% c("err_gpp") ]
# par_fixed

## check parameters
if (!identical(sort(c(names(par_calibrated_model), names(par_fixed))), required_param_names)){
stop(sprintf(paste0("Error: Input calibratable and fixed parameters do not ",
"match required model parameters:",
"\n par: c(%s)",
"\n par_fixed: c(%s)",
"\n required: c(%s)"),
paste0(sort(names(par_calibrated_model)), collapse = ", "),
paste0(sort(names(par_fixed)), collapse = ", "),
paste0(sort(required_param_names), collapse = ", ")))
}

# Combine fixed and estimated params to result in all the params required to run the model
# This basically uses all params except those of the error model of the observations
params_modl <- c(par, par_fixed)[required_param_names]

## run the model
df <- runread_pmodel_f(
drivers,
Expand Down
65 changes: 27 additions & 38 deletions R/cost_likelihood_pmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
#' targets = c('gpp'),
#' par_fixed = list(
#' soilm_thetastar = 0.6 * 240, # old setup with soil moisture stress
#' soilm_betao = 0.0,
#' beta_unitcostratio = 146.0,
#' rd_to_vcmax = 0.014, # from Atkin et al. 2015 for C3 herbaceous
#' tau_acclim = 30.0,
Expand All @@ -80,6 +79,8 @@ cost_likelihood_pmodel <- function(
parallel = FALSE,
ncores = 2
){
# NOTE(fabian): These different cost functions share a LOT of code in common. Consider consolidation for maintainability?

# predefine variables for CRAN check compliance
sitename <- data <- gpp_mod <- NULL

Expand All @@ -90,48 +91,34 @@ cost_likelihood_pmodel <- function(
using_phydro = drivers$params_siml[[1]]$use_phydro
}

# FIXME Jaideep: Instead of checking the number of params,
# it might be better to check for presence of each param in par and par_fixed
## check input parameters
expected_params = ifelse(using_phydro, yes=14, no=10)
if( (length(par) + length(par_fixed)) != (expected_params + length(targets)) ){
stop(paste0('Error: Input calibratable and fixed parameters (par = ',length(par),' and par_fixed = ',length(par_fixed),')
do not match length of the required P-model parameters (',expected_params + length(targets),').'))
}


## define parameter set based on calibrated parameters
if (using_phydro){
calib_param_names <- c('kphio', 'kphio_par_a', 'kphio_par_b',
'rd_to_vcmax', 'tau_acclim', 'kc_jmax',
'phydro_K_plant', 'phydro_p50_plant', 'phydro_b_plant',
'phydro_alpha', 'phydro_gamma',
'bsoil', 'Ssoil', 'whc')
## define required parameter set based on model parameters
if (!using_phydro){
required_param_names <- rsofun:::required_param_names$p_model
} else {
calib_param_names <- c('kphio', 'kphio_par_a', 'kphio_par_b',
'soilm_thetastar', 'soilm_betao',
'beta_unitcostratio', 'rd_to_vcmax',
'tau_acclim', 'kc_jmax', 'whc')
required_param_names <- rsofun:::required_param_names$phydro_model
}

# FIXME Jaideep: Here it is assumed that the params in par will appear in exactly the same order in settings as in the list above. Better to do this in an order-independent way.
if(!is.null(par_fixed)){
params_modl <- list()
# complete with calibrated values
i <- 1 # start counter
for(par_name in calib_param_names){
if(is.null(par_fixed[[par_name]])){
params_modl[[par_name]] <- par[i] # use calibrated par value
i <- i + 1 # counter of calibrated params
}else{
params_modl[[par_name]] <- par_fixed[[par_name]] # use fixed par value
}
}
}else{
params_modl <- as.list(par[1:expected_params]) # all parameters calibrated
names(params_modl) <- calib_param_names
## split calibrated parameters into model and error parameters
par_calibrated_model <- par[ ! names(par) %in% c("err_gpp", "err_vcmax25") ] # consider only model parameters for the check
# par_calibrated_errormodel <- par[ names(par) %in% c("err_gpp", "err_vcmax25") ]
# par_fixed

## check parameters
if (!identical(sort(c(names(par_calibrated_model), names(par_fixed))), required_param_names)){
stop(sprintf(paste0("Error: Input calibratable and fixed parameters do not ",
"match required model parameters:",
"\n par: c(%s)",
"\n par_fixed: c(%s)",
"\n required: c(%s)"),
paste0(sort(names(par_calibrated_model)), collapse = ", "),
paste0(sort(names(par_fixed)), collapse = ", "),
paste0(sort(required_param_names), collapse = ", ")))
}

# Combine fixed and estimated params to result in all the params required to run the model
# This basically uses all params except those of the error model of the observations
params_modl <- c(par, par_fixed)[required_param_names]

## run the model
df <- runread_pmodel_f(
drivers,
Expand Down Expand Up @@ -232,6 +219,8 @@ cost_likelihood_pmodel <- function(
}) |>
unlist() |>
sum()
# TODO(fabian): make above ll more robust by using advantages of named vector `par`
# instead of relying on an expected order

# trap boundary conditions
if(is.nan(ll) | is.na(ll) | ll == 0){ll <- -Inf}
Expand Down
Loading