diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 0ac6d55a..05ea91b1 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 4.4 -Date: 2023-10-31 17:03:39 UTC -SHA: 543a930620fe60662e8c6aa0d50544fcbd44c298 +Version: 4.4.1 +Date: 2023-11-13 18:38:30 UTC +SHA: f22109ac135540b9afef33ca308aee1af9e22981 diff --git a/NEWS.md b/NEWS.md index c0290044..d84c42b7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # rsofun v4.4.1 -* bugfix Fortran modules +* bugfix Fortran modules and derived types # rsofun v4.4 diff --git a/analysis/01-sensitivity-analysis.R b/analysis/01-sensitivity-analysis.R new file mode 100644 index 00000000..da090029 --- /dev/null +++ b/analysis/01-sensitivity-analysis.R @@ -0,0 +1,119 @@ +## Script running sensitivity analysis + +# Load libraries +library(rsofun) +library(dplyr) +library(tidyr) +library(ggplot2) +library(sensitivity) +library(BayesianTools) + +# Set random seed for reproducibility +set.seed(432) + +# Define log-likelihood function +ll_pmodel <- function( + par_v # a vector of all calibratable parameters + # including errors +){ + rsofun::cost_likelihood_pmodel( # likelihood cost function from package + par_v, + obs = rsofun::p_model_validation, # example data from package + drivers = rsofun::p_model_drivers, + targets = "gpp" + ) +} + +# Define parameter bounds (from previous literature) + +# best parameter values (initial values) +par_cal_best <- c( + kphio = 0.09423773, + kphio_par_a = -0.0025, + kphio_par_b = 20, + soilm_thetastar = 0.6*240, + soilm_betao = 0.2, + beta_unitcostratio = 146.0, + rd_to_vcmax = 0.014, + tau_acclim = 30.0, + kc_jmax = 0.41, + error_gpp = 1 +) + +# lower bound +par_cal_min <- c( + kphio = 0.03, + kphio_par_a = -0.004, + kphio_par_b = 10, + soilm_thetastar = 0, + soilm_betao = 0, + beta_unitcostratio = 50.0, + rd_to_vcmax = 0.01, + tau_acclim = 7.0, + kc_jmax = 0.2, + error_gpp = 0.01 +) + +# upper bound +par_cal_max <- c( + kphio = 0.15, + kphio_par_a = -0.001, + kphio_par_b = 30, + soilm_thetastar = 240, + soilm_betao = 1, + beta_unitcostratio = 200.0, + rd_to_vcmax = 0.1, + tau_acclim = 60.0, + kc_jmax = 0.8, + error_gpp = 4 +) + +# Create BayesinaTools setup object +morris_setup <- BayesianTools::createBayesianSetup( + likelihood = ll_pmodel, + prior = BayesianTools::createUniformPrior(par_cal_min, + par_cal_max, + par_cal_best), + names = names(par_cal_best) +) + +# Run Morris sensitivity analysis +set.seed(432) +morrisOut <- sensitivity::morris( + model = morris_setup$posterior$density, + factors = names(par_cal_best), + r = 1000, + design = list(type = "oat", levels = 20, grid.jump = 3), + binf = par_cal_min, + bsup = par_cal_max, + scale = TRUE) + +# Summarise the morris output into statistics +morrisOut.df <- data.frame( + parameter = names(par_cal_best), + mu.star = apply(abs(morrisOut$ee), 2, mean, na.rm = T), + sigma = apply(morrisOut$ee, 2, sd, na.rm = T) +) %>% + arrange( mu.star ) + +# Create barplot to show sensitivity analysis output +morrisOut.df |> + tidyr::pivot_longer( -parameter, names_to = "variable", values_to = "value") |> + ggplot(aes( + reorder(parameter, value), + value, + fill = variable), + color = NA) + + geom_bar(position = position_dodge(), stat = 'identity') + + scale_fill_manual("", + labels = c('mu.star' = expression(mu * "*"), + 'sigma' = expression(sigma)), + values = c('mu.star' = "#29a274ff", + 'sigma' = "#777055ff")) + + theme_classic() + + theme( + axis.text = element_text(size = 6), + axis.title = element_blank(), + legend.position = c(0.9, 0.1), legend.justification = c(0.95, 0.05) + ) + + coord_flip() # make horizontal \ No newline at end of file diff --git a/analysis/02-bayesian-calibration.R b/analysis/02-bayesian-calibration.R new file mode 100644 index 00000000..0a45f8b5 --- /dev/null +++ b/analysis/02-bayesian-calibration.R @@ -0,0 +1,117 @@ +# Script running Bayesian calibration + +# Load libraries +library(rsofun) +library(dplyr) +library(tidyr) +library(ggplot2) +library(sensitivity) +library(BayesianTools) +library(tictoc) + +# Define functions for plotting + +getSetup <- function(x) { + classes <- class(x) + if (any(c('mcmcSampler', 'smcSampler') %in% classes)) x$setup + else if (any(c('mcmcSamplerList', 'smcSamplerList') %in% classes)) x[[1]]$setup + else stop('Can not get setup from x') +} + +t_col <- function(color, percent = 50, name = NULL) { + # color = color name + # percent = % transparency + # name = an optional name for the color + + ## Get RGB values for named color + rgb.val <- col2rgb(color) + + ## Make new color using input color as base and alpha set by transparency + t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3], + max = 255, + alpha = (100 - percent) * 255 / 100, + names = name) + + ## Save the color + invisible(t.col) +} + +plot_prior_posterior_density <- function( + x # Bayesian calibration output +){ + # Get matrices of prior and posterior samples + posteriorMat <- getSample(x, parametersOnly = TRUE) + priorMat <- getSetup(x)$prior$sampler(10000) # nPriorDraws = 10000 + + # Parameter names + nPar <- ncol(posteriorMat) + parNames <- colnames(posteriorMat) + # rename columns priorMat + colnames(priorMat) <- parNames + + # Create data frame for plotting + df_plot <- rbind(data.frame(posteriorMat, distrib = "posterior"), + data.frame(priorMat, distrib = "prior") + ) + df_plot$distrib <- as.factor(df_plot$distrib) + + # Plot with facet wrap + df_plot |> + tidyr::gather(variable, value, kphio:err_gpp) |> + ggplot( + aes(x = value, fill = distrib) + ) + + geom_density() + + theme_classic() + + facet_wrap( ~ variable , nrow = 2, scales = "free") + + theme(legend.position = "bottom", + axis.title.x = element_text("")) + + ggtitle("Marginal parameter uncertainty") + + scale_fill_manual(values = c("#29a274ff", t_col("#777055ff"))) # GECO colors + +} + +# Set random seed for reproducibility +set.seed(432) + +# Define calibration settings +settings_calib <- list( + method = "BayesianTools", + metric = rsofun::cost_likelihood_pmodel, + control = list( + sampler = "DEzs", + settings = list( + burnin = 1500, + iterations = 12000, + nrChains = 3, # number of independent chains + startValue = 3 # number of internal chains to be sampled + )), + par = list( + kphio = list(lower = 0.03, upper = 0.15, init = 0.05), + soilm_betao = list(lower = 0, upper = 1, init = 0.2), + kc_jmax = list(lower = 0.2, upper = 0.8, init = 0.41), + err_gpp = list(lower = 0.1, upper = 3, init = 0.8) + ) +) + +# Run calibration and measure time +tic() + +par_calib <- calib_sofun( + drivers = rsofun::p_model_drivers, + obs = rsofun::p_model_validation, + settings = settings_calib, + par_fixed = list( + kphio_par_a = -0.0025, # define model parameter values from + kphio_par_b = 20, # Stocker et al. 2020 + soilm_thetastar = 0.6*240, + beta_unitcostratio = 146.0, + rd_to_vcmax = 0.014, + tau_acclim = 30.0), + targets = "gpp" +) + +toc() # Stop measuring time + +# Plot prior and posterior distributions +plot_prior_posterior_density(par_calib$mod) diff --git a/analysis/03-uncertainty-estimation.R b/analysis/03-uncertainty-estimation.R new file mode 100644 index 00000000..3c73fd86 --- /dev/null +++ b/analysis/03-uncertainty-estimation.R @@ -0,0 +1,136 @@ +# Script getting the uncertainty estimation, using the +# output from 02-bayesian-calibration.R +getwd() +# Load libraries +library(rsofun) +library(dplyr) +library(tidyr) +library(ggplot2) +library(sensitivity) +library(BayesianTools) + +# Run 02-bayesian-calibration.R if missing calibration output object +if(!exists('par_calib')){ + source(here::here( + 'analysis/02-bayesian-calibration.R')) # path relative to project location +} + +# Set random seed for reproducibility +set.seed(2023) + +# Evaluation of the uncertainty coming from the model parameters' uncertainty + +# Sample parameter values from the posterior distribution +samples_par <- getSample(par_calib$mod, + thin = 46, # get 600 samples in total + whichParameters = 1:4) |> + as.data.frame() |> + dplyr::mutate(mcmc_id = 1:n()) |> + tidyr::nest(.by = mcmc_id, .key = "pars") + +# Define function to run model for a set of sampled parameters +run_pmodel <- function(sample_par){ + # Function that runs the P-model for a sample of parameters + # and also adds the new observation error + + out <- runread_pmodel_f( + drivers = p_model_drivers, + par = list( # copied from par_fixed above + kphio = sample_par$kphio, + kphio_par_a = -0.0025, + kphio_par_b = 20, + soilm_thetastar = 0.6*240, + soilm_betao = sample_par$soilm_betao, + beta_unitcostratio = 146.0, + rd_to_vcmax = 0.014, + tau_acclim = 30.0, + kc_jmax = sample_par$kc_jmax) # value from posterior + ) + + # return modelled GPP and prediction for a new GPP observation + gpp <- out$data[[1]][, "gpp"] + data.frame(gpp = gpp, + gpp_pred = gpp + rnorm(n = length(gpp), mean = 0, + sd = sample_par$err_gpp), + date = out$data[[1]][, "date"]) +} + +# Run the P-model for each set of parameters +pmodel_runs <- samples_par |> + dplyr::mutate(sim = purrr::map(pars, ~run_pmodel(.x))) |> + # format to obtain 90% credible intervals + dplyr::select(mcmc_id, sim) |> + tidyr::unnest(sim) |> + dplyr::group_by(date) |> + # compute quantiles for each day + dplyr::summarise( + gpp_q05 = quantile(gpp, 0.05, na.rm = TRUE), + gpp = quantile(gpp, 0.5, na.rm = TRUE), # get median + gpp_q95 = quantile(gpp, 0.95, na.rm = TRUE), + gpp_pred_q05 = quantile(gpp_pred, 0.05, na.rm = TRUE), + gpp_pred_q95 = quantile(gpp_pred, 0.95, na.rm = TRUE) + ) + +# Plot the credible intervals computed above +# for the first year only +plot_gpp_error <- ggplot(data = pmodel_runs |> + dplyr::slice(1:365) |> + dplyr::left_join( + # Merge GPP validation data (first year) + p_model_validation$data[[1]][1:365, ] |> + dplyr::rename(gpp_obs = gpp), + by = "date") |> + dplyr::left_join( + # Merge GPP before calibration + output$data[[1]][1:365, ] |> + dplyr::select(date, gpp) |> + dplyr::rename(gpp_no_calib = gpp), + by = "date") +) + # Plot only first year + geom_ribbon( + aes(ymin = gpp_pred_q05, + ymax = gpp_pred_q95, + x = date, + fill = "Model uncertainty")) + + geom_ribbon( + aes(ymin = gpp_q05, + ymax = gpp_q95, + x = date, + fill = "Parameter uncertainty")) + + + geom_line( + aes( + date, + gpp, + color = "Predictions" + ) + ) + + geom_line( + aes( + date, + gpp_obs, + color = "Observations" + ) + ) + + theme_classic() + + theme(panel.grid.major.y = element_line(), + legend.position = "bottom") + + labs( + x = 'Date', + y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) + ) + +# Include observations in the plot +plot_gpp_error + + scale_color_manual(name = "", + breaks = c("Observations", + "Predictions", + "Non-calibrated predictions"), + values = c(t_col("black", 10), + t_col("#E69F00", 10), + t_col("#56B4E9", 10))) + + scale_fill_manual(name = "", + breaks = c("Model uncertainty", + "Parameter uncertainty"), + values = c(t_col("#E69F00", 60), + t_col("#009E73", 40))) \ No newline at end of file diff --git a/analysis/cost_likelihood_heteroskedastic_pmodel.R b/analysis/cost_likelihood_heteroskedastic_pmodel.R new file mode 100644 index 00000000..5538adc0 --- /dev/null +++ b/analysis/cost_likelihood_heteroskedastic_pmodel.R @@ -0,0 +1,192 @@ +# This function implements the likelihood cost function used for calibration, +# but considers a model error that scales with GPP magnitude, thus accounting +# for linear heterokedasticity of the error term. + +cost_likelihood_heteroskedastic_pmodel <- function( + par, # model parameters & error terms for each target + obs, + drivers, + targets, + par_fixed = NULL, # non-calibrated model parameters + parallel = FALSE, + ncores = 2 +){ + # predefine variables for CRAN check compliance + sitename <- data <- gpp_mod <- NULL + + ## check input parameters + if( (length(par) + length(par_fixed)) != (9 + 2*length(targets)) ){ + stop('Error: Input calibratable and fixed parameters (par and par_fixed) + do not match length of the required P-model parameters and target error terms.') + } + + ## define parameter set based on calibrated parameters + calib_param_names <- c('kphio', 'kphio_par_a', 'kphio_par_b', + 'soilm_thetastar', 'soilm_betao', + 'beta_unitcostratio', 'rd_to_vcmax', + 'tau_acclim', 'kc_jmax') + + if(!is.null(par_fixed)){ + params_modl <- list() + # complete with calibrated values + i <- 1 # start counter + for(par_name in calib_param_names){ + if(is.null(par_fixed[[par_name]])){ + params_modl[[par_name]] <- par[i] # use calibrated par value + i <- i + 1 # counter of calibrated params + }else{ + params_modl[[par_name]] <- par_fixed[[par_name]] # use fixed par value + } + } + }else{ + params_modl <- as.list(par[1:9]) # all parameters calibrated + names(params_modl) <- calib_param_names + } + + ## run the model + df <- runread_pmodel_f( + drivers, + par = params_modl, + makecheck = TRUE, + parallel = parallel, + ncores = ncores + ) + + ## clean model output and unnest + df <- df |> + dplyr::rowwise() |> + dplyr::reframe( + cbind(sitename, data[, c('date', unique(c('gpp', targets)))]) |> + stats::setNames(c('sitename', 'date', paste0(unique(c('gpp', targets)), '_mod'))) + ) # gpp is used to get average trait prediction + + # separate validation data into fluxes and traits, site by site + is_flux <- apply(obs, 1, function(x){ 'date' %in% colnames(x$data)}) + + if(sum(is_flux) > 0){ + flux_sites <- obs$sitename[is_flux] + + # Unnest flux observations for our targets + obs_flux <- obs[is_flux, ] |> + dplyr::select(sitename, data) |> + tidyr::unnest(data) |> + dplyr::select(any_of(c('sitename', 'date', targets))) + + if(ncol(obs_flux) < 3){ + warning("Dated observations (fluxes) are missing for the chosen targets.") + df_flux <- data.frame() + }else{ + # Join P-model output and flux observations + df_flux <- df |> + dplyr::filter(sitename %in% flux_sites) |> + dplyr::left_join( + obs_flux, + by = c('sitename', 'date')) # observations with missing date are ignored + } + }else{ + df_flux <- data.frame() + } + + if(sum(!is_flux) > 0){ + trait_sites <- obs$sitename[!is_flux] + + # Unnest trait observations for our targets + obs_trait <- obs[!is_flux, ] |> + dplyr::select(sitename, data) |> + tidyr::unnest(data) |> + dplyr::select(any_of(c('sitename', targets))) + + if(ncol(obs_trait) < 2){ + warning("Non-dated observations (traits) are missing for the chosen targets.") + df_trait <- data.frame() + }else{ + # Join output and trait observations + df_trait <- df |> + dplyr::filter(sitename %in% trait_sites) |> + dplyr::group_by(sitename) |> + # get growing season average traits + dplyr::summarise(across(ends_with("_mod") & !starts_with('gpp'), + ~ sum(.x * gpp_mod/sum(gpp_mod)), + .names = "{.col}")) |> + dplyr::left_join( + obs_trait, + by = c('sitename') # compare yearly averages rather than daily obs + ) + } + }else{ + df_trait <- data.frame() + } + + # loop over targets + ll <- lapply(seq(length(targets)), function(i){ + target <- targets[i] + # get observations and predicted target values, without NA + if(target %in% colnames(df_flux)){ + df_target <- df_flux[, c(paste0(target, '_mod'), target)] |> + tidyr::drop_na() + }else{ + df_target <- data.frame() + } + if(target %in% colnames(df_trait)){ + df_target <- rbind(df_target, + df_trait[, c(paste0(target, '_mod'), target)] |> + tidyr::drop_na()) + } + + # calculate normal log-likelihood with varying standard deviation + ll <- sum(stats::dnorm( + df_target[[paste0(target, '_mod')]], + mean = df_target[[target]], + sd = par[length(par)-2*length(targets) + i] + + par[length(par) - 2*length(targets) + i+1] * df_target[[paste0(target, '_mod')]], + log = TRUE + )) + }) |> + unlist() |> + sum() + + # trap boundary conditions + if(is.nan(ll) | is.na(ll) | ll == 0){ll <- -Inf} + + return(ll) +} + +# Now we try to run the calibration +set.seed(2023) + +# Define calibration settings +settings_calib <- list( + method = "BayesianTools", + metric = cost_likelihood_heteroskedastic_pmodel, + control = list( + sampler = "DEzs", + settings = list( + burnin = 3000, + iterations = 9000, + nrChains = 3, # number of independent chains + startValue = 3 # number of internal chains to be sampled + )), + par = list( + kphio = list(lower = 0.03, upper = 0.15, init = 0.05), + kphio_par_a = list(lower = -0.004, upper = -0.001, init = -0.0025), + kphio_par_b = list(lower = 10, upper = 30, init =25), + err_gpp_a = list(lower = 0.1, upper = 3, init = 0.8), + err_gpp_b = list(lower = -1, upper = 1, init = 0) + ) +) + +# Calibrate kphio-related parameters and err_gpp +par_calib <- calib_sofun( + drivers = p_model_drivers, + obs = p_model_validation, + settings = settings_calib, + par_fixed = list( + soilm_thetastar = 0.6*240, + soilm_betao = 0.2, + beta_unitcostratio = 146.0, + rd_to_vcmax = 0.014, + tau_acclim = 30.0, + kc_jmax = 0.41), + targets = "gpp" +) + diff --git a/cran-comments.md b/cran-comments.md index 6b995f2f..9506b069 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,9 +1,8 @@ Dear CRAN team, -This is the re-submission of the {rsofun} package. We have addressed the following -concerns flagged by the automatic checks: +This is the re-submission of the {rsofun} package. We have removed non-default lower bound specification in derived type, in order to fix the compilation issues that appeared in the Debian check. -- Modified code in Fortran modules to fix compilation issues raised by Prof. Ripley. +We were not able to reproduce the error shown by Prof. Ligges but have tried to solve the issue anyways, and our checks pass cleanly. If other issues arise or this one is still present, we would need to be provided with the configuration of the machine used to run the checks on CRAN's end. This way, we could reproduce the error and see if our changes get rid of it. --- @@ -16,7 +15,7 @@ This package is an extension of {rpmodel} in the sense that it expands the P-mod The full documentation can be found at the github repository link: https://geco-bern.github.io/rsofun -Code coverage sits at ~72%, with remaining uncovered code pertaining to minor input data format checks of the main functions. The underlying P-model implementation is based on the {rpmodel} and the parameter calibration routines use packages {GenSA} and {BayesianTools}. +Code coverage sits at ~76%, with remaining uncovered code pertaining to minor input data format checks of the main functions. The underlying P-model implementation is based on the {rpmodel} and the parameter calibration routines use packages {GenSA} and {BayesianTools}. I hope this package is useful for other earth system scientists and the larger CRAN community. Kind regards, Josefa ArĂ¡n. @@ -32,7 +31,7 @@ I have read and agree to the CRAN policies enumerated here: https://cran.r-proje - rhub::check_on_cran() with only notes for latex elements -- codecove.io code coverage at ~72% +- codecove.io code coverage at ~76% ## Github actions R CMD check results diff --git a/src/Makevars b/src/Makevars index 90f5ed1b..e52f2694 100755 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,4 @@ # PKG_FFLAGS = -ffree-line-length-0 -fbacktrace -ffpe-trap=invalid,zero,overflow -O1 -Wall -Wextra -pedantic -fbacktrace -fPIC -fmax-errors=1 -ggdb -fcheck=all -# PKG_FFLAGS = -mtune=native -fPIC -g -O2 -c # C objects C_OBJS = wrappersc.o @@ -7,6 +6,9 @@ C_OBJS = wrappersc.o # Fortran objects: refer to file names , order reflects dependency structure FT_OBJS = params_core.mod.o sofunutils.mod.o grid_siterun.mod.o params_siml_pmodel.mod.o params_siml_biomee.mod.o forcing_siterun_pmodel.mod.o forcing_siterun_biomee.mod.o params_soil_biomee.mod.o interface_biosphere_pmodel.mod.o interface_biosphere_biomee.mod.o tile_pmodel.mod.o plant_pmodel.mod.o soiltemp_sitch.mod.o waterbal_splash.mod.o vegdynamics_pmodel.mod.o gpp_pmodel.mod.o gpp_biomee.mod.o photosynth_pmodel.mod.o biosphere_pmodel.mod.o biosphere_biomee.mod.o vegetation_biomee.mod.o soil_biomee.mod.o sofun_r.o +# Flags for the Fortran compiler, suggested by Prof. Ligges +# PKG_FFLAGS = -fc-prototypes-external -fpic -g -O2 -mtune=native -Wall -pedantic -flto=10 -c + all: $(SHLIB) clean $(SHLIB): $(FT_OBJS) $(C_OBJS)