Updated code for MEE submission #193

Merged
merged 12 commits on Dec 6, 2023
Changes from all commits
6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 4.4
-Date: 2023-10-31 17:03:39 UTC
-SHA: 543a930620fe60662e8c6aa0d50544fcbd44c298
+Version: 4.4.1
+Date: 2023-11-13 18:38:30 UTC
+SHA: f22109ac135540b9afef33ca308aee1af9e22981
2 changes: 1 addition & 1 deletion NEWS.md
@@ -1,6 +1,6 @@
 # rsofun v4.4.1

-* bugfix Fortran modules
+* bugfix Fortran modules and derived types

 # rsofun v4.4

119 changes: 119 additions & 0 deletions analysis/01-sensitivity-analysis.R
@@ -0,0 +1,119 @@
## Script running sensitivity analysis

# Load libraries
library(rsofun)
library(dplyr)
library(tidyr)
library(ggplot2)
library(sensitivity)
library(BayesianTools)

# Set random seed for reproducibility
set.seed(432)

# Define log-likelihood function
ll_pmodel <- function(
    par_v                                # a vector of all calibratable parameters, including errors
){
  rsofun::cost_likelihood_pmodel(        # likelihood cost function from package
    par_v,
    obs = rsofun::p_model_validation,    # example data from package
    drivers = rsofun::p_model_drivers,
    targets = "gpp"
  )
}

# Define parameter bounds (from previous literature)

# best parameter values (initial values)
par_cal_best <- c(
kphio = 0.09423773,
kphio_par_a = -0.0025,
kphio_par_b = 20,
soilm_thetastar = 0.6*240,
soilm_betao = 0.2,
beta_unitcostratio = 146.0,
rd_to_vcmax = 0.014,
tau_acclim = 30.0,
kc_jmax = 0.41,
error_gpp = 1
)

# lower bound
par_cal_min <- c(
kphio = 0.03,
kphio_par_a = -0.004,
kphio_par_b = 10,
soilm_thetastar = 0,
soilm_betao = 0,
beta_unitcostratio = 50.0,
rd_to_vcmax = 0.01,
tau_acclim = 7.0,
kc_jmax = 0.2,
error_gpp = 0.01
)

# upper bound
par_cal_max <- c(
kphio = 0.15,
kphio_par_a = -0.001,
kphio_par_b = 30,
soilm_thetastar = 240,
soilm_betao = 1,
beta_unitcostratio = 200.0,
rd_to_vcmax = 0.1,
tau_acclim = 60.0,
kc_jmax = 0.8,
error_gpp = 4
)
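
# Illustrative sanity check (an addition, not part of the original analysis):
# evaluating the log-likelihood at the initial parameter vector should return
# a single finite value before any screening or calibration is run.
ll_init <- ll_pmodel(par_cal_best)
print(ll_init)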

# Create BayesianTools setup object
morris_setup <- BayesianTools::createBayesianSetup(
  likelihood = ll_pmodel,
  prior = BayesianTools::createUniformPrior(par_cal_min,
                                            par_cal_max,
                                            par_cal_best),
  names = names(par_cal_best)
)
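
# Illustrative check (an addition): the setup object exposes the log-posterior
# density that the Morris screening below evaluates as its "model" function.
print(morris_setup$posterior$density(par_cal_best))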

# Run Morris sensitivity analysis
set.seed(432)
morrisOut <- sensitivity::morris(
  model = morris_setup$posterior$density,
  factors = names(par_cal_best),
  r = 1000,
  design = list(type = "oat", levels = 20, grid.jump = 3),
  binf = par_cal_min,
  bsup = par_cal_max,
  scale = TRUE
)

# Summarise the Morris elementary effects into statistics (mu* and sigma)
morrisOut.df <- data.frame(
  parameter = names(par_cal_best),
  mu.star = apply(abs(morrisOut$ee), 2, mean, na.rm = TRUE),
  sigma = apply(morrisOut$ee, 2, sd, na.rm = TRUE)
) %>%
  arrange(mu.star)
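
# Illustrative addition: inspect the ranked Morris statistics directly.
print(morrisOut.df)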

# Create barplot to show sensitivity analysis output
morrisOut.df |>
  tidyr::pivot_longer(-parameter, names_to = "variable", values_to = "value") |>
  ggplot(aes(
    reorder(parameter, value),
    value,
    fill = variable),
    color = NA) +
  geom_bar(position = position_dodge(), stat = 'identity') +
  scale_fill_manual("",
                    labels = c('mu.star' = expression(mu * "*"),
                               'sigma' = expression(sigma)),
                    values = c('mu.star' = "#29a274ff",
                               'sigma' = "#777055ff")) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 6),
    axis.title = element_blank(),
    legend.position = c(0.9, 0.1), legend.justification = c(0.95, 0.05)
  ) +
  coord_flip()   # make horizontal
117 changes: 117 additions & 0 deletions analysis/02-bayesian-calibration.R
@@ -0,0 +1,117 @@
# Script running Bayesian calibration

# Load libraries
library(rsofun)
library(dplyr)
library(tidyr)
library(ggplot2)
library(sensitivity)
library(BayesianTools)
library(tictoc)

# Define functions for plotting

getSetup <- function(x) {
  classes <- class(x)
  if (any(c('mcmcSampler', 'smcSampler') %in% classes)) x$setup
  else if (any(c('mcmcSamplerList', 'smcSamplerList') %in% classes)) x[[1]]$setup
  else stop('Can not get setup from x')
}

t_col <- function(color, percent = 50, name = NULL) {
  # color   = color name
  # percent = % transparency
  # name    = an optional name for the color

  ## Get RGB values for the named color
  rgb.val <- col2rgb(color)

  ## Make a new color using the input color as base and alpha set by transparency
  t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
               max = 255,
               alpha = (100 - percent) * 255 / 100,
               names = name)

  ## Return the color (invisibly)
  invisible(t.col)
}
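
# Example usage (illustrative addition): a 50%-transparent black, returned
# invisibly as the hex string "#00000080".
black50 <- t_col("black", percent = 50)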

plot_prior_posterior_density <- function(
    x    # Bayesian calibration output
){
  # Get matrices of prior and posterior samples
  posteriorMat <- getSample(x, parametersOnly = TRUE)
  priorMat <- getSetup(x)$prior$sampler(10000)    # nPriorDraws = 10000

  # Parameter names
  nPar <- ncol(posteriorMat)
  parNames <- colnames(posteriorMat)
  # rename columns of priorMat
  colnames(priorMat) <- parNames

  # Create data frame for plotting
  df_plot <- rbind(data.frame(posteriorMat, distrib = "posterior"),
                   data.frame(priorMat, distrib = "prior"))
  df_plot$distrib <- as.factor(df_plot$distrib)

  # Plot with facet wrap
  df_plot |>
    tidyr::gather(variable, value, kphio:err_gpp) |>
    ggplot(
      aes(x = value, fill = distrib)
    ) +
    geom_density() +
    theme_classic() +
    facet_wrap(~ variable, nrow = 2, scales = "free") +
    theme(legend.position = "bottom",
          axis.title.x = element_text("")) +
    ggtitle("Marginal parameter uncertainty") +
    scale_fill_manual(values = c("#29a274ff", t_col("#777055ff")))    # GECO colors
}

# Set random seed for reproducibility
set.seed(432)

# Define calibration settings
settings_calib <- list(
  method = "BayesianTools",
  metric = rsofun::cost_likelihood_pmodel,
  control = list(
    sampler = "DEzs",
    settings = list(
      burnin = 1500,
      iterations = 12000,
      nrChains = 3,     # number of independent chains
      startValue = 3    # number of internal chains to be sampled
    )),
  par = list(
    kphio = list(lower = 0.03, upper = 0.15, init = 0.05),
    soilm_betao = list(lower = 0, upper = 1, init = 0.2),
    kc_jmax = list(lower = 0.2, upper = 0.8, init = 0.41),
    err_gpp = list(lower = 0.1, upper = 3, init = 0.8)
  )
)

# Run calibration and measure time
tic()

par_calib <- calib_sofun(
  drivers = rsofun::p_model_drivers,
  obs = rsofun::p_model_validation,
  settings = settings_calib,
  par_fixed = list(               # fixed model parameter values from Stocker et al. (2020)
    kphio_par_a = -0.0025,
    kphio_par_b = 20,
    soilm_thetastar = 0.6 * 240,
    beta_unitcostratio = 146.0,
    rd_to_vcmax = 0.014,
    tau_acclim = 30.0),
  targets = "gpp"
)

toc() # Stop measuring time

# Plot prior and posterior distributions
plot_prior_posterior_density(par_calib$mod)
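
# Illustrative addition (par_calib$mod is the BayesianTools sampler object, as
# used above): check convergence before using the posterior, e.g. with trace
# plots and the Gelman-Rubin diagnostic.
plot(par_calib$mod)
BayesianTools::gelmanDiagnostics(par_calib$mod)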
136 changes: 136 additions & 0 deletions analysis/03-uncertainty-estimation.R
@@ -0,0 +1,136 @@
# Script estimating prediction uncertainty, using the
# calibration output from 02-bayesian-calibration.R
# Load libraries
library(rsofun)
library(dplyr)
library(tidyr)
library(ggplot2)
library(sensitivity)
library(BayesianTools)

# Run 02-bayesian-calibration.R if missing calibration output object
if (!exists('par_calib')) {
  source(here::here('analysis/02-bayesian-calibration.R'))   # path relative to the project location
}

# Set random seed for reproducibility
set.seed(2023)

# Evaluate the predictive uncertainty arising from uncertainty in the model parameters

# Sample parameter values from the posterior distribution
samples_par <- getSample(par_calib$mod,
                         thin = 46,              # get 600 samples in total
                         whichParameters = 1:4) |>
  as.data.frame() |>
  dplyr::mutate(mcmc_id = 1:n()) |>
  tidyr::nest(.by = mcmc_id, .key = "pars")
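
# Illustrative check (an addition): with thin = 46 this should yield roughly
# 600 parameter sets; the exact count depends on the chain length after burn-in.
print(nrow(samples_par))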

# Define a function to run the P-model for one set of sampled parameters
run_pmodel <- function(sample_par){
  # Run the P-model for a sample of parameters
  # and add the new observation error

  out <- runread_pmodel_f(
    drivers = p_model_drivers,
    par = list(                          # fixed values as in 02-bayesian-calibration.R
      kphio = sample_par$kphio,
      kphio_par_a = -0.0025,
      kphio_par_b = 20,
      soilm_thetastar = 0.6 * 240,
      soilm_betao = sample_par$soilm_betao,
      beta_unitcostratio = 146.0,
      rd_to_vcmax = 0.014,
      tau_acclim = 30.0,
      kc_jmax = sample_par$kc_jmax)      # calibrated values from the posterior
  )

  # Return modelled GPP and the prediction for a new GPP observation
  gpp <- out$data[[1]][, "gpp"]
  data.frame(gpp = gpp,
             gpp_pred = gpp + rnorm(n = length(gpp), mean = 0,
                                    sd = sample_par$err_gpp),
             date = out$data[[1]][, "date"])
}
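
# Illustrative single run (an addition, not part of the original workflow):
# inspect the output format for one posterior sample before mapping over all.
print(head(run_pmodel(samples_par$pars[[1]])))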

# Run the P-model for each set of parameters
pmodel_runs <- samples_par |>
  dplyr::mutate(sim = purrr::map(pars, ~run_pmodel(.x))) |>
  # format to obtain 90% credible intervals
  dplyr::select(mcmc_id, sim) |>
  tidyr::unnest(sim) |>
  dplyr::group_by(date) |>
  # compute quantiles for each day
  dplyr::summarise(
    gpp_q05 = quantile(gpp, 0.05, na.rm = TRUE),
    gpp = quantile(gpp, 0.5, na.rm = TRUE),        # get median
    gpp_q95 = quantile(gpp, 0.95, na.rm = TRUE),
    gpp_pred_q05 = quantile(gpp_pred, 0.05, na.rm = TRUE),
    gpp_pred_q95 = quantile(gpp_pred, 0.95, na.rm = TRUE)
  )

# Plot the credible intervals computed above
# for the first year only
plot_gpp_error <- ggplot(
  data = pmodel_runs |>
    dplyr::slice(1:365) |>               # plot the first year only
    dplyr::left_join(
      # Merge GPP validation data (first year)
      p_model_validation$data[[1]][1:365, ] |>
        dplyr::rename(gpp_obs = gpp),
      by = "date") |>
    dplyr::left_join(
      # Merge GPP simulated before calibration ('output' is assumed to hold a
      # P-model run with the initial, non-calibrated parameter values)
      output$data[[1]][1:365, ] |>
        dplyr::select(date, gpp) |>
        dplyr::rename(gpp_no_calib = gpp),
      by = "date")
) +
  geom_ribbon(
    aes(ymin = gpp_pred_q05,
        ymax = gpp_pred_q95,
        x = date,
        fill = "Model uncertainty")) +
  geom_ribbon(
    aes(ymin = gpp_q05,
        ymax = gpp_q95,
        x = date,
        fill = "Parameter uncertainty")) +
  geom_line(
    aes(
      date,
      gpp,
      color = "Predictions"
    )
  ) +
  geom_line(
    aes(
      date,
      gpp_obs,
      color = "Observations"
    )
  ) +
  theme_classic() +
  theme(panel.grid.major.y = element_line(),
        legend.position = "bottom") +
  labs(
    x = 'Date',
    y = expression(paste("GPP (g C m"^-2, " s"^-1, ")"))
  )

# Include observations in the plot
plot_gpp_error +
  scale_color_manual(name = "",
                     breaks = c("Observations",
                                "Predictions",
                                "Non-calibrated predictions"),
                     values = c(t_col("black", 10),
                                t_col("#E69F00", 10),
                                t_col("#56B4E9", 10))) +
  scale_fill_manual(name = "",
                    breaks = c("Model uncertainty",
                               "Parameter uncertainty"),
                    values = c(t_col("#E69F00", 60),
                               t_col("#009E73", 40)))