## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center"
)

library(rsofun)
library(dplyr)
library(ggplot2)
library(tidyr)
library(sensitivity)
library(BayesianTools)

## -----------------------------------------------------------------------------
# Define log-likelihood function
ll_pmodel <- function(
    par_v                 # a named vector of all calibratable parameters including errors
){
  rsofun::cost_likelihood_pmodel(        # reuse likelihood cost function
    par_v,
    obs = rsofun::p_model_validation,
    drivers = rsofun::p_model_drivers,
    targets = "gpp"
  )
}

# Compute log-likelihood for a given set of parameters
ll_pmodel( par_v = c(
  kphio              = 4.102119e-02,
  kphio_par_a        =-2.289344e-03,
  kphio_par_b        = 1.525094e+01,
  soilm_thetastar    = 1.577507e+02,
  soilm_betao        = 1.169702e-04,
  beta_unitcostratio = 146.0,
  rd_to_vcmax        = 0.014,
  tau_acclim         = 20.0,
  kc_jmax            = 0.41,
  error_gpp          = 0.9         # value from previous simulations
))

## -----------------------------------------------------------------------------
# best parameter values (initial values, taken from Stocker et al., 2020 GMD)
par_cal_best <- c(
  kphio              = 0.09423773,
  kphio_par_a        = -0.0025,
  kphio_par_b        = 20,
  soilm_thetastar    = 0.6*240,
  soilm_betao        = 0.2,
  beta_unitcostratio = 146.0,
  rd_to_vcmax        = 0.014,
  tau_acclim         = 30.0,
  kc_jmax            = 0.41,
  error_gpp          = 1
)

# lower bound
par_cal_min <- c(
  kphio              = 0.03,
  kphio_par_a        = -0.004,
  kphio_par_b        = 10,
  soilm_thetastar    = 0,
  soilm_betao        = 0,
  beta_unitcostratio = 50.0,
  rd_to_vcmax        = 0.01,
  tau_acclim         = 7.0,
  kc_jmax            = 0.2,
  error_gpp          = 0.01
)

# upper bound
par_cal_max <- c(
  kphio              = 0.15,
  kphio_par_a        = -0.001,
  kphio_par_b        = 30,
  soilm_thetastar    = 240,
  soilm_betao        = 1,
  beta_unitcostratio = 200.0,
  rd_to_vcmax        = 0.1,
  tau_acclim         = 60.0,
  kc_jmax            = 0.8,
  error_gpp          = 4
)

## ----eval = FALSE-------------------------------------------------------------
# morris_setup <- BayesianTools::createBayesianSetup(
#   likelihood = ll_pmodel,
#   prior = BayesianTools::createUniformPrior(par_cal_min, par_cal_max, par_cal_best),
#   names = names(par_cal_best)
# )

## ----eval = FALSE, echo = TRUE------------------------------------------------
# set.seed(432)
# morrisOut <- sensitivity::morris(
#   model = morris_setup$posterior$density,
#   factors = names(par_cal_best),
#   r = 1000,
#   design = list(type = "oat", levels = 20, grid.jump = 3),
#   binf = par_cal_min,
#   bsup = par_cal_max,
#   scale = TRUE
#   )

## ----simulate_morrisOut, include = FALSE--------------------------------------
# fake output since Morris sensitivity isn't run
# saveRDS(morrisOut, "files/sensitivity_analysis.Rmd__morrisOut.RDS")
morrisOut <- readRDS("files/sensitivity_analysis.Rmd__morrisOut.RDS")

## ----eval = TRUE, fig.width=7, fig.height=4-----------------------------------
# Summarise the morris output
morrisOut.df <- data.frame(
  parameter = names(par_cal_best),
  mu.star = apply(abs(morrisOut$ee), 2, mean, na.rm = T),
  sigma = apply(morrisOut$ee, 2, sd, na.rm = T)
) |> 
  arrange( mu.star )

morrisOut.df |>
  tidyr::pivot_longer( -parameter, names_to = "variable", values_to = "value") |>
  ggplot(aes(
    reorder(parameter, value),
    value, 
    fill = variable),
    color = NA) +
  geom_bar(position = position_dodge(), stat = 'identity') +
  scale_fill_manual("", 
                    labels = c('mu.star' = expression(mu * "*"),
                               'sigma' = expression(sigma)),
                    values = c('mu.star' = "#29a274ff", 
                               'sigma' = "#777055ff")) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 6),
    axis.title = element_blank(),
    legend.position = c(0.9, 0.1), legend.justification = c(0.95, 0.05)
  ) +
  coord_flip()    # make horizontal

## ----eval = FALSE, echo = TRUE------------------------------------------------
# # Calibrates kphio, kphio_par_b, kc_jmax - top 3 model params
# set.seed(2025)
# 
# # Define calibration settings
# settings_calib <- list(
#   method = "BayesianTools",
#   metric = rsofun::cost_likelihood_pmodel,
#   control = list(
#     sampler = "DEzs",
#     settings = list(
#       burnin = 12000,
#       iterations = 24000,
#       nrChains = 3,       # number of independent chains
#       startValue = 3      # number of internal chains to be sampled
#     )),
#   par = list(
#     kphio = list(lower = 0.02, upper = 0.15, init = 0.05),
#     kphio_par_a =list(lower = -0.004, upper = -0.001, init = -0.0025),
#     kphio_par_b = list(lower = 10, upper = 30, init = 20),
#     soilm_thetastar = list(
#       lower = 0.01 * rsofun::p_model_drivers$site_info[[1]]$whc,
#       upper = 1.0  * rsofun::p_model_drivers$site_info[[1]]$whc,
#       init  = 0.6  * rsofun::p_model_drivers$site_info[[1]]$whc
#     ),
#     soilm_betao = list(lower = 0.0, upper = 1.0, init = 0.0),
#     err_gpp = list(lower = 0.1, upper = 3, init = 0.8)
#   )
# )
# 
# par_fixed <- list(
#   beta_unitcostratio = 146.0,
#   kc_jmax            = 0.41,
#   rd_to_vcmax        = 0.014,
#   tau_acclim         = 20.0
#   )
# 
# # Calibrate kphio-related parameters and err_gpp
# par_calib <- calib_sofun(
#   drivers = p_model_drivers,
#   obs = p_model_validation,
#   settings = settings_calib,
#   par_fixed = par_fixed,
#   targets = "gpp"
# )
# saveRDS(par_calib, "files/sensitivity_analysis.Rmd__par_calib.RDS", compress = "xz")
# 
# # This code takes 15 minutes to run

## ----simulate_par_calib, include = FALSE--------------------------------------
# fake output since calibration isn't run
par_calib <- readRDS("files/sensitivity_analysis.Rmd__par_calib.RDS")

## ----fig.height = 10, fig.width = 7-------------------------------------------
plot(par_calib$mod)

## ----fig.height = 10, fig.width = 7, eval = FALSE, echo = FALSE---------------
# # Define function for plotting chains separately
# plot_acf_mcmc <- function(chains, par_names){
#   # chains: from the BayesianTools output
#   n_chains <- length(chains)
#   n_internal_chains <- length(chains[[1]]$chain)
#   par(mfrow = c(length(par_names), n_chains))
#   for(par_name in par_names){
#     for(i in 1:n_chains){
#       stopifnot(n_internal_chains<=3); color = c("blue", "red", "darkgreen")
#       spacing = 0.5/n_internal_chains
#       for(j in 1:n_internal_chains){
#         autocorr_internal_chain <- pacf(getSample(chains[[i]]$chain[[j]])[, par_name], plot = FALSE)
#         if(j==1){
#           plot(autocorr_internal_chain, col = color[j],
#                main = sprintf("Series of %s , chain (%i)", par_name, i))
#         } else {
#           lines(autocorr_internal_chain$lag + spacing*(j-1),
#                 autocorr_internal_chain$acf,
#                 col = color[j], type = "h")
#         }
#       }
#     }
#   }
# }
# plot_acf_mcmc(
#   par_calib$mod,
#   c("kphio", "kphio_par_a", "kphio_par_b", "soilm_thetastar", "soilm_betao",  "err_gpp")
#   )

## ----fig.width=5, fig.height=5------------------------------------------------
correlationPlot(par_calib$mod, thin = 1)   # use all samples, no thinning

## -----------------------------------------------------------------------------
gelmanDiagnostics(par_calib$mod)

## -----------------------------------------------------------------------------
summary(par_calib$mod)

## ----echo = TRUE, eval = FALSE------------------------------------------------
# # Evaluation of the uncertainty coming from the model parameters' uncertainty
# 
# # Sample parameter values from the posterior distribution
# samples_par <- getSample(
#   par_calib$mod,
#   thin = 60
#   ) |>
#   as.data.frame() |>
#   dplyr::mutate(mcmc_id = 1:n()) |>
#   tidyr::nest(.by = mcmc_id, .key = "pars")
# 
# run_pmodel <- function(par){
#   # Function that runs the P-model for a sample of parameters
#   # and also adds the new observation error
# 
#   out <- runread_pmodel_f(
#     drivers = p_model_drivers,
#     par =  list(
#       kphio              = par$kphio,
#       kphio_par_a        = par$kphio_par_a,
#       kphio_par_b        = par$kphio_par_b,
#       soilm_thetastar    = par$soilm_thetastar,
#       soilm_betao        = par$soilm_betao,
#       beta_unitcostratio = 146.0,
#       rd_to_vcmax        = 0.014,
#       tau_acclim         = 20.0,
#       kc_jmax            = 0.41
#       )
#   )
# 
#   # return modelled GPP and prediction for a new GPP observation
#   gpp <- out$data[[1]][, "gpp"]
#   out <- data.frame(
#     gpp = gpp,
#     gpp_pred = rnorm(
#       n = length(gpp),
#       mean = gpp,
#       sd = par$err_gpp
#       ),
#     date = out$data[[1]][, "date"])
#   return(out)
# }
# 
# set.seed(2025)
# 
# # Run the P-model for each set of parameters
# pmodel_runs <- samples_par |>
#   dplyr::mutate(sim = purrr::map(pars, ~run_pmodel(.x))) |>
#   # format to obtain 90% credible intervals
#   dplyr::select(mcmc_id, sim) |>
#   tidyr::unnest(sim) |>
#   dplyr::group_by(date) |>
#   # compute quantiles for each day
#   dplyr::summarise(
#     gpp_q05 = quantile(gpp, 0.05, na.rm = TRUE),
#     gpp_q50 = quantile(gpp, 0.5, na.rm = TRUE),          # get median
#     gpp_q95 = quantile(gpp, 0.95, na.rm = TRUE),
#     gpp_pred_q05 = quantile(gpp_pred, 0.05, na.rm = TRUE),
#     gpp_pred_q95 = quantile(gpp_pred, 0.95, na.rm = TRUE)
#   )
# 
# # run model with maximum a posteriori parameter estimates
# pmodel_run_map <- run_pmodel(
#   MAP(par_calib$mod)$parametersMAP |>
#     t() |>
#     as_tibble()
# )

## ----simulate_pmodel_runs, include = FALSE------------------------------------
# TODO: get rid of this and always fully run the vignettes
# fake output since calibration isn't run
# saveRDS(pmodel_runs, file = "files/pmodel_runs.rds")
pmodel_runs <- readRDS("files/pmodel_runs.rds")

## ----fig.width=7, fig.height=5------------------------------------------------
## add transparency to color given as a name
add_alpha <- function( col, alpha ){
  col    <- col2rgb( col, alpha = TRUE )/255
  col[4] <- alpha
  col    <- rgb(col[1,],col[2,],col[3,],col[4,])
  return( col )
}

# Plot the credible intervals computed above
# for the first year only
data_to_plot <- pmodel_runs |>
  # Plot only first year
  dplyr::slice(1:365) |>
  dplyr::left_join(
    # Merge GPP validation data (first year)
    p_model_validation$data[[1]][1:365, ] |>
      dplyr::rename(gpp_obs = gpp),
    by = "date")

plot_gpp_error <- ggplot(data = data_to_plot) +
  geom_ribbon(
    aes(
      ymin = gpp_pred_q05,
      ymax = gpp_pred_q95,
      x = date,
      fill = "Model uncertainty"
    )) +
  geom_ribbon(
    aes(
      ymin = gpp_q05, 
      ymax = gpp_q95,
      x = date,
      fill = "Parameter uncertainty"
    )) +
  # Include observations in the plot
  geom_point(
    aes(
      x = date,
     y = gpp_obs,
     color = "Observations"
    ),
  ) +
  geom_line(
    aes(
      x = date,
      y = gpp_q50,
      color = "Predictions"
    )
  ) +
  theme_classic() +
  theme(panel.grid.major.y = element_line(),
        legend.position = "bottom") +
  labs(
    x = 'Date',
    y = expression(paste("GPP (g C m"^-2, "s"^-1, ")"))
  ) +
  scale_color_manual(NULL,
                     breaks = c("Observations",
                                "Predictions"),
                     values = c("black", "tomato")) +
  scale_fill_manual(NULL,
                    breaks = c("Model uncertainty",
                               "Parameter uncertainty"),
                    values = c(add_alpha("tomato", 0.5),
                               "#1b9e77", 0))
plot_gpp_error

## ----fig.width=7, fig.height=5, echo = FALSE, eval = FALSE--------------------
# # Define function to run model for a set of sampled parameters
# run_pmodel <- function(par){
#   # Function that runs the P-model for a sample of parameters
#   out <- runread_pmodel_f(
#     drivers = p_model_drivers,
#     par =  list(
#       kphio              = par$kphio,
#       kphio_par_a        = par$kphio_par_a,
#       kphio_par_b        = par$kphio_par_b,
#       soilm_thetastar    = par$soilm_thetastar,
#       soilm_betao        = par$soilm_betao,
#       beta_unitcostratio = 146.0,
#       rd_to_vcmax        = 0.014,
#       tau_acclim         = 20.0,
#       kc_jmax            = 0.41
#       )
#   )
# 
#   return(out)
# }
# 
# # Plot observed and predicted GPP, with a 95% confidence interval using err_gpp
# # Run model with maximum a posteriori parameter estimates (not shown on plot).
# pmodel_run_map <- run_pmodel(
#   BayesianTools::MAP(par_calib$mod)$parametersMAP |>
#     t() |>
#     as_tibble()
# ) |>
#   dplyr::select(-site_info) |>
#   tidyr::unnest(data)
# 
# # Plot the credible intervals computed above
# # for the first year only
# data_to_plot <- pmodel_run_map |>
#   # Plot only first year
#   dplyr::slice(1:365) |>
#   dplyr::left_join(
#     # Merge GPP validation data (first year)
#     p_model_validation$data[[1]][1:365, ] |>
#       dplyr::rename(gpp_obs = gpp),
#     by = "date")
# 
# plot_gpp_error <- ggplot(data = data_to_plot) +
#   # Include observations in the plot
#   geom_point(
#     aes(
#       x = date,
#      y = gpp_obs,
#      color = "Observations"
#     ),
#   ) +
#   geom_line(
#     aes(
#       x = date,
#       y = gpp,
#       color = "Predictions based on MAP"
#     )
#   ) +
#   theme_classic() +
#   theme(panel.grid.major.y = element_line(),
#         legend.position = "bottom") +
#   labs(
#     x = 'Date',
#     y = expression(paste("GPP (g C m"^-2, "s"^-1, ")"))
#   ) +
#   scale_color_manual(NULL,
#                      breaks = c("Observations",
#                                 "Predictions based on MAP"),
#                      values = c("black", "tomato"))
# plot_gpp_error

