## ----set_options, include = FALSE---------------------------------------------
knitr::opts_chunk$set(
  eval = FALSE, # Chunks of codes will not be evaluated by default
  collapse = TRUE,
  comment = "#>",
  fig.width = 7, fig.height = 5   # Set device size at rendering time (when plots are generated)
)

## ----setup, eval = TRUE, include = FALSE--------------------------------------
library(deepSTRAPP)

is_dev_version <- function (pkg = "deepSTRAPP")
{
  # # Check if ran on CRAN
  # not_cran <- identical(Sys.getenv("NOT_CRAN"), "true") # || interactive()

  # Version number check
  version <- tryCatch(as.character(utils::packageVersion(pkg)), error = function(e) "")
  dev_version <- grepl("\\.9000", version)

  # not_cran || dev_version
  
  return(dev_version)
}


## ----adjust_dpi_CRAN, include = FALSE, eval = !is_dev_version()---------------
knitr::opts_chunk$set(
  dpi = 72   # Lower DPI to save space
)

## ----adjust_dpi_dev, include = FALSE, eval = is_dev_version()-----------------
# knitr::opts_chunk$set(
#   dpi = 72   # Default DPI for the dev version
# )

## ----model_trait_evolution_cont-----------------------------------------------
# 
# # ------ Step 0: Load data ------ #
# 
# ## Load phylogeny and tip data
# 
# library(phytools)
# data(eel.tree)
# data(eel.data)
# # Dataset of feeding mode and maximum total length from 61 species of elopomorph eels.
# # Source: Collar, D. C., P. C. Wainwright, M. E. Alfaro, L. J. Revell, and R. S. Mehta (2014)
# # Biting disrupts integration to spur skull evolution in eels. Nature Communications, 5, 5505.
# 
# # Extract body size
# eel_tip_data <- stats::setNames(eel.data$Max_TL_cm,
#                                 rownames(eel.data))
# 
# plot(eel.tree)
#  ape::Ntip(eel.tree) == length(eel_tip_data)
# 
# ## Check that trait tip data and phylogeny are named and ordered similarly
# all(names(eel_tip_data) == eel.tree$tip.label)
# 
# # Reorder tip_data as in phylogeny
# eel_tip_data <- eel_tip_data[eel.tree$tip.label]
# 
# # ------ Step 1: Prepare continuous trait data ------ #
# 
# ## Goal: Map continuous trait evolution on the time-calibrated phylogeny
# 
# # 1.1/ Fit evolutionary models to trait data using Maximum Likelihood.
# # 1.2/ Select the best fitting model comparing AICc.
# # 1.3/ Infer ancestral characters estimates (ACE) at nodes.
# # 1.4/ Infer ancestral states along branches using interpolation to produce a `contMap`.
# 
# library(deepSTRAPP)
# 
# # All these actions are performed by a single function: deepSTRAPP::prepare_trait_data()
# ?deepSTRAPP::prepare_trait_data()
# # Model selection is performed internally with deepSTRAPP::select_best_model_from_geiger()
# ?deepSTRAPP::select_best_model_from_geiger()
# # Plotting of the contMap is carried out with deepSTRAPP::plot_contMap()
# ?deepSTRAPP::plot_contMap()
# 
# ## Macroevolutionary models for continuous traits
# 
# ?geiger::fitContinuous() # For more in-depth information on the models available
# 
# ## 7 models from geiger::fitContinuous() are available
#  # "BM": Brownian Motion model.
#     # Default model that assumes a Brownian random walk in the trait space.
#     # No trend. No time-dependence.
#     # Correlation structure is proportional to the extent of shared ancestry for pairs of species.
#     # sigma² ('sigsq') is the evolutionary rate that represents
#     # the expected variance in traits in proportion to time.
#     # 'z0' is the ancestral character estimate (trait value) at the root.
#  # "OU": Ornstein-Uhlenbeck model.
#     # Random walk with a central tendency (= an optimum).
#     # Attraction toward the central tendency is controlled by parameter 'alpha'.
#  # "EB": Early-burst model.
#     # Time-dependent model where rate of evolution increases or decreases exponentially
#     # through time under the model r[t] = r[0] * exp(a * t),  where r[0] is the initial rate,
#     # 'a' is the rate change parameter, and t is time. By default, 'a' is set to be negative,
#     # therefore the model represents a decelerating rate of evolution.
#     # Parameter estimate boundaries can be change to allow accelerating evolution
#     # with positive values of 'a' (as in the ACDC model).
#  # "rate_trend": Linear trend model.
#     # Time dependent model where rate of evolution varies linearly with time
#     # (i.e., following a slope). If the 'slope' parameter is positive,
#     # rates are increasing, and conversely.
#  # "lambda": Pagel's lambda model
#     # Based on branch length transformation. Modulates the extent to which the phylogeny
#     # predicts covariance among trait values for species (i.e., the degree of phylogenetic signal).
#     # The model multiplies all internal branch lengths by 'lambda'.
#     # 'lambda' close to zero indicates no phylogenetic signal.
#     # 'lambda' close to one approximate the 'BM' model and indicates strong phylogenetic signal.
#  # "kappa": Pagel's kappa model.
#     # Based on branch length transformation. Punctuational (speciational) model where
#     # trait divergence is related to the number of speciation events between pairs of species.
#     # Assumes that speciation events are responsible for trait divergence.
#     # The model raises all branch lengths to an estimated power 'kappa'.
#     # 'kappa' close to zero indicates a strong dependency of trait evolution on speciation events.
#     # 'kappa' close to one approximate the 'BM' model and indicates strong phylogenetic signal.
#  # "delta": Pagel's delta model.
#     # Based on branch length transformation. Time-dependent model that modulates
#     # the relative contributions of early vs. late evolution in the tree.
#     # The model raises all node depths to an estimated power 'delta'.
#     # 'delta' close to one approximate the 'BM' model and indicates strong phylogenetic signal.
#     # 'delta' lower than one gives more weight to early evolution, thus represents decelerating rates.
#     # 'delta' higher than one gives more weight to late evolution, thus represents accelerating rates.
# 
# ## Model trait data evolution
# eel_cont_data <- prepare_trait_data(
#     tip_data = eel_tip_data,
#     trait_data_type = "continuous",
#     phylo = eel.tree,
#     seed = 1234, # Set seed for reproducibility
#     # All possible models
#     evolutionary_models = c("BM", "OU",  "EB", "rate_trend", "lambda", "kappa", "delta"),
#     control = list(niter = 200), # Example of additional parameters that can be pass down
#     # to geiger::fitContinuous() to control parameter optimization.
#     res = 100, # To set the reoslution of the continuous mapping of trait value on the contMap
#     color_scale = c("darkgreen", "limegreen", "orange", "red"),
#     plot_map = FALSE,
#     # PDF_file_path = "./eel_contMap.pdf", # To export in PDF the contMap generated
#     return_ace = TRUE, # To include Ancestral Character Estimates (ACE) at nodes in the output
#     return_best_model_fit = TRUE, # To include the best model fit in the output
#     return_model_selection_df = TRUE, # To include the df for model selection in the output
#     verbose = TRUE) # To display progress
# 
# 
# # ------ Step 2: Explore output ------ #
# 
# ## Explore output
# str(eel_cont_data, 1)
# 
# ## Extract the contMap showing interpolated continuous trait evolution
# ## on the phylogeny as estimated from the model
# eel_contMap <- eel_cont_data$contMap
# 
# # Plot with initial color_scale
# plot_contMap(eel_contMap,
#              fsize = c(0.6, 1)) # Adjust tip label size
# # Plot with updated color_scale
# plot_contMap(contMap = eel_contMap,
#              color_scale = c("purple", "violet", "cyan", "blue"),
#              fsize = c(0.6, 1)) # Adjust tip label size
# # The contMap is the main input needed to perform a deepSTRAPP run on continuous trait data.
# 
# ## Extract the Ancestral Character Estimates (ACE) = trait values at nodes
# eel_ACE <- eel_cont_data$ace
# head(eel_ACE)
# # This is a named numerical vector with names = internal node ID and values = ACE.
# # It can be used as an optional input in deepSTRAPP run to provide
# # perfectly accurate estimates for trait values at internal nodes.
# 
# ## Explore summary of model selection
# eel_cont_data$model_selection_df # Summary of model selection
# # Models are compared using the corrected Akaike's Information Criterion (AICc)
# # Akaike's weights represent the probability that a given model is the best among
# # the set of candidate models, given the data.
# # Here, the best model is Pagel's lambda
# 
# ## Explore best model fit (Pagel's lambda)
# eel_cont_data$best_model_fit # Summary of best model optimization by geiger::fitContinuous()
# eel_cont_data$best_model_fit$opt # Parameter estimates and goodness-of-fit information
# # 'lambda' = 0.636. The best model detects a moderate degree of phylogenetic signal.
# 
# 
# ## Inputs needed to run deepSTRAPP are the contMap (eel_contMap), and optionally,
# ## the tip_data (eel_tip_data), and the ACE (eel_ACE)
# 
# 

## ----model_trait_evolution_cont_eval, eval = TRUE, echo = FALSE, out.width = "100%"----
suppressWarnings(library(maps))
suppressWarnings(library(ape))
suppressWarnings(library(phytools))

data(eel.tree)
data(eel.data)

# Extract body size
eel_tip_data <- stats::setNames(eel.data$Max_TL_cm,
                                rownames(eel.data))
# Reorder tip_data as in phylogeny
eel_tip_data <- eel_tip_data[eel.tree$tip.label]

## Model trait data evolution
eel_cont_data <- suppressWarnings(prepare_trait_data(
    tip_data = eel_tip_data,
    trait_data_type = "continuous",
    phylo = eel.tree,
    seed = 1234, # Set seed for reproducibility
    evolutionary_models = c("BM", "OU",  "EB", "rate_trend", "lambda", "kappa", "delta"), # All possible models
    control = list(niter = 200), # Example of additional parameters that can be pass down to geiger::fitContinuous() to control parameter optimization.
    res = 100, # To set the resolution of the continuous mapping of trait value on the contMap
    color_scale = c("darkgreen", "limegreen", "orange", "red"),
    plot_map = FALSE,
    # PDF_file_path = "./eel_contMap.pdf", # To export in PDF the contMap generated
    return_ace = TRUE, # To include Ancestral Character Estimates (ACE) at nodes in the output
    return_best_model_fit = TRUE, # To include the best model fit in the output
    return_model_selection_df = TRUE, # To include the df for model selection in the output
    verbose = FALSE)) # To display progress

# Plot with initial color_scale
plot_contMap(contMap = eel_cont_data$contMap,
             fsize = c(0.6, 1)) # Adjust tip label size

# Plot with updated color_scale
plot_contMap(contMap = eel_cont_data$contMap,
             color_scale = c("purple", "violet", "cyan", "blue"),
             fsize = c(0.6, 1)) # Adjust tip label size

## Explore summary of model selection
eel_cont_data$model_selection_df # Summary of model selection

