## ----init, include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup--------------------------------------------------------------------
# library(tidyverse, quietly = TRUE)
# library(cowplot)
# library(RColorBrewer)
# library(ctxR)
# library(flextable)
# library(officer)
# library(patchwork)
# library(mirai)
# 
# devtools::load_all() # load invivopkfit
# 
# # get today's date
# today <- format(Sys.Date(), "%d%B%Y")

## ----eval = FALSE-------------------------------------------------------------
# Sys.setenv(
#   OD_DIR = "path/to/inputs/",
#   FIG_DIR = "path/to/outputs/"
# )

## ----eval = FALSE-------------------------------------------------------------
# ctxR::register_ctx_api_key(key = "<YOUR-API-KEY>")

## -----------------------------------------------------------------------------
# read_date_pk <- "25November2024"

## -----------------------------------------------------------------------------
# read_date_results <- "27November2024"

## ----pk_obj_fromSQL, eval=FALSE-----------------------------------------------
# ### Minimal PK Object ###----
# # Minimum pk object, add options later
# # Note that these mappings are now a default mappings, just being verbose here.
# # If there are any standard deviations that are zero or NA, but N_Subjects > 1,
# # Set the n_subjects to 6 or the current value if lower than 6.
# # (In Showa the mode is 6, but this condition is present in CVT_Legacy as well)
# minimal_pk <- pk(
#   data = cvt %>%
#     mutate(n_subjects_normalized = if_else(
#       n_subjects_normalized > 1 & is.na(conc_sd),
#       min(n_subjects_normalized, 6), n_subjects_normalized
#     )),
#   mapping = ggplot2::aes(
#     Chemical = analyzed_chem_dtxsid,
#     Chemical_Name = analyzed_chem_name_original,
#     DTXSID = analyzed_chem_dtxsid,
#     CASRN = analyzed_chem_casrn,
#     Species = species,
#     Reference = fk_extraction_document_id,
#     Media = conc_medium_normalized,
#     Route = administration_route_normalized,
#     Dose = dose_level_normalized,
#     Dose.Units = "mg/kg",
#     Subject_ID = fk_subject_id,
#     Series_ID = fk_series_id,
#     Study_ID = fk_study_id,
#     ConcTime_ID = conc_time_id,
#     N_Subjects = n_subjects_normalized,
#     Weight = weight_kg,
#     Weight.Units = "kg",
#     Time = time_hr,
#     Time.Units = "hours",
#     Value = conc,
#     Value.Units = "mg/L",
#     Value_SD = conc_sd,
#     LOQ = loq
#   )
# )

## ----outlining_all_options, eval=FALSE----------------------------------------
# # Types of Choices:
# ## Dose Normalized
# ## Log10 transform
# ## Scale time
# # List of options
# fitopts <- expand.grid(
#   error_model = c("pooled", "hierarchical"),
#   dose_norm = c(TRUE, FALSE),
#   log10_trans = c(TRUE, FALSE),
#   time_scale = c(TRUE, FALSE),
#   stringsAsFactors = FALSE
# )

## ----fitting_choices, eval=FALSE----------------------------------------------
# fit_data <- function(this_error_model,
#                      this_dose_norm,
#                      this_log10_trans,
#                      this_time_scale,
#                      file_dir = Sys.getenv("OD_DIR")) {
#   retval <- -9
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
# 
#   cat("Fitting: ", file_str, "\n")
# 
#   tryCatch(
#     expr = {
#       this_pk <- minimal_pk +
#         facet_data(Chemical, Species) +
#         settings_preprocess(keep_data_original = FALSE, suppress.messages = TRUE) +
#         scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) +
#         settings_optimx(method = c(
#           "L-BFGS-B",
#           "bobyqa"
#         ))
# 
#       if (this_error_model %in% "pooled") {
#         this_pk <- this_pk + stat_error_model(Chemical, Species)
#       } else if (this_error_model %in% c("hierarchical", "joint")) {
#         this_pk <- this_pk + stat_error_model(Chemical, Species, Reference)
#       } else {
#         stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'")
#       }
# 
#       if (this_time_scale %in% TRUE) {
#         this_pk <- this_pk + scale_time(new_units = "auto")
#       }
# 
#       # do the fit
#       this_time <- system.time(this_fit <- do_fit(this_pk, async = TRUE))
# 
# 
# 
#       # save the result
#       saveRDS(this_fit, file = paste0(file_dir, file_str))
# 
#       cli::cli_alert_success(text = "Fitting success!")
#       print(this_time)
# 
#       retval <- this_time[[3]]
#     },
#     error = function(e) {
#       cli::cli_alert_danger("Analysis {file_str}, failed with error: {e}")
#       retval <- -1
#     }
#   )
# 
#   return(retval)
# }

## ----running_all_options, eval=FALSE------------------------------------------
# daemons(8L)
# everywhere(devtools::load_all())
# system.time(tmp_out <- mapply(
#   fit_data,
#   this_error_model = fitopts$error_model,
#   this_dose_norm = fitopts$dose_norm,
#   this_log10_trans = fitopts$log10_trans,
#   this_time_scale = fitopts$time_scale
# ))
# daemons(0L)
# 
# mean(tmp_out) / 60 # Average runtime (in minutes) per fitting option
# median(tmp_out) / 60
# sum(tmp_out) # Looks like there's a few extra seconds of overhead
# split(tmp_out, f = factor(names(tmp_out))) |> lapply(mean)

## -----------------------------------------------------------------------------
# my_pk <- readRDS(paste0(
#   Sys.getenv("OD_DIR"),
#   # read_date_pk,
#   "mypk_fit_110p.Rds"
# ))

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Reference) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Species) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference, Route, Media) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   distinct(Chemical, Species, Reference, Route, Media, Dose) %>%
#   count()

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   group_by(Chemical, Species) %>%
#   summarise(blood_and_plasma = all(c(
#     "blood",
#     "plasma"
#   ) %in% Media)) %>%
#   ungroup() %>%
#   summarise(n_both = sum(blood_and_plasma))

## -----------------------------------------------------------------------------
# get_data(my_pk) %>%
#   group_by(Chemical, Species) %>%
#   summarise(
#     t_mid = mean(range(Time)),
#     t_mid_log10 = midpt_log10(Time),
#     new_time_units = auto_units(
#       y = Time,
#       from = "hours",
#       target = 10
#     )
#   ) %>%
#   ungroup() %>%
#   count(new_time_units)

## -----------------------------------------------------------------------------
# get_data_info(my_pk)$dose_norm_check %>%
#   dplyr::summarise(
#     AUC_flag = sum(!is.na(data_flag_AUC)),
#     Cmax_flag = sum(!is.na(data_flag_Cmax)),
#     total_multi_dose = sum(n_dose_groups > 1),
#     total_expts = n()
#   )

## -----------------------------------------------------------------------------
# # compute duration and concentration range across experiments (Chemical, Species, Reference, Route, Media, Dose)
# data_range <- my_pk$data %>%
#   filter(Detect, !exclude) %>%
#   group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>%
#   summarize(
#     maxT = max(Time) / 24,
#     concRange = log10(max(Conc) / min(Conc))
#   ) %>%
#   ungroup()
# 
# # Panel A: histogram of day of final timepoint per experiment
# sf_2A <- data_range %>%
#   ggplot(aes(x = maxT)) +
#   geom_histogram(
#     fill = "grey5",
#     bins = 40,
#     color = "grey5"
#   ) +
#   scale_x_log10(
#     labels = scales::number,
#     breaks = c(0.1, 1, 7, 30, 365)
#   ) +
#   annotation_logticks(sides = "b") +
#   labs(
#     x = "Day of final Timepoint",
#     y = "Count"
#   ) +
#   theme(
#     aspect.ratio = 1,
#     text = element_text(size = 10),
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     axis.ticks = element_blank(),
#     axis.line = element_blank()
#   )
# 
# # panel B: histogram of fold concentration range
# sf_2B <- data_range %>%
#   ggplot(aes(x = concRange)) +
#   geom_histogram(
#     fill = "grey5",
#     bins = 40,
#     color = "grey5"
#   ) +
#   scale_y_continuous(
#     expand = c(0, 0),
#     limits = c(0, 50),
#     breaks = c(0, 25, 50)
#   ) +
#   labs(
#     x = "log10(Concentration Range)",
#     y = "Count"
#   ) +
#   annotation_logticks(sides = "b") +
#   theme(
#     aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     strip.background = element_rect(fill = "white"),
#     strip.text = element_text(face = "bold"),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     axis.text = element_text(size = 10),
#     axis.title = element_text(size = 11)
#   )
# 
# sf_2AB <- plot_grid(sf_2A, sf_2B,
#   labels = c("A", "B"),
#   nrow = 1
# )
# 
# sf_2AB
# 
# # save PDF version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig2_ConcTimeRanges",
#     ".pdf"
#   ),
#   height = 3.2,
#   width = 6,
#   bg = "white",
#   units = "in"
# )
# 
# # save PNG version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig2_ConcTimeRanges",
#     ".png"
#   ),
#   height = 3.2,
#   width = 6,
#   bg = "white",
#   units = "in",
#   dpi = 300
# )
# 
# rm(sf_2A, sf_2B, sf_2AB)

## -----------------------------------------------------------------------------
# data_cvt <- get_data(my_pk)
# data_counts <- data_cvt %>%
#   dplyr::filter(Detect) %>%
#   dplyr::count(Chemical, Species, Reference, Route, Media, Dose, Time, N_Subjects,
#     name = "Count"
#   )
# data_counts <- dplyr::left_join(data_counts, data_cvt,
#   by = c(
#     "Chemical", "Species", "Reference",
#     "Route", "Media", "Dose", "Time",
#     "N_Subjects"
#   )
# ) %>%
#   dplyr::mutate(data_descr = dplyr::case_when(
#     (Count > 1 & N_Subjects == 1) ~ "Individual Data, Multiple Observations",
#     (Count == 1 & N_Subjects == 1) ~ "Individual Data, Single Observation",
#     (Count == 1 & N_Subjects > 1 & Conc_SD > 0) ~ "Grouped Data w SD",
#     (N_Subjects > 1 & Conc_SD == 0) ~ "Grouped Data no SD",
#     .default = NA_character_
#   ))
# 
# indiv_data <- subset(data_counts,
#   subset = (data_descr %in% "Individual Data, Multiple Observations")
# )
# 
# indiv_data <- indiv_data %>%
#   dplyr::group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>%
#   dplyr::mutate(replicate_mean = mean(Conc, na.rm = TRUE)) %>%
#   ungroup()
# 
# indiv_data_bygroup <- indiv_data %>%
#   dplyr::group_by(Chemical, Species) %>%
#   dplyr::summarise(
#     AFE = 10^mean(log10(Conc / replicate_mean)),
#     AAFE = 10^mean(abs(log10(Conc / replicate_mean)))
#   ) %>%
#   ungroup()
# 
# indiv_data_bygroup %>% count(AFE < 0.5)
# indiv_data_bygroup %>% count(AFE > 2)

## -----------------------------------------------------------------------------
# my_tf <- my_pk %>% twofold_test()

## -----------------------------------------------------------------------------
# pl3A <- my_tf$individual_data %>%
#   # Plotting
#   ggplot(aes(x = foldConc)) +
#   geom_histogram(
#     binwidth = 0.1,
#     fill = "grey5",
#     color = "grey5",
#     linewidth = 0.4
#   ) +
#   coord_cartesian(
#     xlim = c(0, 4),
#     ylim = c(0, 3000)
#   ) +
#   scale_y_continuous(
#     expand = c(0, 0),
#     breaks = c(0, 250, 500, 750, 1000, 1500, 2000, 2500, 3000),
#     labels = c("0", "2.5", "5", "7.5", "10", "15", "20", "25", "")
#   ) +
#   expand_limits(y = 0) +
#   geom_vline(
#     xintercept = c(0.5, 2),
#     color = "red3",
#     linetype = "dashed",
#     linewidth = 1
#   ) +
#   theme_bw() +
#   labs(
#     x = "Mean-normalized concentration",
#     y = "Observations (hundreds))"
#   ) +
#   theme(
#     text = element_text(size = 14),
#     panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
#     panel.background = element_blank(),
#     panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
#     axis.title = element_text(face = "bold"),
#     axis.line = element_blank(),
#     axis.text = element_text(size = 14),
#     plot.background = element_blank(),
#     axis.ticks.y = element_blank()
#   )
# 
# pl3A

## -----------------------------------------------------------------------------
# # get time of peak concentration for use in calculated ADME-normalized time
# my_nca <- nca(my_pk) %>%
#   dplyr::filter(param_name %in% "tmax")
# 
# pl3B <- my_tf$individual_data %>%
#   inner_join(my_nca %>%
#     dplyr::select(Chemical, Species,
#       Route, Media,
#       tmax = param_value
#     ) %>%
#     filter(!is.na(tmax))) %>%
#   group_by(
#     Chemical,
#     Species,
#     Reference,
#     Route,
#     Media,
#     Dose
#   ) %>%
#   mutate(
#     normTime = ifelse(
#       Route == "iv",
#       1 + Time / max(Time),
#       ifelse(
#         Time > tmax,
#         ((Time - tmax) / max(Time)) + 1,
#         0.5 + Time / (2 * tmax)
#       )
#     )
#   ) %>%
#   ggplot(aes(
#     x = normTime,
#     y = foldConc
#   )) +
#   scale_x_continuous(
#     breaks = c(1, 2),
#     labels = c("tmax", "end")
#   ) +
#   geom_point(alpha = 0.1, size = 0.7) +
#   geom_hline(
#     yintercept = c(0.5, 2),
#     color = "red3",
#     linewidth = 0.8,
#     linetype = "dashed"
#   ) +
#   facet_grid(
#     cols = vars(Route),
#     scales = "free_x"
#   ) +
#   labs(
#     x = "ADME-Normalized Time",
#     y = "Mean-Normalized\nConcentration"
#   ) +
#   theme(
#     text = element_text(size = 14),
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     strip.background = element_rect(fill = "white"),
#     strip.text = element_text(face = "bold"),
#     axis.line = element_blank(),
#     axis.text = element_text(size = 14),
#     axis.title = element_text(face = "bold"),
#     panel.spacing = unit(0.125, units = "in"),
#     plot.background = element_blank(),
#     legend.position = "none"
#   )
# 
# pl3B

## -----------------------------------------------------------------------------
# pl3 <- cowplot::plot_grid(pl3A,
#   pl3B,
#   ncol = 2,
#   rel_widths = c(1, 2),
#   labels = c("A", "B")
# )
# 
# # save PDF version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig3.pdf"
#   ),
#   pl3,
#   bg = "white",
#   width = 8,
#   height = 4,
#   units = "in"
# )
# 
# # Save PNG version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig3.png"
#   ),
#   pl3,
#   bg = "white",
#   width = 8,
#   height = 4,
#   units = "in",
#   dpi = 300
# )
# 
# # remove unneeded objects
# rm(pl3A, pl3B, pl3)

## -----------------------------------------------------------------------------
# plsupp3 <- my_tf$individual_data %>%
#   inner_join(my_nca %>%
#     dplyr::select(Chemical, Species,
#       Route, Media,
#       tmax = param_value
#     ) %>%
#     filter(!is.na(tmax))) %>%
#   group_by(
#     Chemical,
#     Species,
#     Reference,
#     Route,
#     Media,
#     Dose
#   ) %>%
#   mutate(
#     normTime = ifelse(
#       Route == "iv",
#       1 + Time / max(Time),
#       ifelse(
#         Time > tmax,
#         ((Time - tmax) / max(Time)) + 1,
#         0.5 + Time / (2 * tmax)
#       )
#     )
#   ) %>%
#   ggplot(aes(
#     x = normTime
#   )) +
#   scale_x_continuous(
#     breaks = c(1, 2),
#     labels = c("tmax", "end")
#   ) +
#   geom_histogram() +
#   facet_grid(
#     cols = vars(Route),
#     scales = "free_x"
#   ) +
#   labs(
#     x = "ADME-Normalized Time",
#     "# Observations"
#   ) +
#   theme(
#     text = element_text(size = 14),
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     strip.background = element_rect(fill = "white"),
#     strip.text = element_text(face = "bold"),
#     # axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     # axis.text.x = element_blank(),
#     axis.text = element_text(size = 14),
#     axis.title = element_text(face = "bold"),
#     panel.spacing = unit(0.125, units = "in"),
#     plot.background = element_blank(),
#     legend.position = "none"
#   )
# 
# # save PDF version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig3",
#     ".png"
#   ),
#   plsupp3,
#   bg = "white",
#   width = 8,
#   height = 4,
#   units = "in",
#   dpi = 300
# )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig3",
#     ".pdf"
#   ),
#   plsupp3,
#   bg = "white",
#   width = 8,
#   height = 4,
#   units = "in"
# )

## ----Winmodelfn---------------------------------------------------------------
# # Evaluation
# # Winning Model Tally
# winmodel_comparison <- function(this_error_model,
#                                 this_dose_norm,
#                                 this_log10_trans,
#                                 this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     # read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   current_rds <- readRDS(file = file_str)
# 
#   # Wide winning model
#   suppressMessages({
#     winning_model <- get_winning_model(obj = current_rds)
# 
#     wide_winning_model <- winning_model %>%
#       dplyr::group_by(method, model) %>%
#       dplyr::count() %>%
#       tidyr::pivot_wider(
#         names_from = model,
#         values_from = n
#       )
#   })
#   return(wide_winning_model)
# }

## -----------------------------------------------------------------------------
# AUC_plot <- function(
#     this_error_model,
#     this_dose_norm,
#     this_log10_trans,
#     this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     # read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Evaluation for: ", pk_name))
#   current_rds <- readRDS(file = file_str)
#   # Cmax and AUC_inf RMSEs
#   # note that eval_tkstats already filters winning models
#   # use finite_only = FALSE so that we can count the excluded cases
#   suppressMessages(RMSLE_eval <- eval_tkstats(
#     obj = current_rds,
#     dose_norm = FALSE,
#     finite_only = FALSE,
#     suppress.messages = TRUE
#   ) %>%
#     mutate(Options = pk_name) %>%
#     dplyr::select(
#       Options,
#       Chemical,
#       Species,
#       Route,
#       Media,
#       Reference,
#       method,
#       model,
#       starts_with("Cmax"),
#       starts_with("AUC_")
#     ))
# 
#   these_opts <- paste0(
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic
#   )
# 
#   # plot AUC infinity for tkstats vs. AUC infinity for NCA
#   # show identity line and 20x above and below lines
#   AUC_plot <- RMSLE_eval %>%
#     dplyr::filter(is.finite(AUC_infinity.nca) & is.finite(AUC_infinity.tkstats)) %>%
#     ggplot() +
#     geom_point(aes(
#       x = AUC_infinity.nca,
#       y = AUC_infinity.tkstats,
#       color = model
#     )) +
#     scale_x_log10() +
#     scale_y_log10() +
#     annotation_logticks() +
#     geom_abline(aes(intercept = 0, slope = 1)) +
#     geom_abline(aes(intercept = log10(20), slope = 1),
#       linetype = 2
#     ) +
#     geom_abline(aes(intercept = log10(1 / 20), slope = 1),
#       linetype = 2
#     ) +
#     facet_grid(rows = vars(method)) +
#     ggtitle(paste(
#       these_opts,
#       "AUC_inf pred vs. AUC_inf NCA"
#     )) +
#     scale_color_manual(
#       values = c(
#         "model_1comp" = "#0398FC",
#         "model_2comp" = "#D68E09",
#         "model_flat" = "black"
#       ),
#       name = "Winning model"
#     ) +
#     coord_equal()
# 
#   Cmax_plot <- RMSLE_eval %>%
#     dplyr::filter(is.finite(Cmax.nca) & is.finite(Cmax.tkstats)) %>%
#     ggplot() +
#     geom_point(aes(
#       x = Cmax.nca,
#       y = Cmax.tkstats,
#       color = model
#     )) +
#     scale_x_log10() +
#     scale_y_log10() +
#     annotation_logticks() +
#     geom_abline(aes(intercept = 0, slope = 1)) +
#     geom_abline(aes(intercept = log10(20), slope = 1),
#       linetype = 2
#     ) +
#     geom_abline(aes(intercept = log10(1 / 20), slope = 1),
#       linetype = 2
#     ) +
#     facet_grid(rows = vars(method)) +
#     ggtitle(paste(
#       these_opts,
#       "Cmax pred vs. Cmax NCA"
#     )) +
#     scale_color_manual(
#       values = c(
#         "model_1comp" = "#0398FC",
#         "model_2comp" = "#D68E09",
#         "model_flat" = "black"
#       ),
#       name = "Winning model"
#     ) +
#     coord_equal()
# 
#   return(
#     list(
#       AUC_plot = AUC_plot,
#       Cmax_plot = Cmax_plot
#     )
#   )
# }

## -----------------------------------------------------------------------------
# all_AUC_plots <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::summarise(this_plot = list(AUC_plot(
#     this_error_model = error_model,
#     this_dose_norm = dose_norm,
#     this_log10_trans = log10_trans,
#     this_time_scale = time_scale
#   )))
# # save the plots
# pdf(
#   file = paste0(
#     Sys.getenv("FIG_DIR"),
#     today,
#     "_AUCinf_nca_vs_tk_plots.pdf"
#   ),
#   onefile = TRUE,
#   height = 5,
#   width = 7
# )
# 
# plotlist <- all_AUC_plots %>%
#   pull(this_plot)
# 
# for (x in seq_along(plotlist)) {
#   print(plotlist[[x]][["AUC_plot"]])
# }
# dev.off()
# 
# pdf(
#   file = paste0(
#     Sys.getenv("FIG_DIR"),
#     today,
#     "_Cmax_nca_vs_tk_plots.pdf"
#   ),
#   onefile = TRUE,
#   height = 5,
#   width = 7
# )
# 
# plotlist <- all_AUC_plots %>%
#   pull(this_plot)
# 
# for (x in seq_along(plotlist)) {
#   print(plotlist[[x]][["Cmax_plot"]])
# }
# dev.off()
# 
# rm(plotlist, all_AUC_plots)

## ----RMSLE_Cmax_AUC_fun-------------------------------------------------------
# # RMSLE for Cmax and AUC
# rmsles_Cmax_AUC <- function(
#     this_error_model,
#     this_dose_norm,
#     this_log10_trans,
#     this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     # read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Evaluation for: ", pk_name))
#   current_rds <- readRDS(file = file_str)
#   # Cmax and AUC_inf RMSEs
#   # note that eval_tkstats already filters winning models
#   # use finite_only = FALSE so that we can count the excluded cases
#   suppressMessages(RMSLE_eval <- eval_tkstats(
#     obj = current_rds,
#     dose_norm = FALSE,
#     finite_only = FALSE,
#     suppress.messages = TRUE
#   ) %>%
#     mutate(Options = pk_name) %>%
#     dplyr::select(
#       Options,
#       Chemical,
#       Species,
#       Route,
#       Media,
#       Reference,
#       method,
#       model,
#       starts_with("Cmax"),
#       starts_with("AUC_")
#     ))
# 
#   # Couunt bad observations:
#   # zero, NA, infinite
#   RMSLE_badobs <- RMSLE_eval %>%
#     group_by(Options, method) %>%
#     summarise(
#       total_expts = n(),
#       count_AUCinf_NCA_0 = sum(AUC_infinity.nca %in% 0),
#       count_AUCinf_pred_0 = sum(AUC_infinity.tkstats %in% 0),
#       count_AUCinf_NCA_isNA = sum(is.na(AUC_infinity.nca)),
#       count_AUCinf_pred_isNA = sum(is.na(AUC_infinity.tkstats)),
#       count_AUCinf_NCA_isInf = sum(is.infinite(AUC_infinity.nca)),
#       count_AUCinf_pred_isInf = sum(is.infinite(AUC_infinity.tkstats)),
#       count_Cmax_NCA_0 = sum(Cmax.nca %in% 0),
#       count_Cmax_pred_0 = sum(Cmax.tkstats %in% 0),
#       count_Cmax_NCA_isNA = sum(is.na(Cmax.nca)),
#       count_Cmax_pred_isNA = sum(is.na(Cmax.tkstats)),
#       count_Cmax_NCA_isInf = sum(is.infinite(Cmax.nca)),
#       count_Cmax_pred_isInf = sum(is.infinite(Cmax.tkstats))
#     ) %>%
#     ungroup()
# 
#   # count cases where AUC or Cmax was negative,
#   # or where AUC_inf was greater than 1e7
#   RMSLE_neg <- RMSLE_eval %>%
#     dplyr::filter(
#       is.finite(AUC_infinity.nca),
#       is.finite(AUC_infinity.tkstats),
#       is.finite(Cmax.nca),
#       is.finite(Cmax.tkstats)
#     ) %>%
#     group_by(Options, method) %>%
#     summarise(
#       count_AUCinf_NCA_lt0 = sum(AUC_infinity.nca < 0),
#       count_AUCinf_pred_lt0 = sum(AUC_infinity.tkstats < 0),
#       count_Cmax_NCA_lt0 = sum(AUC_infinity.nca < 0),
#       count_Cmax_pred_lt0 = sum(AUC_infinity.tkstats < 0),
#       count_AUCinf_NCA_gt_1e7 = sum(AUC_infinity.nca > 1e7),
#       count_AUCinf_pred_gt_1e7 = sum(AUC_infinity.tkstats > 1e7)
#     )
# 
#   RMSLE_badobs <- RMSLE_badobs %>%
#     left_join(RMSLE_neg, by = c(
#       "Options",
#       "method"
#     )) %>%
#     rowwise() %>%
#     dplyr::mutate(total_excl = sum(
#       c_across(
#         starts_with("count")
#       )
#     ))
# 
#   # compute RMSLEs after excluding the bad observations
#   RMSLE_eval <- RMSLE_eval %>%
#     dplyr::filter(
#       is.finite(AUC_infinity.nca),
#       is.finite(AUC_infinity.tkstats)
#     ) %>%
#     dplyr::filter(
#       AUC_infinity.nca > 0,
#       AUC_infinity.tkstats > 0,
#       AUC_infinity.nca < 1e7,
#       AUC_infinity.tkstats < 1e7
#     ) %>%
#     rowwise() %>%
#     mutate(
#       SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca))^2,
#       SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca))^2
#     ) %>%
#     dplyr::filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>%
#     group_by(Options, method) %>%
#     summarise(
#       n_expts = n(),
#       RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4),
#       RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4)
#     ) %>%
#     ungroup()
# 
#   # merge in counts of bad obs
# 
#   RMSLE_eval <- RMSLE_eval %>%
#     dplyr::left_join(RMSLE_badobs,
#       by = c("Options", "method")
#     )
# 
#   return(RMSLE_eval)
# }

## ----gof_tbl_fun--------------------------------------------------------------
# # Goodness-of-fit table with R-squared, RMSLE and AIC
# get_gof <- function(this_error_model,
#                     this_dose_norm,
#                     this_log10_trans,
#                     this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     # read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   current_rds <- readRDS(file = file_str)
#   winning_model <- suppressMessages(get_winning_model(obj = current_rds))
# 
#   suppressMessages({
#     this_rsq <- rsq.pk(current_rds,
#       use_scale_conc = FALSE,
#       rsq_group = ggplot2::vars(Chemical, Species),
#       sub_pLOQ = TRUE
#     ) %>%
#       semi_join(winning_model)
# 
#     this_AIC <- AIC(current_rds) %>%
#       semi_join(winning_model)
# 
#     this_rmsle <- rmse.pk(current_rds,
#       rmse_group = vars(Chemical, Species),
#       use_scale_conc = list(
#         dose_norm = FALSE,
#         log10_trans = TRUE
#       ),
#       sub_pLOQ = TRUE
#     ) %>%
#       semi_join(winning_model)
#   })
# 
#   # Since they are all joined by winmodel
#   # all three must have same NROW
#   message(
#     paste0("For ", pk_name, "... outputs below must have same number of rows")
#   )
# 
#   message(paste0("rsq: ", NROW(this_rsq)))
#   message(paste0("aic: ", NROW(this_AIC)))
#   message(paste0("rmsle: ", NROW(this_rmsle)))
# 
#   gof_df <- this_rsq %>%
#     inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>%
#     inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>%
#     mutate(Options = pk_name, .before = Chemical)
# 
#   return(gof_df)
# }

## ----allpreds_fun-------------------------------------------------------------
# # All predictions
# get_all_preds <- function(this_error_model,
#                           this_dose_norm,
#                           this_log10_trans,
#                           this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
# 
#   current_rds <- readRDS(file = file_str)
# 
#   # Wide winning model
#   suppressMessages({
#     winning_model <- get_winning_model(obj = current_rds) %>%
#       select(-c(near_flat, preds_below_loq))
#     this_preds <- predict(current_rds,
#       use_scale_conc = FALSE
#     ) %>%
#       semi_join(winning_model) %>%
#       mutate(Options = pk_name, .before = Chemical)
#   })
#   return(this_preds)
# }

## ----factor2fun---------------------------------------------------------------
# # Factor of two model error
# tf_tests <- function(this_error_model,
#                      this_dose_norm,
#                      this_log10_trans,
#                      this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     # read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#   current_rds <- readRDS(file = file_str)
#   print(paste0("Now analyzing: ", pk_name))
# 
#   out <- twofold_test(current_rds,
#     sub_pLOQ = TRUE,
#     suppress_messages = TRUE
#   )
# 
#   out <- out$model_error_summary %>%
#     mutate(Options = pk_name)
# 
#   return(out)
# }

## ----do_winmodel--------------------------------------------------------------
# # winning model
# system.time(wm_comp <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(winmodel_comp = list(
#     winmodel_comparison(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(winmodel_comp))

## ----do_RMSLE_Cmax_AUC--------------------------------------------------------
# # RMSLE for Cmax and AUC
# system.time(cmax_auc_rmsle_tbl <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(nca_comp = list(
#     rmsles_Cmax_AUC(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(nca_comp))

## ----do_gof_tbl---------------------------------------------------------------
# # GOF table
# system.time(gof_tbl <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(gof_win = list(
#     get_gof(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(gof_win))

## ----do_factor_two------------------------------------------------------------
# # factor of two
# system.time(all_tf_tests <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(TF = list(
#     tf_tests(error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(TF))

## ----do_all_preds-------------------------------------------------------------
# # all predictions
# system.time(all_preds <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(these_preds = list(
#     get_all_preds(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(these_preds))

## ----write_preds--------------------------------------------------------------
# # Save preds to a file as well
# saveRDS(all_preds, paste0(
#   Sys.getenv("FIG_DIR"),
#   today,
#   "_all_preds.Rds"
# ))
# 
# rm(all_preds)

## -----------------------------------------------------------------------------
# (win_mdl_count_by_opts <- gof_tbl %>%
#   group_by(Options, method, model) %>%
#   count() %>%
#   pivot_wider(
#     names_from = model,
#     values_from = n
#   ))

## -----------------------------------------------------------------------------
# gof_tbl %>%
#   group_by(Options, method) %>%
#   count(RMSLE < 1) %>%
#   filter(`RMSLE < 1`) %>%
#   arrange(desc(n)) %>%
#   print(n = 8)
# # bobyqa-optimized options 010h, 110h, 110p

## -----------------------------------------------------------------------------
# gof_tbl %>%
#   group_by(Options, method) %>%
#   count(Rsq > 0.75) %>%
#   filter(`Rsq > 0.75`) %>%
#   arrange(desc(n)) %>%
#   print(n = 8)

## -----------------------------------------------------------------------------
# rmsle_rsq_rank <- gof_tbl %>%
#   group_by(Options, method) %>%
#   count(RMSLE < 1 & Rsq > 0.75) %>%
#   filter(`RMSLE < 1 & Rsq > 0.75`) %>%
#   arrange(desc(n)) %>%
#   dplyr::rename(`# Chemical-Species data groups` = n)
# 
# rmsle_rsq_rank %>% print(n = 8)

## -----------------------------------------------------------------------------
# cmax_auc_rmsle_rank <- cmax_auc_rmsle_tbl %>%
#   arrange(RMSLE_Cmax, RMSLE_AUC) %>%
#   select(Options, method, RMSLE_Cmax, RMSLE_AUC)
# 
# cmax_auc_rmsle_rank %>%
#   slice_head(n = 5)

## ----write_eval_results-------------------------------------------------------
# # Write the results to file
# writexl::write_xlsx(
#   x = list(
#     Fit_Counts = wm_comp,
#     AUC_and_Cmax_RMSLE_summary = cmax_auc_rmsle_tbl,
#     Prediction_Evaluations = gof_tbl,
#     Twofold_Tests = all_tf_tests,
#     RMSLE_Rsq_rank = rmsle_rsq_rank,
#     Cmax_AUC_RMSLE_rank = cmax_auc_rmsle_rank
#   ),
#   path = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Table2_eval_results",
#     ".xlsx"
#   )
# )
# 
# rm(wm_comp, cmax_auc_rmsle_tbl, gof_tbl, all_tf_tests)
# 
# gc()

## ----plot_rsq_rmsle-----------------------------------------------------------
# gof_tbl <- readxl::read_excel(
#   path = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Table2_eval_results",
#     ".xlsx"
#   ),
#   sheet = "Prediction_Evaluations"
# )
# 
# gof_tbl <- gof_tbl %>%
#   dplyr::mutate(
#     opts = substr(Options, 1, 3),
#     model_text = dplyr::case_when(model %in% "model_1comp" ~ "1comp",
#       model %in% "model_2comp" ~ "2comp",
#       model %in% "model_flat" ~ "flat",
#       .default = NA_character_
#     )
#   )
# 
# fig_rsq_rmsle <- ggplot(gof_tbl) +
#   geom_point(aes(
#     x = RMSLE,
#     y = Rsq,
#     color = model_text
#   )) +
#   facet_grid(
#     rows = vars(opts),
#     cols = vars(interaction(error_model, method))
#   ) +
#   scale_color_manual(
#     values = c(
#       "1comp" = "#0398FC",
#       "2comp" = "#D68E09",
#       "flat" = "black"
#     ),
#     name = "Winning model"
#   ) +
#   theme_bw()
# 
# fig_rsq_rmsle

## ----plot_rsq_rmsle_zoom------------------------------------------------------
# fig_rsq_rmsle_zoom <- ggplot(gof_tbl) +
#   geom_point(aes(
#     x = RMSLE,
#     y = Rsq,
#     color = model_text
#   )) +
#   facet_grid(
#     rows = vars(opts),
#     cols = vars(interaction(error_model, method))
#   ) +
#   scale_color_manual(
#     values = c(
#       "1comp" = "#0398FC",
#       "2comp" = "#D68E09",
#       "flat" = "black"
#     ),
#     name = "Winning model"
#   ) +
#   coord_cartesian(xlim = c(0, 1)) +
#   theme_bw()
# 
# fig_rsq_rmsle_zoom

## -----------------------------------------------------------------------------
# # PDF versions
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig4",
#     ".pdf"
#   ),
#   fig_rsq_rmsle,
#   width = 11,
#   height = 8.5
# )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig5",
#     ".pdf"
#   ),
#   fig_rsq_rmsle_zoom,
#   width = 11,
#   height = 8.5
# )
# 
# # PNG versions
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig4",
#     ".png"
#   ),
#   fig_rsq_rmsle,
#   width = 11,
#   height = 8.5
# )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig5",
#     ".png"
#   ),
#   fig_rsq_rmsle_zoom,
#   width = 11,
#   height = 8.5
# )
# 
# rm(fig_rsq_rmsle_zoom, fig_rsq_rmsle, gof_tbl)

## ----plot_cmax_auc_rmsle------------------------------------------------------
# # read in table
# cmax_auc_rmsle_tbl <- readxl::read_xlsx(
#   path = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Table2_eval_results.xlsx"
#   ),
#   sheet = "AUC_and_Cmax_RMSLE_summary"
# )
# # make plot
# cmax_auc_rmsle_tbl <- cmax_auc_rmsle_tbl %>%
#   dplyr::mutate(opts = substr(Options, 1, 3))
# 
# cmax_auc_rmsle_fig <- ggplot(cmax_auc_rmsle_tbl) +
#   geom_point(
#     aes(
#       x = RMSLE_AUC,
#       y = RMSLE_Cmax,
#       color = opts
#     ),
#     size = 4
#   ) +
#   facet_grid(
#     rows = vars(error_model),
#     cols = vars(method)
#   ) +
#   scale_color_brewer(
#     palette = "Paired",
#     name = "Fit options"
#   ) +
#   theme_bw()
# 
# cmax_auc_rmsle_fig
# 
# # save plot
# # PDF version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig6",
#     ".pdf"
#   ),
#   cmax_auc_rmsle_fig,
#   width = 7,
#   height = 5
# )
# 
# # PNG version
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig6",
#     ".png"
#   ),
#   cmax_auc_rmsle_fig,
#   width = 7,
#   height = 5,
#   units = "in",
#   dpi = 300
# )
# 
# rm(cmax_auc_rmsle_fig, cmax_auc_rmsle_tbl)

## -----------------------------------------------------------------------------
# all_preds <- readRDS(paste0(
#   Sys.getenv("FIG_DIR"),
#   today,
#   "_all_preds.Rds"
# ))
# 
# # select winning fit opts: 110p bobyqa
# 
# all_preds_prep <- all_preds %>%
#   filter(
#     Options %in% c("110p"),
#     method %in% "bobyqa"
#   ) %>%
#   mutate(
#     Conc_est_sub = dplyr::if_else(Conc_est < pLOQ,
#       pLOQ,
#       Conc_est
#     ),
#     Conc_est_belowLOQ = dplyr::if_else(Conc_est < pLOQ,
#       TRUE,
#       FALSE
#     ),
#     ID = as.factor(paste(Chemical, Chemical_Name, Species)),
#     .keep = "unused"
#   ) %>%
#   select(
#     ID, Options,
#     dose_norm, log10_trans, time_scale,
#     Route, Media, Dose,
#     Conc, Conc_est_sub, Conc_est_belowLOQ,
#     Time, Reference
#   )
# 
# split_preds <- split(all_preds_prep, all_preds_prep$ID)
# 
# sp_plots <- lapply(names(split_preds), function(i) {
#   ggplot(
#     data = split_preds[[i]],
#     mapping = aes(
#       x = Conc,
#       y = Conc_est_sub,
#       color = as.factor(Dose),
#       shape = Reference
#     )
#   ) +
#     xlab("Observed conc, mg/L") +
#     ylab("Predicted conc, mg/L") +
#     geom_point(size = 2) +
#     geom_abline(slope = 1) +
#     facet_grid(Route ~ Media,
#       scales = "free_y"
#     ) +
#     scale_y_log10() +
#     scale_x_log10() +
#     annotation_logticks(sides = "bl") +
#     scale_color_viridis_d(name = "Dose, mg/kg") +
#     theme_bw() +
#     labs(title = i) +
#     theme(
#       legend.position = "bottom",
#       plot.title = element_text(hjust = 0.5, face = "bold")
#     )
# })
# 
# # Very BIG FILE
# pdf(
#   file = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_log10_predvsobs_plots_110pbobyqa_all.pdf"
#   ),
#   onefile = TRUE, height = 8, width = 8
# )
# for (x in seq_along(sp_plots)) {
#   print(sp_plots[[x]])
# }
# dev.off()
# 
# rm(sp_plots, split_preds, all_preds_prep, all_preds)

## -----------------------------------------------------------------------------
# my_pk <- readRDS(paste0(
#   Sys.getenv("OD_DIR"),
#   read_date_pk,
#   "mypk_fit_110p.Rds"
# ))

## ----data_aggregation---------------------------------------------------------
# # after fitting pk object for all cvt objects or reading the fitted pk object in `setup`
# winmodel <- get_winning_model(my_pk, method = "bobyqa")
# my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE, method = "bobyqa"), winmodel)
# my_residuals <- residuals(my_pk, use_scale_conc = FALSE, method = "bobyqa") %>%
#   semi_join(winmodel)
# my_tkstats <- eval_tkstats(my_pk, method = "bobyqa") %>% semi_join(winmodel)
# my_nca <- get_nca(my_pk)
# all_my_data <- get_data(my_pk)
# 
# # merge together long form coefs and long form coef SDs
# 
# my_tf <- twofold_test(my_pk, method = "bobyqa")
# 
# NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds)
# 
# my_coef_sd <- coef_sd(my_pk, method = "bobyqa") %>% # this will take a few minutes to run
#   semi_join(winmodel) # keep only coefs & SDs for winning model
# 
# # Writing file to xlsx
# writexl::write_xlsx(
#   x = list(
#     predictions = my_preds,
#     tkstats = my_tkstats,
#     coefs_sds = my_coef_sd
#   ),
#   path = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Table3",
#     ".xlsx"
#   )
# )

## -----------------------------------------------------------------------------
# fe_df <- fold_error(my_pk,
#   sub_pLOQ = TRUE,
#   method = "bobyqa"
# ) %>% semi_join(winmodel)
# 
# r2_df <- rsq(my_pk,
#   use_scale_conc = FALSE,
#   sub_pLOQ = TRUE,
#   method = "bobyqa"
# ) %>%
#   semi_join(winmodel)
# 
# myAIC <- AIC(my_pk, method = "bobyqa") %>%
#   semi_join(winmodel)
# 
# myRMSLE <- rmse(my_pk,
#   use_scale_conc = list(
#     dose_norm = FALSE,
#     log10_trans = TRUE
#   ),
#   sub_pLOQ = TRUE,
#   method = "bobyqa"
# ) %>%
#   semi_join(winmodel)
# 
# myRMSE <- rmse(my_pk,
#   rmse_group = ggplot2::vars(
#     Chemical,
#     Species,
#     Route,
#     Media,
#     Dose
#   ),
#   sub_pLOQ = TRUE,
#   method = "bobyqa"
# ) %>%
#   semi_join(winmodel)

## -----------------------------------------------------------------------------
# my_tf$model_error_all %>%
#   filter(model %in% c("model_1comp", "model_2comp")) %>%
#   mutate(Route.model = factor(paste(Route, model),
#     levels = c(
#       "iv model_1comp",
#       "iv model_2comp",
#       "oral model_1comp",
#       "oral model_2comp"
#     )
#   )) %>%
#   ggplot(aes(
#     x = Fold_Error,
#     fill = Route.model
#   )) +
#   geom_histogram(
#     color = NA,
#     position = position_stack(),
#     bins = 50
#   ) +
#   scale_x_log10(labels = scales::label_math(format = log10)) +
#   annotation_logticks(sides = "b") +
#   scale_fill_brewer(
#     palette = "Paired",
#     name = "Route/winning model"
#   ) +
#   expand_limits(y = 0) +
#   geom_vline(xintercept = c(0.5, 2), color = "black", linetype = "dashed") +
#   labs(
#     x = "Predicted/Observed",
#     y = "Number of Observations"
#   ) +
#   theme_classic() +
#   theme(
#     aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA),
#     panel.grid.major = element_line(color = "grey95", linewidth = 0.4)
#   ) +
#   coord_cartesian(xlim = c(10^(-1.5), 100))
# 
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig7_PredictedObserved_compartmentModels.png"
#   ),
#   height = 5, width = 7
# )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig7_PredictedObserved_compartmentModels.pdf"
#   ),
#   height = 5, width = 7
# )

## ----fig4-modelPerform-v-dataVar----------------------------------------------
# my_tf$indiv_data_test_fold_errors %>%
#   select(model, starts_with("percent_")) %>%
#   glimpse()
# 
# pl_4A <- my_tf$indiv_data_fold_errors %>%
#   # For Plotting
#   ggplot(aes(
#     y = Fold_Error,
#     x = foldConc
#   )) +
#   geom_bin2d(bins = 55, color = NA) +
#   geom_hline(yintercept = c(0.5, 2), linetype = "dashed") +
#   geom_vline(xintercept = c(0.5, 2), linetype = "dashed") +
#   scale_x_log10(
#     labels = scales::label_math(format = log10),
#     limits = c(0.001, 20)
#   ) +
#   scale_y_log10(
#     labels = scales::label_math(format = log10),
#     limits = c(0.0001, 10000)
#   ) +
#   annotation_logticks(sides = "bl") +
#   scale_fill_viridis_c(
#     option = "cividis",
#     limits = c(1, 100),
#     oob = scales::oob_squish
#   ) +
#   labs(
#     y = "Model Error",
#     x = "Data Variability",
#     fill = "Count"
#   ) +
#   theme(
#     aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     strip.background = element_rect(fill = "white"),
#     strip.text = element_text(face = "bold"),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     axis.text = element_text(size = 14),
#     axis.title = element_text(size = 14)
#   )
# 
# pl_4A
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     today,
#     "_Fig4A_ModelPerformance_vDataVariability.png"
#   ),
#   height = 5,
#   width = 7,
#   device = "png",
#   dpi = 300,
#   units = "in"
# )

## -----------------------------------------------------------------------------
# my_table <- my_tf$indiv_data_test_fold_errors %>%
#   ungroup() %>%
#   filter(model %in% "All") %>%
#   select(starts_with("percent_")) %>%
#   t() %>%
#   round(digits = 2) %>%
#   as.data.frame() %>%
#   rownames_to_column(var = "percentages") %>%
#   mutate(percentages = case_when(
#     percentages == "percent_both_within" ~ "Both within a factor of 2",
#     percentages == "percent_both_outside" ~ "Both outside a factor of 2",
#     percentages == "percent_model_outside" ~ "Model too variable",
#     percentages == "percent_data_outside" ~ "Data too variable"
#   ))
# 
# names(my_table) <- c(" ", "Overall (%)")
# 
# flextable(my_table) %>%
#   border_inner() %>%
#   fontsize(part = "all", size = 11) %>%
#   bold(part = "all", j = 2) %>%
#   autofit()
# 
# table_plot <- gen_grob(
#   flextable(my_table) %>%
#     border_inner() %>%
#     fontsize(part = "all", size = 10) %>%
#     bold(part = "all", j = 2) %>%
#     autofit(),
#   autowidths = TRUE,
#   fit = "width"
# )
# 
# 
# dual_plot <- plot_grid(pl_4A,
#   table_plot,
#   ncol = 1,
#   align = "hv",
#   rel_heights = c(1, 0.5),
#   rel_widths = c(1, 0.5)
# )
# 
# dual_plot
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig4.png"
#   ),
#   plot = dual_plot,
#   height = 4.5,
#   width = 6,
#   device = "png",
#   bg = "white",
#   dpi = 300,
#   units = "in"
# )

## ----fig5-goodness-of-fit-----------------------------------------------------
# combined_gof_df <- my_tf$model_error_all %>%
#   group_by(Chemical, Species, model, method) %>%
#   summarize(within_2fold = sum(between(Fold_Error, 0.5, 2)) / n()) %>%
#   left_join(r2_df) %>%
#   left_join(myAIC) %>%
#   left_join(myRMSLE) %>%
#   semi_join(winmodel)
# 
# 
# mypl <- ggplot(
#   data = combined_gof_df,
#   aes(
#     x = within_2fold,
#     y = Rsq,
#     color = model
#   )
# ) +
#   geom_point() +
#   geom_abline(slope = 1, color = "black", linetype = "longdash") +
#   scale_color_manual(values = c("#0398FC", "#D68E09", "black")) +
#   # coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
#   labs(
#     x = "Fraction of predictions within 2-fold",
#     y = "R-squared value"
#   ) +
#   theme( # aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     legend.position = "none",
#     legend.title = element_blank(),
#     legend.key = element_blank()
#   )
# 
# panelA_plot <- ggExtra::ggMarginal(mypl,
#   groupFill = TRUE,
#   type = "histogram",
#   xparams = list(binwidth = 0.05),
#   yparams = list(binwidth = 0.05)
# )
# panelA_plot
# 
# panelB_plot <- ggplot(
#   data = myRMSE,
#   aes(
#     x = RMSE,
#     fill = model
#   )
# ) +
#   scale_x_log10() +
#   annotation_logticks(sides = "b") +
#   geom_histogram(
#     bins = 50,
#     position = position_stack(),
#     color = NA
#   ) +
#   labs(y = "Count") +
#   scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) +
#   scale_color_manual(values = c("#0398FC", "#D68E09", "grey10"), guide = "none") +
#   theme(
#     text = element_text(size = 10),
#     # aspect.ratio = 1,
#     panel.border = element_rect(color = "black", fill = NA, size = 1),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     strip.background = element_blank(),
#     panel.spacing.y = unit(0.125, units = "in"),
#     legend.title = element_blank(),
#     legend.position = "bottom"
#   )
# panelB_plot
# 
# plAB <- patchwork::wrap_elements(panelA_plot) + panelB_plot +
#   plot_annotation(tag_levels = "A") + plot_layout(guides = "collect",
#                                                   widths = c(1, 0.9)) & theme(legend.position = "bottom") & guides(color = "none")
# 
# # save PNG version
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig5.png"
#   ),
#   plAB,
#   height = 4,
#   width = 6.5,
#   bg = "white",
#   units = "in"
# )
# 
# # save PDF version
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig5.pdf"
#   ),
#   plAB,
#   height = 4,
#   width = 6.5,
#   bg = "white"
# )

## ----goodness-of-fit_exampleFits----------------------------------------------
# pl <- plot(my_pk,
#   method = "bobyqa",
#   best_fit = TRUE,
#   use_scale_conc = list(
#     dose_norm = TRUE,
#     log10_trans = FALSE
#   ),
#   drop_nonDetect = FALSE
# )
# 
# cvt %>%
#   filter(curation_set_tag == "CVT_Showa") %>%
#   pull(analyte_dtxsid) -> Showa_chems
# 
# 
# my_rsq <- rsq(my_pk, method = "bobyqa")
# my_rsq %>%
#   semi_join(winmodel) %>%
#   filter(Chemical %in% c(
#     "DTXSID2020139",
#     "DTXSID4023917",
#     "DTXSID3061635",
#     "DTXSID8030760"
#   ))
# 
# # High rsq: DTXSID2020139 and DTXSID4023917
# # Low rsq: DTXSID3061635 and DTXSID8030760
# 
# fe_df %>%
#   filter(Chemical %in% c(
#     "DTXSID2020139",
#     "DTXSID4023917",
#     "DTXSID3061635",
#     "DTXSID8030760"
#   ), Species %in% "rat") %>%
#   group_by(Chemical, Species) %>%
#   summarise(frac_2fold = sum(Fold_Error < 2 & Fold_Error > 0.5) / n())
# 
# # # A tibble: 4 × 3
# # # Groups:   Chemical [4]
# #   Chemical      Species frac_2fold
# #   <chr>         <chr>        <dbl>
# # 1 DTXSID2020139 rat          0.271
# # 2 DTXSID3061635 rat          0.125
# # 3 DTXSID4023917 rat          0.953
# # 4 DTXSID8030760 rat          0.778
# 
# # High frac 2fold: DTXSID4023917 and DTXSID8030760
# # Low frac 2fold: DTXSID2020139 and DTXSID3061635
# 
# pl_sub <- pl %>%
#   filter(
#     Chemical %in% c(
#       "DTXSID2020139",
#       "DTXSID4023917",
#       "DTXSID3061635",
#       "DTXSID8030760"
#     ),
#     Species %in% "rat"
#   )
# 
# ex_fits <- pl_sub %>%
#   pull(final_plot)
# ex_fits <- set_names(
#   ex_fits,
#   pl_sub$Chemical
# )
# 
# 
# 
# cowplot::plot_grid(
#   plotlist = list(
#     # Low frac 2fold and high Rsq
#     ex_fits[["DTXSID2020139"]] +
#       scale_color_manual(values = c(
#         "#a6bddb",
#         "#74a9cf",
#         "#41bbc4",
#         "#2b8cbe",
#         "#045a8d"
#       )) +
#       theme(legend.position = "none") +
#       xlim(0, 30) +
#       ggtitle("A: DTXSID2020139 rat"),
#     # high frac 2fold and high Rsq
#     ex_fits[["DTXSID4023917"]] +
#       scale_color_manual(values = c(
#         "black",
#         "grey40"
#       )) +
#       theme(legend.position = "none") +
#       ggtitle("B: DTXSID4023917 rat"),
# 
#     # low frac 2fold and low rsq
#     ex_fits[["DTXSID3061635"]] +
#       scale_color_manual(values = c("#006837")) +
#       theme(legend.position = "none") +
#       ggtitle("C: DTXSID3061635 rat"),
# 
#     # high frac2fold and low rsq
#     ex_fits[["DTXSID8030760"]] +
#       scale_color_manual(values = "magenta3") +
#       theme(legend.position = "none") +
#       ggtitle("D: DTXSID8030760 rat")
#   ),
#   scale = 0.85
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig7.png"
#   ),
#   height = 12,
#   width = 12
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig7.pdf"
#   ),
#   height = 12,
#   width = 12
# )

## -----------------------------------------------------------------------------
# flat_chems <- winmodel %>%
#   filter(model %in% "model_flat") %>%
#   distinct(Chemical, Species)
# print(flat_chems)
# 
# pl <- plot(my_pk,
#   method = "bobyqa",
#   best_fit = FALSE,
#   use_scale_conc = list(
#     dose_norm = TRUE,
#     log10_trans = FALSE
#   ),
#   drop_nonDetect = FALSE
# )
# 
# flat_fits <- pl %>%
#   inner_join(flat_chems) %>%
#   pull(final_plot)
# 
# cowplot::plot_grid(plotlist = flat_fits)
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig8.pdf"
#   ),
#   height = 8,
#   width = 16,
#   units = "in"
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig8.png"
#   ),
#   bg = "white",
#   height = 8,
#   width = 16,
#   dpi = 300,
#   units = "in"
# )

## ----Lombardo_Comparison------------------------------------------------------
# cvt_DTXSIDs <- unique(my_pk$data$Chemical)
# # First things first, let's load the data from Lombardo et al.
# lombardo <- readxl::read_xlsx(
#   paste0(
#     Sys.getenv("OD_DIR"),
#     "Lombardo2018-Supplemental_82966_revised_corrected.xlsx"
#   ),
#   skip = 8
# )
# 
# # use ctxR to search the Dashboard for DTXSIDs corresponding to these CASRNs
# lombardo_dtxsid <- ctxR::chemical_equal_batch(
#   word_list = unique(lombardo$`CAS #`)
# )
# 
# # merge in the DTXSIDs
# lombardo <- lombardo %>%
#   dplyr::left_join(
#     lombardo_dtxsid$valid %>%
#       dplyr::select(searchValue, dtxsid, preferredName),
#     by = c("CAS #" = "searchValue")
#   )
# 
# # any left unidentified: search by name
# lombardo_missing <- lombardo %>%
#   dplyr::filter(is.na(dtxsid))
# 
# missing_names <- lombardo_missing %>%
#   dplyr::pull(Name)
# 
# lombardo_dtxsid_name <- ctxR::chemical_equal_batch(
#   word_list = unique(missing_names)
# )
# 
# lombardo_missing <- lombardo_missing %>%
#   dplyr::left_join(
#     lombardo_dtxsid_name$valid %>%
#       dplyr::select(searchValue, dtxsid, preferredName),
#     by = c("Name" = "searchValue")
#   )
# 
# lombardo <- dplyr::bind_rows(
#   lombardo %>% filter(!is.na(dtxsid)),
#   lombardo_missing
# )
# 
# lombard_abbr <- lombardo %>%
#   dplyr::select(dtxsid,
#     preferredName,
#     Vss = `human VDss (L/kg)`,
#     CLtot = `human CL (mL/min/kg)`,
#     halflife = `terminal  t1/2 (h)`
#   ) %>%
#   dplyr::mutate(
#     Species = "human",
#     source = "Lombardo et al.",
#     CLtot = CLtot * 60 / 1000 # Lombardo reports this as mL/min/kg, need L/h/kg
#   ) %>%
#   dplyr::arrange(halflife)
# 
# my_pk_vals <- my_tkstats %>%
#   dplyr::select(
#     dtxsid = Chemical,
#     Species,
#     model,
#     halflife = halflife.tkstats,
#     Vss = Vss.tkstats,
#     CLtot = CLtot.tkstats
#   ) %>%
#   dplyr::distinct() %>%
#   dplyr::mutate(source = "invivoPKfit") %>%
#   dplyr::filter(
#     model != "model_flat",
#     !is.na(halflife)
#   )
# 
# lombardo_inner <- my_pk_vals %>%
#   dplyr::inner_join(lombard_abbr, by = "dtxsid")
# # 79 chemicals in common
# 
# lombardo_comparison <- dplyr::bind_rows(
#   lombard_abbr %>%
#     dplyr::filter(
#       dtxsid %in%
#         lombardo_inner$dtxsid
#     ),
#   my_pk_vals %>%
#     dplyr::filter(
#       dtxsid %in%
#         lombardo_inner$dtxsid
#     )
# )
# 
# # merge in chemical names
# 
# # get preferred names by DTXSID
# all_details <- ctxR::get_chemical_details_batch(
#   unique(lombardo_comparison$dtxsid)
# )
# 
# lombardo_comparison <- lombardo_comparison %>%
#   select(-preferredName) %>%
#   left_join(all_details %>% select(dtxsid, preferredName))
# 
# # create a factor variable for chemical names that sorts chemicals from highest to lowest Lombardo halflife
# 
# chemnames_levels <- lombardo_comparison %>%
#   dplyr::filter(source %in% "Lombardo et al.") %>%
#   dplyr::arrange(halflife) %>%
#   dplyr::pull(preferredName)
# 
# 
# lombardo_comparison <- lombardo_comparison %>%
#   dplyr::mutate(preferredName = factor(preferredName,
#     levels = chemnames_levels
#   ))
# 
# # get invivoPKfit species for plot-splitting purposes
# ivpk_species <- lombardo_comparison %>%
#   dplyr::filter(source %in% "invivoPKfit") %>%
#   dplyr::distinct(dtxsid, Species) %>%
#   dplyr::rename(invivopkfit_species = Species)
# 
# lombardo_comparison <- lombardo_comparison %>%
#   dplyr::left_join(ivpk_species,
#     by = "dtxsid"
#   )
# 
# # reshape longer and plot
# lombardo_comparison %>%
#   tidyr::pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>%
#   dplyr::mutate(name = factor(name,
#     levels = c("halflife", "Vss", "CLtot")
#   )) %>%
#   ggplot(mapping = aes(
#     x = value,
#     y = preferredName
#   )) +
#   geom_point(
#     aes(
#       shape = source,
#       color = source
#     ),
#     # position = position_dodge(0.7),
#     size = 5 / .pt, stroke = 2.5 / .pt
#   ) +
#   facet_grid(
#     cols = vars(name),
#     rows = vars(invivopkfit_species),
#     scales = "free",
#     space = "free_y",
#     drop = TRUE
#   ) +
#   scale_x_log10(labels = scales::label_math(format = log10)) +
#   # annotation_logticks(sides = "b") +
#   scale_shape_manual(values = c(
#     "Lombardo et al." = 3,
#     "invivoPKfit" = 22
#   )) +
#   scale_color_manual(values = c("#d95f02", "#1b9e77")) +
#   guides(x = "axis_logticks") +
#   theme(
#     panel.border = element_rect(
#       color = "black",
#       fill = NA,
#       linewidth = 1.5
#     ),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     panel.spacing.y = unit(0, "line"),
#     strip.background = element_rect(fill = "white"),
#     strip.text.x = element_text(face = "bold", size = 12),
#     strip.text.y = element_blank(),
#     text = element_text(size = 10),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     axis.text.y = element_text(face = "bold", size = 6),
#     axis.text.x = element_text(size = 10),
#     axis.title.y = element_blank(),
#     axis.title.x = element_blank(),
#     legend.position = "bottom",
#     legend.key = element_blank(),
#     legend.title = element_text(face = "bold"),
#     legend.text = element_text()
#   )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig6.png"
#   ),
#   height = 11,
#   width = 10,
#   units = "in",
#   device = "png",
#   dpi = 300
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig6.pdf"
#   ),
#   height = 11,
#   width = 10,
#   units = "in"
# )

## ----musther-read-------------------------------------------------------------
# species_cols <- c(
#   paste("Mouse",
#     c(
#       "Dosing Formulation",
#       "Strain",
#       "Sex",
#       "F% Mean",
#       "F% Range"
#     ),
#     sep = "."
#   ),
#   paste("Rat",
#     c(
#       "Dosing Formulation",
#       "Strain",
#       "Sex",
#       "F% Mean",
#       "WSD",
#       "F% Range"
#     ),
#     sep = "."
#   ),
#   paste("Dog",
#     c(
#       "Dosing Formulation",
#       "Strain",
#       "Sex",
#       "F% Mean",
#       "WSD",
#       "F% Range"
#     ),
#     sep = "."
#   ),
#   paste("Non-Human Primate",
#     c(
#       "Dosing Formulation",
#       "Strain",
#       "Sex",
#       "F% Mean",
#       "WSD",
#       "F% Range"
#     ),
#     sep = "."
#   ),
#   paste("Human",
#     c(
#       "Dosing Formulation",
#       "Sex",
#       "F% Mean",
#       "WSD",
#       "F% Range"
#     ),
#     sep = "."
#   )
# )
# 
# 
# 
# musther2014 <- readxl::read_excel(
#   path = paste0(
#     Sys.getenv("OD_DIR"),
#     "SupTableMusther2014.xlsx"
#   ),
#   sheet = "Sheet1",
#   skip = 1,
#   col_names = c(
#     "Compound",
#     "CAS",
#     "MW", "Ion Class",
#     species_cols,
#     "References.mouse",
#     "References.rat",
#     "References.dog",
#     "References.NHP",
#     "References.human"
#   ),
#   col_types = "text"
# )
# 
# # pivot longer
# musther2014_long <- musther2014 %>%
#   select(c(
#     Compound,
#     CAS,
#     contains("F% Mean")
#   )) %>%
#   pivot_longer(
#     cols = !c(Compound, CAS),
#     names_to = c("Species", "stat"),
#     names_sep = "\\."
#   ) %>%
#   mutate(
#     value = gsub(
#       x = value,
#       pattern = "<",
#       replacement = ""
#     ),
#     value_num = as.numeric(value)
#   )

## ----musther-ccd--------------------------------------------------------------
# musther2014_dtxsid <- ctxR::chemical_equal_batch(word_list = unique(musther2014_long$CAS))
# 
# musther2014_long <- musther2014_long %>%
#   left_join(musther2014_dtxsid$valid %>% select(searchValue, dtxsid, preferredName),
#     by = c("CAS" = "searchValue")
#   )

## ----musther-getfgutabs-------------------------------------------------------
# fgutabs <- coef(my_pk, method = "bobyqa") %>%
#   distinct() %>%
#   semi_join(winmodel) %>% # keep only winning models
#   rowwise() %>%
#   mutate(Fgutabs = coefs_vector["Fgutabs"]) %>%
#   filter(!is.na(Fgutabs))

## -----------------------------------------------------------------------------
# length(intersect(
#   musther2014_long$dtxsid,
#   get_data(my_pk)$Chemical
# ))

## ----musther-join-------------------------------------------------------------
# fgutabs_pk_musther <- fgutabs %>% inner_join(musther2014_long,
#   by = c("Chemical" = "dtxsid")
# )

## ----musther-plot-------------------------------------------------------------
# # capitalize invivopkfit species to harmonize with Musther
# fgutabs_pk_musther <- fgutabs_pk_musther %>%
#   dplyr::mutate(Species.x = stringr::str_to_sentence(Species.x))
# 
# ggplot(fgutabs_pk_musther) +
#   geom_point(
#     aes(
#       y = preferredName,
#       x = Fgutabs,
#       shape = Species.x,
#       color = "invivopkfit"
#     ),
#     size = 4,
#     stroke = 2
#   ) +
#   geom_point(
#     aes(
#       y = preferredName,
#       x = value_num / 100,
#       shape = Species.y,
#       color = "Musther et al. 2014"
#     ),
#     size = 4,
#     stroke = 2
#   ) +
#   scale_shape_manual(
#     values = c(
#       "Rat" = 21,
#       "Mouse" = 22,
#       "Dog" = 23,
#       "Non-Human Primate" = 24,
#       "Human" = 3
#     ),
#     breaks = c("Rat", "Mouse", "Dog", "Non-Human Primate", "Human")
#   ) +
#   theme_bw() +
#   theme(
#     axis.title.y = element_blank(),
#     legend.title = element_blank(),
#     axis.title.x = element_text(size = 12),
#     axis.text = element_text(size = 12),
#     legend.text = element_text(size = 12)
#   )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig9.pdf"
#   ),
#   height = 5,
#   width = 8
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig9.png"
#   ),
#   height = 5,
#   width = 8,
#   device = "png", dpi = 300
# )

## ----do_fit-benchmarking------------------------------------------------------
# # Randomize the order of chemical species
# chem_spec <- cvt %>%
#   dplyr::distinct(analyte_dtxsid, species) %>%
#   dplyr::slice_sample(prop = 1)
# 
# test_group_size <- seq(25, 105, by = 10)
# fit_options <- expand.grid(
#   error_model = c(
#     "pooled",
#     "hierarchical"
#   ),
#   dose_norm = FALSE,
#   log10_trans = TRUE,
#   time_scale = c(
#     TRUE,
#     FALSE
#   ),
#   stringsAsFactors = FALSE
# )
# 
# fit_options <- tidyr::expand_grid(fit_options, test_group_size)
# 
# fit_options <- fit_options %>%
#   rowwise() %>%
#   mutate(
#     this_data = list({
#       tmp <- chem_spec %>%
#         slice_head(n = test_group_size) %>%
#         left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#         pk() +
#         scale_time(new_units = ifelse(!time_scale,
#           "identity",
#           "auto"
#         )) +
#         scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) +
#         settings_preprocess(suppress.messages = TRUE)
#       do_prefit(tmp)
#     }),
#     data_size = chem_spec %>% slice_head(n = test_group_size) %>%
#       left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#       NROW()
#   )
# 
# # Organization of the benchmarking
# # For each fit_option, return the four values of system.time() that we want
# df_out <- fit_options %>%
#   rowwise() %>%
#   mutate(
#     tim_1core = system.time(do_fit(this_data))[["elapsed"]],
#     tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]],
#     tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]],
#     tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]]
#   )
# 
# df_out <- df_out %>% select(-this_data)
# gc()
# 
# full_long <- df_out %>%
#   pivot_longer(
#     cols = c(tim_1core:tim_12core),
#     names_to = "N_cores",
#     values_to = "Time_s"
#   ) %>%
#   group_by(N_cores, test_group_size, data_size) %>%
#   summarize(
#     mean_time = mean(Time_s, na.rm = TRUE) / 60,
#     max_time = max(Time_s, na.rm = TRUE) / 60,
#     min_time = min(Time_s, na.rm = TRUE) / 60
#   ) %>%
#   mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+")))
# 
# df_out %>% clipr::write_clip()
# ggplot(
#   full_long,
#   aes(
#     x = test_group_size,
#     y = mean_time,
#     color = as.factor(N_cores)
#   )
# ) +
#   geom_point() +
#   geom_linerange(aes(
#     ymin = min_time,
#     ymax = max_time
#   )) +
#   geom_line(aes(group = N_cores)) +
#   labs(
#     x = "Number of Data Groups",
#     y = "Runtime (minutes)",
#     title = "Number of Processing Cores",
#     subtitle = "(with min/max runtime range)"
#   ) +
#   facet_grid(cols = vars(N_cores)) +
#   scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) +
#   coord_fixed(ratio = 8) +
#   theme(
#     panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#     panel.background = element_blank(),
#     panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#     legend.position = "none",
#     plot.title = element_text(hjust = 0.5),
#     plot.subtitle = element_text(hjust = 0.5),
#     axis.ticks = element_blank(),
#     axis.line = element_blank(),
#     strip.background = element_blank()
#   )
# 
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Fig10.png"
#   ),
#   width = 5,
#   height = 2.5,
#   units = "in"
# )

## ----fit_summary_bydatagrp----------------------------------------------------
# fit_summary_bydatagrp <- function(this_error_model,
#                                   this_dose_norm,
#                                   this_log10_trans,
#                                   this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   my_pk <- readRDS(file = file_str)
# 
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Summary by data group for: ", pk_name))
# 
#   myAFE <- AFE(my_pk,
#     use_scale_conc = FALSE,
#     sub_pLOQ = TRUE
#   )
# 
#   myAAFE <- AAFE(my_pk,
#     use_scale_conc = FALSE,
#     sub_pLOQ = TRUE
#   )
# 
#   my_r2 <- rsq(my_pk,
#     use_scale_conc = FALSE,
#     sub_pLOQ = TRUE
#   )
# 
#   myAIC <- AIC(my_pk)
# 
#   myRMSLE <- rmse(my_pk,
#     use_scale_conc = list(
#       dose_norm = FALSE,
#       log10_trans = TRUE
#     ),
#     sub_pLOQ = TRUE
#   )
# 
# 
#   myRMSE <- rmse(my_pk,
#     use_scale_conc = FALSE,
#     sub_pLOQ = TRUE
#   )
# 
#   winmodel <- get_winning_model(my_pk) %>%
#     select(Chemical, Species, method, model) %>%
#     dplyr::mutate(is_winning_model = TRUE)
# 
# 
#   # note convergence codes for each model
#   pf <- my_pk$fit %>%
#     dplyr::distinct(Chemical, Species, model, method, convcode)
# 
# 
#   convcode_labels <- data.frame(
#     convcode = c(
#       0,
#       1,
#       20,
#       21,
#       10,
#       51,
#       52,
#       9999,
#       -9999
#     ),
#     convcode_label = c(
#       "Successful convergence (see ?optimx::optimx)",
#       "The iteration limit maxit had been reached (see ?optimx::optimx)",
#       "the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))",
#       "an intermediate set of parameters is inadmissible (see ?optimx::optimx)",
#       "degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)",
#       'a warning from the "L-BFGS-B" method (see ?optimx::optimx)',
#       'an error from the "L-BFGS-B" method (see ?optimx::optimx)',
#       "error in optimx::optimx()",
#       'model status was "abort"'
#     )
#   )
# 
#   pf <- pf %>%
#     dplyr::left_join(convcode_labels)
# 
#   # join absolutely everything together
#   summ_by_datagroup <- pf %>%
#     left_join(winmodel) %>%
#     left_join(myAIC) %>%
#     left_join(myAFE) %>%
#     left_join(myAAFE) %>%
#     left_join(my_r2) %>%
#     left_join(myRMSLE) %>%
#     left_join(myRMSE) %>%
#     mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE,
#       is.na(is_winning_model) ~ FALSE,
#       .default = FALSE
#     )) %>%
#     arrange(Chemical, Species, method, model)
# 
#   return(summ_by_datagroup)
# }

## -----------------------------------------------------------------------------
# fit_summary_byexpt <- function(this_error_model,
#                                this_dose_norm,
#                                this_log10_trans,
#                                this_time_scale) {
#   dose_indic <- as.numeric(this_dose_norm)
#   log10_indic <- as.numeric(this_log10_trans)
#   time_indic <- as.numeric(this_time_scale)
#   errmodel_indic <- substr(this_error_model, 1, 1)
# 
#   file_str <- paste0(
#     Sys.getenv("OD_DIR"),
#     read_date_pk,
#     "mypk_fit_",
#     dose_indic,
#     log10_indic,
#     time_indic,
#     errmodel_indic,
#     ".Rds"
#   )
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   my_pk <- readRDS(file = file_str)
# 
#   pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
# 
#   message(paste0("Summary by experiment for: ", pk_name))
# 
#   my_tkstats <- eval_tkstats(my_pk,
#     model = NULL
#   )
# 
#   winmodel <- get_winning_model(my_pk) %>%
#     select(Chemical, Species, method, model) %>%
#     dplyr::mutate(is_winning_model = TRUE)
# 
#   pf <- my_pk$fit %>%
#     dplyr::distinct(Chemical, Species, model, method, convcode)
# 
# 
#   convcode_labels <- data.frame(
#     convcode = c(
#       0,
#       1,
#       20,
#       21,
#       10,
#       51,
#       52,
#       9999,
#       -9999
#     ),
#     convcode_label = c(
#       "Successful convergence (see ?optimx::optimx)",
#       "The iteration limit maxit had been reached (see ?optimx::optimx)",
#       "the initial set of parameters is inadmissible, that is, that the function cannot be computed or returns an infinite, NULL, or NA value (see ?optimx::optimx))",
#       "an intermediate set of parameters is inadmissible (see ?optimx::optimx)",
#       "degeneracy of the Nelder–Mead simplex (see ?optimx::optimx)",
#       'a warning from the "L-BFGS-B" method (see ?optimx::optimx)',
#       'an error from the "L-BFGS-B" method (see ?optimx::optimx)',
#       "error in optimx::optimx()",
#       'model status was "abort"'
#     )
#   )
# 
#   pf <- pf %>%
#     dplyr::left_join(convcode_labels)
# 
# 
#   summ_by_expt <- pf %>%
#     left_join(winmodel) %>%
#     left_join(my_tkstats) %>%
#     mutate(is_winning_model = case_when(is_winning_model %in% TRUE ~ TRUE,
#       is.na(is_winning_model) ~ FALSE,
#       .default = FALSE
#     )) %>%
#     arrange(Chemical, Species, method, model)
# 
#   return(summ_by_expt)
# }

## -----------------------------------------------------------------------------
# summary_datagrp_all <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(summ = list(
#     fit_summary_bydatagrp(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(cols = c(summ))
# 
# summary_expt_all <- fitopts %>%
#   dplyr::rowwise() %>%
#   dplyr::mutate(summ_expt = list(
#     fit_summary_byexpt(
#       this_error_model = error_model,
#       this_dose_norm = dose_norm,
#       this_log10_trans = log10_trans,
#       this_time_scale = time_scale
#     )
#   )) %>%
#   tidyr::unnest(cols = c(summ_expt))
# 
# writexl::write_xlsx(
#   list(
#     "Summary by data grp" = summary_datagrp_all,
#     "Summary by expt" = summary_expt_all
#   ),
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Supp_Table4.xlsx"
#   )
# )

## -----------------------------------------------------------------------------
# # set up pk analysis for single-study
# pk_study <- minimal_pk +
#   facet_data(
#     facets = vars(Chemical, Species, Reference)
#   ) +
#   stat_error_model(
#     error_group = vars(Chemical, Species, Reference)
#   ) +
#   settings_preprocess(
#     keep_data_original = FALSE,
#     suppress.messages = TRUE
#   ) +
#   scale_conc(
#     dose_norm = TRUE,
#     log10_trans = TRUE
#   ) +
#   settings_optimx(method = "bobyqa") +
#   stat_model(model = c(
#     "model_flat",
#     "model_1comp",
#     "model_2comp"
#   ))
# 
# # do pk analysis
# system.time(
#   pk_study <- do_fit(pk_study,
#     n_cores = 11
#   )
# )

## -----------------------------------------------------------------------------
# saveRDS(
#   pk_study,
#   paste0(
#     Sys.getenv("OD_DIR"),
#     today,
#     "studylevel_pk_fit_110p.Rds"
#   )
# )

## -----------------------------------------------------------------------------
# tkstats_study <- eval_tkstats(pk_study,
#   finite_only = FALSE,
#   dose_norm = TRUE,
#   model = "winning"
# ) %>%
#   dplyr::select(
#     Chemical,
#     Species,
#     Reference,
#     method,
#     model,
#     halflife.tkstats,
#     Vss.tkstats,
#     `Vss/Fgutabs.tkstats`
#   ) %>%
#   dplyr::rename(
#     halflife = halflife.tkstats,
#     Vss = Vss.tkstats,
#     `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`
#   ) %>%
#   dplyr::distinct()
# 
# # Also: get coefficients and try to pull Fgutabs (if it was optimized)
# coef_study <- coef_sd(pk_study,
#   method = "bobyqa",
#   include_type = "optimize"
# ) %>%
#   dplyr::filter(param_name %in% "Fgutabs") %>%
#   dplyr::select(
#     Chemical,
#     Species,
#     Reference,
#     method,
#     model,
#     param_name,
#     param_value
#   ) %>%
#   dplyr::distinct()
# 
# # keep only winning model coefs
# win_study <- get_winning_model(pk_study) %>%
#   dplyr::select(-near_flat, -preds_below_loq)
# 
# coef_study <- coef_study %>%
#   dplyr::semi_join(win_study)
# 
# fgutabs_study <- coef_study %>%
#   select(-param_name) %>%
#   rename(Fgutabs = param_value)
# 
# tkstats_study2 <- full_join(
#   tkstats_study,
#   fgutabs_study
# )

## -----------------------------------------------------------------------------
# file_str <- paste0(
#   Sys.getenv("OD_DIR"),
#   read_date_pk,
#   "mypk_fit_110p.Rds"
# )
# pk_pooled <- readRDS(file = file_str)
# 
# tkstats_pooled <- eval_tkstats(pk_pooled,
#   finite_only = FALSE,
#   dose_norm = TRUE,
#   method = "bobyqa",
#   model = "winning"
# ) %>%
#   dplyr::select(
#     Chemical,
#     Species,
#     Reference,
#     method,
#     model,
#     halflife.tkstats,
#     Vss.tkstats,
#     `Vss/Fgutabs.tkstats`
#   ) %>%
#   dplyr::rename(
#     halflife = halflife.tkstats,
#     Vss = Vss.tkstats,
#     `Vss/Fgutabs` = `Vss/Fgutabs.tkstats`
#   ) %>%
#   dplyr::group_by(
#     Chemical,
#     Species,
#     method,
#     model,
#     halflife,
#     Vss,
#     `Vss/Fgutabs`
#   ) %>%
#   dplyr::summarise(n_ref = n_distinct(Reference)) %>%
#   dplyr::ungroup()
# 
# # Also: get coefficients and try to pull Fgutabs
# coef_pooled <- coef_sd(pk_pooled,
#   method = "bobyqa",
#   include_type = "optimize"
# ) %>%
#   dplyr::filter(param_name %in% "Fgutabs") %>%
#   dplyr::select(
#     Chemical,
#     Species,
#     method,
#     model,
#     param_name,
#     param_value
#   ) %>%
#   dplyr::distinct()
# 
# # keep only winning model coefs
# win_pooled <- get_winning_model(pk_pooled,
#   method = "bobyqa"
# ) %>%
#   dplyr::select(-near_flat, -preds_below_loq)
# 
# coef_pooled <- coef_pooled %>%
#   dplyr::semi_join(win_pooled)
# 
# fgutabs_pooled <- coef_pooled %>%
#   select(-param_name) %>%
#   rename(Fgutabs = param_value)
# 
# tkstats_pooled2 <- full_join(
#   tkstats_pooled,
#   fgutabs_pooled
# )

## -----------------------------------------------------------------------------
# tkstats_study3 <- tkstats_study2 %>%
#   dplyr::rename(model_study = model) %>%
#   pivot_longer(
#     cols = c(
#       halflife,
#       Vss,
#       `Vss/Fgutabs`,
#       Fgutabs
#     ),
#     names_to = "name",
#     values_to = "value_study"
#   )
# 
# tkstats_pooled3 <- tkstats_pooled2 %>%
#   dplyr::rename(
#     model_pooled = model,
#     n_ref_pooled = n_ref
#   ) %>%
#   pivot_longer(
#     cols = c(
#       halflife,
#       Vss,
#       `Vss/Fgutabs`,
#       Fgutabs
#     ),
#     names_to = "name",
#     values_to = "value_pooled"
#   )
# 
# tkstats_all <- full_join(
#   tkstats_study3,
#   tkstats_pooled3
# )

## -----------------------------------------------------------------------------
# tkstats_all <- tkstats_all %>%
#   filter(!(model_pooled %in% "model_flat") &
#     !(model_study %in% "model_flat"))

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#   filter(!is.na(value_study) &
#     !is.na(value_pooled))) +
#   geom_point(aes(
#     x = value_pooled,
#     y = value_study,
#     color = factor(n_ref_pooled)
#   )) +
#   facet_wrap(
#     facets = vars(name),
#     scales = "free"
#   ) +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks")

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#   filter(n_ref_pooled > 1 &
#     !is.na(value_study) &
#     !is.na(value_pooled))) +
#   geom_point(
#     aes(
#       x = value_pooled,
#       y = value_study
#     ),
#     color = "gray50",
#     shape = 1
#   ) +
#   geom_line(
#     aes(
#       x = value_pooled,
#       y = value_study,
#       group = interaction(Chemical, Species)
#     ),
#     color = "gray50"
#   ) +
#   facet_wrap(
#     facets = vars(name),
#     scales = "free"
#   ) +
#   geom_abline(
#     intercept = 0, slope = 1,
#     color = "black"
#   ) +
#   geom_smooth(
#     aes(
#       x = value_pooled,
#       y = value_study
#     ),
#     method = "lm",
#     se = FALSE,
#     color = "black",
#     linetype = 2
#   ) +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks") +
#   theme_bw() +
#   theme(strip.background = element_blank())

## -----------------------------------------------------------------------------
# rsq_df <- tkstats_all %>%
#   filter(n_ref_pooled > 1 &
#     !is.na(value_study) &
#     !is.na(value_pooled)) %>%
#   mutate(
#     log10_value_pooled = log10(value_pooled),
#     log10_value_study = log10(value_study)
#   ) %>%
#   group_by(name) %>%
#   summarise(
#     n = n(),
#     n_grp = n_distinct(Chemical, Species),
#     rsq_log10 = summary(
#       lm(
#         log10_value_study ~ log10_value_pooled,
#         na.action = "na.omit"
#       )
#     )[[
#       "r.squared"
#     ]],
#     rsq = summary(lm(value_study ~ value_pooled))[[
#       "r.squared"
#     ]]
#   ) %>%
#   ungroup() %>%
#   mutate(label = sprintf(
#     "R^2 == %.3f",
#     rsq_log10
#   ))
# 
# rsq_df %>%
#   select(-label) %>%
#   print()

## -----------------------------------------------------------------------------
# ggplot(tkstats_all %>%
#   filter(n_ref_pooled > 1 &
#     !is.na(value_study) &
#     !is.na(value_pooled))) +
#   geom_point(
#     aes(
#       x = value_pooled,
#       y = value_study
#     ),
#     color = "gray50",
#     shape = 1
#   ) +
#   geom_line(
#     aes(
#       x = value_pooled,
#       y = value_study,
#       group = interaction(Chemical, Species)
#     ),
#     color = "gray50"
#   ) +
#   geom_text(
#     data = rsq_df,
#     aes(
#       x = 0,
#       y = Inf,
#       label = label
#     ),
#     parse = TRUE,
#     hjust = -0.3,
#     vjust = 1.8
#   ) +
#   facet_wrap(
#     facets = vars(name),
#     scales = "free"
#   ) +
#   geom_abline(
#     intercept = 0, slope = 1,
#     color = "black"
#   ) +
#   geom_smooth(
#     aes(
#       x = value_pooled,
#       y = value_study
#     ),
#     method = "lm",
#     se = FALSE,
#     color = "black",
#     linetype = 2
#   ) +
#   scale_x_log10(guide = "axis_logticks") +
#   scale_y_log10(guide = "axis_logticks") +
#   xlab("Pooled value") +
#   ylab("Single-study value") +
#   theme_bw() +
#   theme(
#     strip.background = element_blank(),
#     strip.text = element_text(size = 12)
#   )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_Fig8.png"
#   ),
#   height = 5,
#   width = 7
# )
# 
# ggsave(
#   paste0(
#     Sys.getenv("FIG_DIR"),
#     today,
#     "Manu_Files/",
#     "_Fig8.pdf"
#   ),
#   height = 5,
#   width = 7
# )

## -----------------------------------------------------------------------------
# tkstats_all_wide <- tkstats_all %>%
#   pivot_wider(
#     id_cols = c(
#       Chemical, Species,
#       Reference, method,
#       model_study,
#       model_pooled,
#       n_ref_pooled
#     ),
#     names_from = name,
#     values_from = c(value_study, value_pooled)
#   )
# 
# tkstats_all_wide %>%
#   filter(n_ref_pooled > 1) %>%
#   writexl::write_xlsx(path = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "Supp_Table5.xlsx"
#   ))

## -----------------------------------------------------------------------------
# plot_pooled <- plot(pk_pooled,
#   method = "bobyqa",
#   use_scale_conc = TRUE,
#   best_fit = TRUE
# )
# plot_pooled %>%
#   filter(Chemical %in% "DTXSID2020139" & Species %in% "rat") %>%
#   pull(final_plot[[1]])

## -----------------------------------------------------------------------------
# plot_pooled <- plot(pk_pooled,
#   method = "bobyqa",
#   use_scale_conc = TRUE,
#   print_out = TRUE
# )
# 
# ggsave(
#   filename = paste0(
#     Sys.getenv("FIG_DIR"),
#     "Manu_Files/",
#     today,
#     "_allplots.pdf"
#   ),
#   plot = gridExtra::marrangeGrob(plot_pooled, nrow = 1, ncol = 1),
#   width = 15, height = 9
# )

## ----information--------------------------------------------------------------
# sessionInfo()

