## ----configure_knitr, eval = TRUE---------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = '#>', fig.width=5, fig.height=3)

## ----clear_memory, eval = TRUE------------------------------------------------
rm(list=ls()) 

## ----runchunks, eval = TRUE---------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE

## ----setup, eval = execute.vignette-------------------------------------------
# library(data.table)
# library(httk)
# library(httkexamples)
# library(ggplot2)
# library(stringr)
# library(scales)
# library(grid)
# library(reshape)

## ----lmr2, eval = execute.vignette--------------------------------------------
# lm_R2 <- function(m,prefix=NULL){
#     out <- substitute("RMSE = "~mse*","~~italic(R)^2~"="~r2,
#          list(mse = signif(mean(m$residuals^2)^(1/2),3),
#               r2 = format(summary(m)$adj.r.squared, digits = 2)))
#     paste(prefix,as.character(as.expression(out)))
# }

## ----multi.plot, eval = execute.vignette--------------------------------------
# multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL, heights=NULL, widths=unit(rep_len(1, cols), "null")) {
# 
#   # Make a list from the ... arguments and plotlist
#   plots <- c(list(...), plotlist)
# 
#   numPlots = length(plots)
# 
#   # If layout is NULL, then use 'cols' to determine layout
#   if (is.null(layout)) {
#     # Make the panel
#     # ncol: Number of columns of plots
#     # nrow: Number of rows needed, calculated from # of cols
#     layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
#                     ncol = cols, nrow = ceiling(numPlots/cols))
#   }
# 
#  if (numPlots==1) {
#     print(plots[[1]])
# 
#   } else {
#     # Set up the page
#     grid.newpage()
#     if (!is.null(heights))
#     {
#       pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout),heights=heights,widths=widths)))
#     } else {
#       pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout),widths=widths)))
#     }
#     # Make each plot, in the correct location
#     for (i in 1:numPlots) {
#       # Get the i,j matrix positions of the regions that contain this subplot
#       matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
# 
#       print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
#                                       layout.pos.col = matchidx$col))
#     }
#   }
# }

## ----cssfun.u, eval = execute.vignette----------------------------------------
# cssfun_u <- function(chemcas,
#                       MW,
#                       hepatic.bioavailability,
#                       Fabsgut=1,
#                       probs=0.95,
#                       minimum.Funbound.plasma=0.0001)
# {
#   if (chemcas %in% intersect(get_cheminfo(suppress.messages=TRUE,
#                                  model="3compartmentss"),
#                      get_cheminfo(suppress.messages=TRUE,
#                                   model="schmitt")))
#   {
#   # Create a data table with uncertainty only:
#   pop_u_httk <- as.data.table(parameterize_steadystate(chem.cas=chemcas,
#     clint.pvalue.threshold=1,
#     minimum.Funbound.plasma=0,
#                                  suppress.messages=TRUE))
# 
#   if (!is.na(pop_u_httk[,"Clint.dist"]) &
#       !is.na(pop_u_httk[,"Funbound.plasma.dist"]))
#   {
#   # Get the physico-chemical properties:
#   params.schmitt <-parameterize_schmitt(chem.cas=chemcas,
#                                  suppress.messages=TRUE)
#   psd <- params.schmitt$pKa_Donor
#   psa <- params.schmitt$pKa_Accept
#   params.schmitt <- params.schmitt[regexpr("pKa",names(params.schmitt))==-1]
#   pop_u_httk[,names(params.schmitt):=params.schmitt]
#   pop_u_httk[,pKa_Donor := paste(psd, collapse=",")]
#   pop_u_httk[,pKa_Accept := paste(psa, collapse=",")]
#   pop_u_httk <- pop_u_httk[rep(1,1000)]
# 
#   # Calculate the distribution coefficient:
#   ions <- calc_ionization(pH=7.4,parameters=pop_u_httk)
#   pop_u_httk[,Dow74 := Pow * (ions$fraction_neutral +
#     0.001 * ions$fraction_charged + ions$fraction_zwitter)]
# 
#   # Parse the Funbound.plasma.dist into quantiles:
#   Funbound.plasma.dist <- unlist(pop_u_httk[1,"Funbound.plasma.dist"])
#   if (nchar(Funbound.plasma.dist) -
#     nchar(gsub(",","",Funbound.plasma.dist))!=2)
#   {
#     stop("Funbound.plasma distribution should be three values (median,low95th,high95th) separated by commas.")
#   }
#   temp <- strsplit(Funbound.plasma.dist,",")
#   ppb.median <- as.numeric(temp[[1]][1])
#   ppb.low95 <- as.numeric(temp[[1]][2])
#   ppb.high95 <- as.numeric(temp[[1]][3])
# 
#   if (ppb.median>minimum.Funbound.plasma)
#   {
#     # Use optim to estimate alpha and beta for fup such that the median and 95%
#     # credible interval approximate the estimate from MCMC:
#     ppb.fit <- optim(c(2,(1-ppb.median)/ppb.median*2),
#       function(x) (0.95-pbeta(ppb.high95,x[1],x[2])+
#       pbeta(ppb.low95,x[1],x[2]))^2+
#       (ppb.median-qbeta(0.5,x[1],x[2]))^2,
#       method="BFGS")
#   # We are drawing new values for the unadjusted Fup:
#     pop_u_httk[, unadjusted.Funbound.plasma:=rbeta(1000,
#       ppb.fit$par[1],
#       ppb.fit$par[2])]
#   } else if (ppb.high95>minimum.Funbound.plasma)
#   {
#     # Assume that since the median is zero but the u95 is not, that there is
#     # an exponential distribution:
#     # 97.5% of clearance values will be below Funbound.plasma.u95:
#     pop_u_httk[,unadjusted.Funbound.plasma:=runif(1000,
#       minimum.Funbound.plasma,
#       min(1,minimum.Funbound.plasma+
#       2*(ppb.high95-minimum.Funbound.plasma)))]
#     pop_u_httk[as.logical(rbinom(1000,1,.95)),
#       unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#   } else {
#     pop_u_httk[, unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#   }
# 
# # then we need to adjust for in vitro binding:
#   pop_u_httk[,Flipid:=subset(physiology.data,
#     Parameter=='Plasma Effective Neutral Lipid Volume Fraction')[,
#     which(colnames(physiology.data) == 'Human')]]
#   pop_u_httk[,Funbound.plasma.adjustment:=1 / (Dow74 * Flipid +
#     1 / unadjusted.Funbound.plasma)/unadjusted.Funbound.plasma]
#   pop_u_httk[,
#     Funbound.plasma:=unadjusted.Funbound.plasma*Funbound.plasma.adjustment]
# 
# # Enforce a minimum ppb:
#   pop_u_httk[Funbound.plasma<minimum.Funbound.plasma,
#     Funbound.plasma:=minimum.Funbound.plasma]
# 
# # If Fup varies, then Rb2p and hepatic bioavailability also vary:
#   Rblood2plasma <- get_rblood2plasma(chem.cas=chemcas)
#   if (is.na(Rblood2plasma))
#   {
#     pop_u_httk[, Rblood2plasma:=calc_rblood2plasma(parameters=pop_u_httk,
#                                  suppress.messages=TRUE)]
#   # Right now we don't simulate variability on a known blood:plasma ratio
#   } else pop_u_httk[, Rblood2plasma:=Rblood2plasma]
# 
#   # Parse the clint.dist to retrieve the quantiles:
#   Clint.dist <- unlist(pop_u_httk[1,"Clint.dist"])
#   if (nchar(Clint.dist) -
#     nchar(gsub(",","",Clint.dist))!=3)
#   {
#     stop("Clint distribution should be four values (median,low95th,high95th,pValue) separated by commas.")
#   }
#   temp <- strsplit(Clint.dist,",")
#   Clint.median <- as.numeric(temp[[1]][1])
#   Clint.low95 <- as.numeric(temp[[1]][2])
#   Clint.high95 <- as.numeric(temp[[1]][3])
#   Clint.pvalue <- as.numeric(temp[[1]][4])
# 
#     # Use optim to estimate mean and standard deviation of Clint such that the
#   # median and 95% credible interval approximate the estimate from MCMC:
#   if (Clint.high95>0)
#   {
#     if (Clint.median>0)
#     {
#       clint.fit <- optim(c(log(Clint.median),0.3), function(x)
#         (0.95-plnorm(Clint.high95,x[1],x[2])+plnorm(Clint.low95,x[1],x[2]))^2+
#         (Clint.median-qlnorm(0.5,x[1],x[2]))^2)
#       pop_u_httk[,Clint:=rlnorm(1000,clint.fit$par[1],clint.fit$par[2])]
#     } else {
#       pop_u_httk[,Clint:=exp(runif(1000,log(1),
#       (log(Clint.high95)-log(1))/0.975))]
#     }
#   } else pop_u_httk[,Clint:=0]
#   pop_u_httk[as.logical(rbinom(1000,1,Clint.pvalue)),Clint:=0] # the Bayesian "p-value" here reflects how often there is no clearance
# 
#   pop_u_httk[, CLh:=calc_hep_clearance(chem.cas=chemcas,
#     parameters=pop_u_httk,hepatic.model='unscaled',
#     suppress.messages=TRUE,clint.pvalue.threshold=1)]
#   pop_u_httk[, Qliver:=Qtotal.liverc*BW^0.75]
#   pop_u_httk[, hepatic.bioavailability:= Qliver / (Qliver +
#     Funbound.plasma * Clint / Rblood2plasma)]
#   pop_u_httk[, (setdiff(names(pop_u_httk),
#     names(httk::parameterize_steadystate(chem.cas=chemcas,
#                                  suppress.messages=TRUE)))):=NULL]
# 
# 
#   #compute Css for each individual
#   # calc_analytic_css is vectorized so that this will work.
#   css <- httk::calc_analytic_css(parameters=pop_u_httk,
#                                  clint.pvalue.threshold=1,
#                                  daily.dose=1,
#                                  clint.pop.cv=NULL,
#                                  fup.pop.cv=NULL,
#                                  output.units="uM",
#                                  model="3compartmentss",
#                                  species="Human",
#                                  suppress.messages=TRUE)#,
#                             #     recalc.blood2plasma=TRUE)
#   #get Css95
#   cssquants <- quantile(css, probs=probs)
#   return(cssquants)
#   }
#   }
# }

## ----hlfun.u, eval = execute.vignette-----------------------------------------
# hlfun_u <- function(chemcas,
#                       MW,
#                       probs=0.95,
#                       minimum.Funbound.plasma=0.0001)
# {
#   if (chemcas %in% intersect(get_cheminfo(suppress.messages=TRUE,
#                                  model="3compartmentss"),
#                      get_cheminfo(suppress.messages=TRUE,
#                                   model="schmitt")))
#   {
#   # Create a data table with uncertainty only:
#   print(chemcas)
#   pop_u_httk <- as.data.table(parameterize_steadystate(chem.cas=chemcas,
#     clint.pvalue.threshold=1,
#     minimum.Funbound.plasma=0,
#     suppress.messages=TRUE))
#   if (!is.na(pop_u_httk[,"Clint.dist"]) &
#       !is.na(pop_u_httk[,"Funbound.plasma.dist"]))
#   {
#   params.schmitt <-parameterize_schmitt(chem.cas=chemcas,
#     suppress.messages=TRUE)
#   psd <- params.schmitt$pKa_Donor
#   psa <- params.schmitt$pKa_Accept
#   params.schmitt <- params.schmitt[regexpr("pKa",names(params.schmitt))==-1]
#   pop_u_httk[,names(params.schmitt):=params.schmitt]
#   pop_u_httk[,pKa_Donor := paste(psd, collapse=",")]
#   pop_u_httk[,pKa_Accept := paste(psa, collapse=",")]
#   pop_u_httk <- pop_u_httk[rep(1,1000)]
# 
#   ions <- calc_ionization(pH=7.4,parameters=pop_u_httk)
#   pop_u_httk[,Dow74 := Pow * (ions$fraction_neutral + 0.001 * ions$fraction_charged + ions$fraction_zwitter)]
# 
#   # Parse the Funbound.plasma.dist into quantiles:
#   Funbound.plasma.dist <- unlist(pop_u_httk[1,"Funbound.plasma.dist"])
#   print(paste(chemcas,Funbound.plasma.dist))
#   if (nchar(Funbound.plasma.dist) -
#     nchar(gsub(",","",Funbound.plasma.dist))!=2)
#   {
#     stop("Funbound.plasma distribution should be three values (median,low95th,high95th) separated by commas.")
#   }
#   temp <- strsplit(Funbound.plasma.dist,",")
#   ppb.median <- as.numeric(temp[[1]][1])
#   ppb.low95 <- as.numeric(temp[[1]][2])
#   ppb.high95 <- as.numeric(temp[[1]][3])
# 
#   if (ppb.median>minimum.Funbound.plasma)
#   {
#     # Use optim to estimate alpha and beta for fup such that the median and 95%
#     # credible interval approximate the estimate from MCMC:
#     ppb.fit <- optim(c(2,(1-ppb.median)/ppb.median*2),
#       function(x) (0.95-pbeta(ppb.high95,x[1],x[2])+
#       pbeta(ppb.low95,x[1],x[2]))^2+
#       (ppb.median-qbeta(0.5,x[1],x[2]))^2,
#       method="BFGS")
#   # We are drawing new values for the unadjusted Fup:
#     pop_u_httk[, unadjusted.Funbound.plasma:=rbeta(1000,
#       ppb.fit$par[1],
#       ppb.fit$par[2])]
#   } else if (ppb.high95>minimum.Funbound.plasma)
#   {
#     # Assume that since the median is zero but the u95 is not, that there is
#     # an exponential distribution:
#     # 97.5% of clearance values will be below Funbound.plasma.u95:
#     pop_u_httk[, unadjusted.Funbound.plasma:=runif(1000,
#       minimum.Funbound.plasma,
#       min(1,(ppb.high95-minimum.Funbound.plasma)/0.975))]
#     pop_u_httk[as.logical(rbinom(1000,1,.975)),
#       unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#   } else {
#     pop_u_httk[, unadjusted.Funbound.plasma:=minimum.Funbound.plasma]
#   }
# 
# # then we need to adjust for in vitro binding:
#   pop_u_httk[,Flipid:=subset(physiology.data,
#     Parameter=='Plasma Effective Neutral Lipid Volume Fraction')[,
#     which(colnames(physiology.data) == 'Human')]]
#   pop_u_httk[,Funbound.plasma.adjustment:=1 / (Dow74 * Flipid +
#     1 / unadjusted.Funbound.plasma)/unadjusted.Funbound.plasma]
#   pop_u_httk[,
#     Funbound.plasma:=unadjusted.Funbound.plasma*Funbound.plasma.adjustment]
# 
# # Enforce a minimum ppb:
#   pop_u_httk[Funbound.plasma<minimum.Funbound.plasma,
#     Funbound.plasma:=minimum.Funbound.plasma]
# 
#   Rblood2plasma <- get_rblood2plasma(chem.cas=chemcas)
#   if (is.na(Rblood2plasma))
#   {
#     pop_u_httk[, Rblood2plasma:=calc_rblood2plasma(parameters=pop_u_httk,
#                                         suppress.messages=TRUE)]
#   } else pop_u_httk[, Rblood2plasma:=Rblood2plasma] # Right now we don't simulate variability on a known blood:plasma ratio
#   # Parse the clint.dist to retrieve the quantiles:
#   Clint.dist <- unlist(pop_u_httk[1,"Clint.dist"])
#   if (nchar(Clint.dist) -
#     nchar(gsub(",","",Clint.dist))!=3)
#   {
#     stop("Clint distribution should be four values (median,low95th,high95th,pValue) separated by commas.")
#   }
#   temp <- strsplit(Clint.dist,",")
#   Clint.median <- as.numeric(temp[[1]][1])
#   Clint.low95 <- as.numeric(temp[[1]][2])
#   Clint.high95 <- as.numeric(temp[[1]][3])
#   Clint.pvalue <- as.numeric(temp[[1]][4])
# 
#     # Use optim to estimate mean and standard deviation of Clint such that the
#   # median and 95% credible interval approximate the estimate from MCMC:
#   if (Clint.high95>0)
#   {
#     if (Clint.median>0)
#     {
#       clint.fit <- optim(c(log(Clint.median),0.3), function(x)
#         (0.95-plnorm(Clint.high95,x[1],x[2])+plnorm(Clint.low95,x[1],x[2]))^2+
#         (Clint.median-qlnorm(0.5,x[1],x[2]))^2)
#       pop_u_httk[,Clint:=rlnorm(1000,clint.fit$par[1],clint.fit$par[2])]
#     } else {
#       pop_u_httk[,Clint:=exp(runif(1000,log(1),
#       (log(Clint.high95)-log(1))/0.975))]
#     }
#   } else pop_u_httk[,Clint:=0]
#   pop_u_httk[as.logical(rbinom(1000,1,Clint.pvalue)),Clint:=0] # the Bayesian "p-value" here reflects how often there is no clearance
# 
#   pop_u_httk[, CLh:=calc_hep_clearance(
#     chem.cas=chemcas,
#     parameters=pop_u_httk,
#     hepatic.model='unscaled',
#     suppress.messages=TRUE,
#     clint.pvalue.threshold=1)]
#   pop_u_httk[, Qliver:=Qtotal.liverc*BW^0.75]
#   pop_u_httk[, hepatic.bioavailability:= Qliver / (Qliver + Funbound.plasma * Clint / Rblood2plasma)]
# 
#   PC.names <- names(predict_partitioning_schmitt(chem.cas="80-05-7",
#                                         suppress.messages=TRUE))
#   pop_u_httk<-cbind(pop_u_httk,
#                     as.data.table(predict_partitioning_schmitt(parameters=pop_u_httk,
#                                         suppress.messages=TRUE)))
#   pop_u_httk[,hl:=calc_elimination_rate(parameters=pop_u_httk,
#                                         suppress.messages=TRUE)]
# 
#   return(log(2)/quantile(unlist(pop_u_httk[,"hl",with=FALSE]),probs=probs))
#   }
#   }
# }

## ----Wambaugh2019.Fig1, eval = execute.vignette-------------------------------
# Fig1.table <- httkexamples::wambaugh2019.raw
# Fig1.table$Error1 <- Fig1.table$Base.Fup.High - Fig1.table$Base.Fup.Low
# Fig1.table$Error2 <- Fig1.table$Affinity.Fup.High - Fig1.table$Affinity.Fup.Low
# Fig1.table$Sigma1 <- Fig1.table$Error1/(2*qnorm(0.975))
# Fig1.table$Mean1 <- (Fig1.table$Base.Fup.High + Fig1.table$Base.Fup.Low)/(qnorm(0.975))
# Fig1.table$CV1 <- Fig1.table$Sigma1/Fig1.table$Mean1
# Fig1.table$Sigma2 <- Fig1.table$Error2/(2*qnorm(0.975))
# Fig1.table$Mean2 <- (Fig1.table$Base.Fup.High + Fig1.table$Base.Fup.Low)/(qnorm(0.975))
# Fig1.table$CV2 <- Fig1.table$Sigma2/Fig1.table$Mean2
# Fig1.table$ErrorClint <- Fig1.table$CLint.1uM.High95th - Fig1.table$CLint.1uM.Low95th
# Fig1.table$SigmaClint <- Fig1.table$ErrorClint/(2*qnorm(0.975))
# Fig1.table$MeanClint <- (Fig1.table$CLint.1uM.High95th + Fig1.table$CLint.1uM.Low95th)/(qnorm(0.975))
# Fig1.table$CVClint <- Fig1.table$SigmaClint/Fig1.table$MeanClint
# 
# Fupmeasured <- subset(Fig1.table,!is.na(Affinity.Fup.Med))
# Fupmeasured <- subset(Fupmeasured,!duplicated(CAS))
# CLintmeasured <- subset(Fig1.table,!is.na(CLint.1uM.Median))
# CLintmeasured <- subset(CLintmeasured,!duplicated(CAS))
# 
# 
# 
# print(paste("New HTTK experimental measurements were made for",length(unique(Fig1.table$CAS)),"chemicals from the ToxCast HTS library."))
# 
# print(paste("Fup was successfully measured for",length(unique(subset(Fig1.table,!is.na(Affinity.Fup.Med))$CAS)),"chemicals from the ToxCast HTS library."))
# 
# print(paste("CLint was successfully measured for",length(unique(subset(Fig1.table,!is.na(CLint.1uM.Median))$CAS)),"chemicals from the ToxCast HTS library."))
# 
# 
# 
# 
# Fig1a <- ggplot(Fig1.table)+
#    geom_freqpoly(binwidth = 0.5,lwd=1.5,color="Red",aes(Base.Fup.Med))+
#    geom_freqpoly(binwidth = 0.5,lwd=1.5,lty=3,color="Blue",aes(Affinity.Fup.Med))+
#   xlab(expression(paste("Measured ",F[up]))) +
#   ylab("Number of chemicals")+
#    scale_x_log10(label=label_log(),limits=c(10^-6,1.05))+
#    labs(title=paste(length(unique(subset(Fig1.table,!is.na(Affinity.Fup.Med))$CAS)),"Chemicals Measured"))+
#     theme_bw()+
#     theme( text  = element_text(size=10)) +
#     annotate("text", x=10^-6,y=80,size=8,label="a")
# 
# print(paste("Median 100%:",signif(median(Fig1.table$Base.Fup.Med,na.rm=TRUE),2)))
# print(paste("Median Titration:",signif(median(Fig1.table$Affinity.Fup.Med,na.rm=TRUE),2)))
# 
# 
# 
# Fig1b <- ggplot(Fig1.table)+
#    geom_histogram(binwidth = 0.5,alpha=0.6,fill="green",aes(CLint.1uM.Median))+
#    xlab(expression(paste("Measured ",Cl[int]," (uL/min/",10^6," hepatocytes)"))) +
#   ylab("Number of chemicals")+
#   scale_x_log10(label=label_log(),limits=c(10^-1,5*10^3))+
#   labs(title=paste(length(unique(subset(Fig1.table,!is.na(CLint.1uM.Median))$CAS)),"Chemicals Measured"))+
#     theme_bw()+
#     theme( text  = element_text(size=10)) +
#     annotate("text", x=2*10^-1,y=80,size=8,label="b")
# 
# print(paste(sum(Fig1.table$CLint.1uM.Median==0,na.rm=TRUE),"Zero Measurments"))
# print(paste("Median Non-Zero:",signif(median(Fig1.table$CLint.1uM.Median,na.rm=TRUE),3)))
# 
# 
# 
# Fig1c <- ggplot(Fig1.table)+
#    geom_histogram(binwidth = 0.1,alpha=0.2,fill="Red",aes(Error1))+
#    geom_histogram(binwidth = 0.1,alpha=0.2,fill="Blue",aes(Error2))+
#   xlab("Width of Credible Interval") +
#   ylab("Number of chemicals")+
#    scale_x_log10(label=label_log(),limits=c(5*10^-4,1.05))+
#    labs(title=paste("CI:",signif(median(Fig1.table$Error1,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV1,na.rm=TRUE),1),"/ CI:",signif(median(Fig1.table$Error2,na.rm=TRUE),1),"CV:",signif(median(Fig1.table$CV2,na.rm=TRUE),1)))+
#     theme_bw()+
#     theme( text  = element_text(size=10))+
#     annotate("text", x=5*10^-4,y=42,size=8,label="c")
# 
# print(paste("Median 100% CI:",signif(median(Fig1.table$Error1,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV1,na.rm=TRUE),2)))
# print(paste("Median Titr. CI:",signif(median(Fig1.table$Error2,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CV2,na.rm=TRUE),2)))
# 
# 
# Fig1d <- ggplot(Fig1.table)+
#    geom_histogram(binwidth = 0.1,alpha=0.6,fill="green",aes(ErrorClint))+
#   xlab("Width of Credible Interval") +
#   ylab("Number of chemicals")+
#   scale_x_log10(label=label_log(),limits=c(10^0,10^3))+
#    labs(title=paste("Median CI:",signif(median(Fig1.table$ErrorClint,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CVClint,na.rm=TRUE),2)))+
#     theme_bw()+
#     theme( text  = element_text(size=10))+
#     annotate("text", x=1.5,y=25,size=8,label="d")
# 
# print(paste("Median CI:",signif(median(Fig1.table$ErrorClint,na.rm=TRUE),2),"CV:",signif(median(Fig1.table$CVClint,na.rm=TRUE),2)))
# 
# multiplot(Fig1a,Fig1c,Fig1b,Fig1d,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig2, eval = execute.vignette-------------------------------
# Fig2.table <- httkexamples::wambaugh2019.raw
# print(paste(sum(!is.na(Fig2.table$Fup.point)),"total successful Fup estimates using point estimation."))
# print(paste(sum(!is.na(Fig2.table$Base.Fup.Med)),"total successful Fup estimates using BAyesian analysis of top protein concentration."))
# Fig2.table$Error1 <- Fig2.table$Base.Fup.High - Fig2.table$Base.Fup.Low
# Fig2.table$Error2 <- Fig2.table$Affinity.Fup.High - Fig2.table$Affinity.Fup.Low
# Fig2.table$Sigma1 <- Fig2.table$Error1/(2*qnorm(0.975))
# Fig2.table$Mean1 <- (Fig2.table$Base.Fup.High + Fig2.table$Base.Fup.Low)/(qnorm(0.975))
# Fig2.table$CV1 <- Fig2.table$Sigma1/Fig2.table$Mean1
# Fig2.table$Sigma2 <- Fig2.table$Error2/(2*qnorm(0.975))
# Fig2.table$Mean2 <- (Fig2.table$Base.Fup.High + Fig2.table$Base.Fup.Low)/(qnorm(0.975))
# Fig2.table$CV2 <- Fig2.table$Sigma2/Fig2.table$Mean2
# Fig2.table$Point.Estimate <- "Good"
# Fig2.table[Fig2.table$Fup.point<0&!is.na(Fig2.table$Fup.point),"Point.Estimate"]<-"<0"
# Fig2.table[Fig2.table$Fup.point>1&!is.na(Fig2.table$Fup.point),"Point.Estimate"]<-">1"
# Fig2.table[is.na(Fig2.table$Fup.point),"Point.Estimate"]<-"No Value"
# Fig2.table[Fig2.table$Point.Estimate%in%c("<0","No Value"),"Fup.point"] <-  Fig2.table[Fig2.table$Point.Estimate%in%c("<0","No Value"),"Base.Fup.Med"]
# Fig2.table$Uncertain<-"Yes"
# Fig2.table[is.na(Fig2.table$CV1),"CV1"] <- -999
# Fig2.table[Fig2.table$CV1<0.49,"Uncertain"] <- "No"
# Fig2.table[Fig2.table$CV1==-999,"CV1"] <- NA
# 
# Fig2a <- ggplot(subset(Fig2.table,!is.na(Base.Fup.Med)),aes(x=Fup.point,y=Base.Fup.Med,shape=Point.Estimate,alpha=Uncertain,colour=Point.Estimate)) +
#          geom_point(size=3)+
# #         geom_errorbar(aes(ymax = Base.Fup.High, ymin=Base.Fup.Low))+
#    scale_y_log10(label=label_log(),limits=c(10^-6,2)) +
#    scale_x_log10(label=label_log(),limits=c(10^-6,2)) +
#   ylab(expression(paste(F[up]," Bayesian Analysis (Single Conc.)"))) +
#   xlab(expression(paste(F[up]," Point Estimate (Traditional Analysis)"))) +
#   geom_abline(intercept = 0, slope = 1,linetype="dashed", colour="Blue") +
# #  geom_vline(xintercept = 1, size=2,linetype="dashed", colour="red")+
#   geom_vline(xintercept = 0.01, size=2,linetype="dotted", colour="red")+
#   geom_hline(yintercept = 0.01, size=2,linetype="dotted", colour="red")+
#     scale_colour_discrete(name="Point Estimate")+
#     scale_shape_discrete(name="Point Estimate") +
#     scale_alpha_manual(values=c(1,0.3))+
#         theme_bw() +
#      theme(legend.position="bottom", text  = element_text(size=12))+
#      annotate("text", size=8,x = 10^-6, y = 2, label = "a")+
#     guides(alpha=guide_legend(nrow=2,byrow=TRUE),colour=guide_legend(nrow=2,byrow=TRUE),shape=guide_legend(nrow=2,byrow=TRUE))
# 
# #print(Fig2a)
# 
# print(paste(sum(Fig2.table$Uncertain=="Yes"),"uncertain estimates using single conc."))
# print(paste(sum(Fig2.table$Point.Estimate=="<0"),"chemicals with negative point estimates."))
# print(paste(sum(Fig2.table$Point.Estimate==">1"),"chemicals with Fup > 1."))
# 
# 
# 
# summary(lm(data=subset(Fig2.table,!is.na(Fup.point)&Fup.point>0&Base.Fup.Med>0),log(Fup.point)~log(Base.Fup.Med)))
# 
# 
# 
# 
# Fig2.table$Uncertain2<-"Yes"
# Fig2.table[is.na(Fig2.table$CV2),"CV2"] <- -999
# Fig2.table[Fig2.table$CV2<0.49,"Uncertain2"] <- "No"
# Fig2.table[Fig2.table$CV2==-999,"CV2"] <- NA
# Fig2.table$TopFail <- ""
# Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&Fig2.table$Uncertain2=="No","TopFail"]<-"No"
# Fig2.table[Fig2.table$TopFail=="No"&!is.na(Fig2.table$Base.Fup.Med)&Fig2.table$Uncertain=="Yes","TopFail"]<-"Single Conc. Only"
# Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&Fig2.table$Uncertain2=="Yes","TopFail"]<-"Yes"
# Fig2.table[!is.na(Fig2.table$Affinity.Fup.Med)&is.na(Fig2.table$Base.Fup.Med),"TopFail"] <- "Single Conc. Only"
# Fig2.table[is.na(Fig2.table$Base.Fup.Med),"Base.Fup.Med"] <-  Fig2.table[is.na(Fig2.table$Base.Fup.Med),"Affinity.Fup.Med"]
# 
# 
# Fig2b <- ggplot(subset(Fig2.table,!is.na(Affinity.Fup.Med)),aes(x=Base.Fup.Med,y=Affinity.Fup.Med,shape=TopFail,alpha=TopFail,colour=TopFail)) +
#          geom_point(size=3)+
#          geom_point(data=subset(Fig2.table,TopFail=="No"),size=3)+
# #         geom_errorbar(aes(ymax = Fup.High.x, ymin=Fup.Low.x))+
# #         geom_errorbarh(aes(xmin=Fup.Low.y,xmax=Fup.High.y))+
#    scale_y_log10(label=label_log(),limits=c(10^-8,2)) +
#    scale_x_log10(label=label_log(),limits=c(10^-8,2)) +
#   xlab(expression(paste(F[up]," Bayesian Analysis (Single Conc.)"))) +
#   ylab(expression(paste(F[up]," Bayesian Analysis (Three Conc.)"))) +
#   geom_abline(intercept = 0, slope = 1,linetype="dashed", colour="Blue")+
#   scale_colour_discrete(name="Uncertain")+
#   scale_shape_discrete(name="Uncertain") +
# #    annotate("text",size=8, x = 10^-4, y = 1, label = "B")+
#    scale_alpha_manual(name="Uncertain",values=c(1,0.6,0.3))+
#      theme_bw() +
#   theme(legend.position="bottom", text  = element_text(size=12))+
#       annotate("text", size=8,x = 10^-8, y = 2, label = "b")+
#     guides(alpha=guide_legend(nrow=2,byrow=TRUE),colour=guide_legend(nrow=2,byrow=TRUE),shape=guide_legend(nrow=2,byrow=TRUE))
# 
# #print(Fig2b)
# 
# # Need to reduce to unique chemicals.
# Fupbaseworked <- subset(Fig2.table,!is.na(Base.Fup.Med)&Uncertain=="No")
# Fupbasenot <- subset(Fig2.table,!is.na(Base.Fup.Med)&Uncertain=="Yes")
# Fupbaseworked <- subset(Fupbaseworked,!duplicated(CAS))
# Fupbasenot <- subset(Fupbasenot,!duplicated(CAS))
# # Also need to count a chemical as a success if it worked for either duplicate:
# Fupbasenot <- subset(Fupbasenot,!(CAS%in%Fupbaseworked$CAS))
# 
# Fupaffinityworked <- subset(Fig2.table,!is.na(Affinity.Fup.Med)&Uncertain2=="No")
# Fupaffinitynot <- subset(Fig2.table,!is.na(Affinity.Fup.Med)&Uncertain2=="Yes")
# Fupaffinityworked <- subset(Fupaffinityworked,!duplicated(CAS))
# Fupaffinitynot <- subset(Fupaffinitynot,!duplicated(CAS))
# Fupaffinitynot <- subset(Fupaffinitynot,!(CAS%in%Fupaffinityworked$CAS))
# 
# 
# print(paste(dim(Fupbaseworked)[1],"total successful Fup estimates using single conc."))
# print(paste(signif(dim(Fupbasenot)[1]/(dim(Fupbaseworked)[1]+dim(Fupbasenot)[1])*100,3),"percent failure rate using single conc."))
# print(paste(dim(Fupaffinityworked)[1],"total successful Fup estimates using protein titration."))
# print(paste(signif(dim(Fupaffinitynot)[1]/(dim(Fupaffinityworked)[1]+dim(Fupaffinitynot)[1])*100,3),"percent failure rate using protein titration."))
# print(paste(dim(Fupbasenot)[1],"uncertain estimates using original protocol."))
# print(paste(dim(Fupaffinitynot)[1],"uncertain estimates using protein titration."))
# print(paste(dim(Fupaffinityworked)[1]-dim(Fupbaseworked)[1],"chemicals that could not be measured at 100% plasma protein."))
# 
# Wetmore.data <- httkexamples:::Wetmore.data
# W2015 <- subset(Wetmore.data,Reference=="Wetmore 2015")
# 
# W2012 <- subset(Wetmore.data,Reference=="Wetmore 2012")
# print(paste("The Fup assay failed for ",signif(sum(W2012$Fub==0.005)/length(W2012$Fub)*100,3),"% of chemicals in {Wetmore, 2012} and ",signif(sum(W2015$Fub==0)/length(W2015$Fub)*100,3),"% of chemicals in {Wetmore, 2015}."))
# 
# 
# Fuplod <- subset(Fig2.table,TopFail=="Single Conc. Only")
# print(paste("Among the chemicals that were uncertain using the original protocol but could be measured using the new three concentration protocol, the median estimated Fup was ",signif(median(Fuplod$Affinity.Fup.Med),2),"with values as low as ",signif(min(Fuplod$Affinity.Fup.Med),2),"and as high as",signif(max(Fuplod$Affinity.Fup.Med),2),". Previously, a default of 0.005 has been assumed when Fup could not be measured {Rotroff, 2010}.}"))
# 
# 
# summary(lm(data=subset(Fig2.table,!is.na(Base.Fup.Med)&Affinity.Fup.Med>0&Base.Fup.Med>0),log(Base.Fup.Med)~log(Affinity.Fup.Med)))
# 
# #dev.new(width=10,height=6)
# multiplot(Fig2a,Fig2b,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig3, eval = execute.vignette-------------------------------
# Fig1.table$Class <- "Other"
# Fig1.table[Fig1.table$DTXSID %in% pharma[,1],"Class"]<-"Pharmaceutical"
# Fig1.table$Class <- as.factor(Fig1.table$Class)
# 
#   Fig3 <- ggplot(Fig1.table,aes(Affinity.Kd.Med,fill=Class))+
#     geom_histogram(binwidth = 0.5)+
#     xlab(expression(paste("Binding Affinity (", mu,"M)",sep=""))) +
#       ylab("Number of chemicals")+
#     scale_x_log10(label=label_log())+
#       scale_fill_discrete(name="Chemical Class")+
#       theme_bw()  +
#     theme(legend.position="bottom", text  = element_text(size=18))
# 
#  # dev.new()
#   print(Fig3)

## ----Wambaugh2019.Fig4, eval = execute.vignette-------------------------------
# clearance.table <- subset(httkexamples::wambaugh2019.raw,!is.na(CLint.1uM.Median))
# 
# print(paste(sum(clearance.table$CLint.1uM.Median==0),"zero valued median Bayesian posteriors"))
# print(paste(sum(clearance.table$CLint.1uM.Point==0 &
#                   clearance.table$CLint.1uM.Median>0,
#                 na.rm=TRUE),
#             "zero valued point estimates where median posterior>0"))
# print(paste(sum(clearance.table$CLint.1uM.Point>0 &
#                   clearance.table$CLint.1uM.Median==0,
#                 na.rm=TRUE),
#             "zero valued median Bayesian posteriors where point estimate was non-zero"))
# print(paste(sum(is.na(clearance.table$CLint.1uM.Point) &
#                   clearance.table$CLint.1uM.Median==0,
#                 na.rm=TRUE),
#             "zero valued median Bayesian posteriors where point estimate was NA"))
# print(paste(sum(is.na(clearance.table$CLint.1uM.Point) &
#                   clearance.table$CLint.1uM.Median>0,
#                 na.rm=TRUE),
#             "non-zero valued median Bayesian posteriors where point estimate was NA"))
# 
# 
# 
# 
# clearance.table$Fit <- "Decreasing"
# clearance.table[clearance.table$CLint.1uM.Median == 0,"Fit"] <- "Median Zero"
# #clearance.table[is.na(clearance.table$CLint.1uM.Point == 0),"Fit"] <- "Point Est. Missing"
# for (i in 1:dim(clearance.table)[1])
#   if (!is.na(clearance.table[i,"CLint.1uM.Point"]))
#   {
#     if (clearance.table[i,"CLint.1uM.Point"] == 0) clearance.table[i,"Fit"] <- "Point Est. Zero"
#   } else clearance.table[i,"Fit"] <- "Point Est. Failed"
# 
# 
# set.seed(123456)
# clearance.table[is.na(clearance.table$CLint.1uM.Point),"CLint.1uM.Point"]<-runif(sum(is.na(clearance.table$CLint.1uM.Point)),0.1,0.2)
# clearance.table[clearance.table$CLint.1uM.Point==0,"CLint.1uM.Point"]<-runif(sum(clearance.table$CLint.1uM.Point==0),0.3,0.8)
# clearance.table[clearance.table$CLint.1uM.Low95th==0,"CLint.1uM.Low95th"]<-0.1
# clearance.table[clearance.table$CLint.1uM.Median==0,"CLint.1uM.Median"]<-0.1
# clearance.table[clearance.table$CLint.1uM.High95th==0,"CLint.1uM.High95th"]<-0.1
# clearance.table[clearance.table$CLint.1uM.High95th>1000,"CLint.1uM.High95th"]<-1000
# clearance.table[clearance.table$CLint.1uM.Low95th<0.1,"CLint.1uM.Low95th"]<-0.1
# 
# clearance.fit <- lm(log10(CLint.1uM.Median)~log10(CLint.1uM.Point),data=subset(clearance.table,!is.na(CLint.1uM.Point)&CLint.1uM.Point>1))
# Fig4 <- ggplot(clearance.table, aes(x=CLint.1uM.Point,y=CLint.1uM.Median,colour=Fit)) +
#   geom_segment(aes(x=CLint.1uM.Point,xend=CLint.1uM.Point,y=CLint.1uM.Low95th,yend=CLint.1uM.High95th),size=1,alpha=0.5)+
#   geom_point(size=3) +
#   scale_y_log10(label=label_log(),limits=c(10^-1,1000)) +
#   scale_x_log10(label=label_log(),limits=c(10^-1,1000)) +
#   xlab(expression(paste(CL[int]," (",mu,"L/min/",10^{6}," Hep.) Point Estimate",sep="")))+
#   ylab(expression(paste(CL[int]," (",mu,"L/min/",10^{6}," Hep.) Bayesian",sep="")))+
#   geom_vline(xintercept = 0.25, size=2,colour="black")+
#   geom_vline(xintercept = 0.9, size=2,colour="black")+
#   geom_abline(intercept = 0, slope = 1,linetype="dashed")+
#   scale_colour_discrete(name="Assay Result")+
#   theme_bw() +
#   theme(legend.position="bottom", text  = element_text(size=14))+
#   guides(colour=guide_legend(nrow=2,byrow=TRUE))+
#   annotate("text",x = 10^2, y = 10^0,size=6, label = lm_R2(clearance.fit,prefix=""),parse=TRUE)
# 
# #dev.new()
# print(Fig4)
# 
# summary(clearance.fit)

## ----Wambaugh2019.Fig5, eval = execute.vignette-------------------------------
# # This section takes a long time because calc_vdist is not yet data.table compatible:
# ceetox <- as.data.table(subset(httkexamples::wambaugh2019,
#                                !is.na(Human.Funbound.plasma) &
#                                !is.na(Human.Clint)&!is.na(logP)))
# httk_cas <- ceetox$CAS[ceetox$CAS %in% get_cheminfo(model="3compartmentss",
#                                                     suppress.messages=TRUE)]
# 
# # back up chem.physical_and_invitro.data:
# HTTK.data <- chem.physical_and_invitro.data
# 
# # Use the point estimates:
# chem.physical_and_invitro.data <- add_chemtable(httkexamples::wambaugh2019.raw,
#   current.table=chem.physical_and_invitro.data,
#   data.list=list(Compound="Name",
#                  CAS="CAS",
#                  Clint="CLint.1uM.Point",
#                  Funbound.plasma="Fup.point"),
#   reference="Wambaugh 2019",
#   species="Human",
#   overwrite=TRUE)
# 
# 
# 
# schmitt_cas <- httk_cas[httk_cas %in% get_cheminfo(model="schmitt",
#                                                    suppress.messages=TRUE)]
# ceetox[CAS%in%schmitt_cas,
#        hlpoint:=log(2)/httk::calc_elimination_rate(chem.cas=CAS,
#                                         suppress.messages=TRUE),
#        by=.(CAS)]
# 
# chem.physical_and_invitro.data <- HTTK.data
# 
# ceetox[CAS%in%schmitt_cas,
#        hl_975:=hlfun_u(chemcas=CAS,
#                           MW=MW,
#                           probs=0.975),
#        by=.(CAS)]
# 
# ceetox[CAS%in%schmitt_cas,
#        hl_med:=hlfun_u(chemcas=CAS,
#                           MW=MW,
#                           probs=0.5),
#        by=.(CAS)]
# 
# ceetox[CAS%in%schmitt_cas,
#        hl_25:=hlfun_u(chemcas=CAS,
#                          MW=MW,
#                           probs=0.025),
#        by=.(CAS)]
# 
# ceetox[,hlpointEstimated:=TRUE]
# ceetox[is.na(hlpoint),hlpointEstimated:=FALSE]
# ceetox[is.na(hlpoint),hlpoint:=hl_med]

## ----Wambaugh2019.Fig6, eval = execute.vignette-------------------------------
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        css95_uv:=calc_mc_css(chem.cas=CAS,
#                                 # clint.pvalue.threshold=1, #Don't need to use p-values here (Bayesian)
#                                  output.units="uM",
#                                  model="3compartmentss",
#                                  species="Human",
#                                         suppress.messages=TRUE),
#        by=.(CAS)]
# 
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        css95_v:=calc_mc_css(chem.cas=CAS,
#                                  #clint.pvalue.threshold=1, #Don't need to use p-values here (Bayesian)
#                                  output.units="uM",
#                                  invitro.mc.arg.list=list(
#                                    clint.meas.cv=NULL,
#                                    fup.meas.cv=NULL,
#                                    clint.pop.cv=0.3,
#                                    fup.pop.cv=0.3
#                                  ),
#                                  model="3compartmentss",
#                                  species="Human",
#                                         suppress.messages=TRUE),
#        by=.(CAS)]
# 
# 
# 
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        css95_u:=cssfun_u(chemcas=CAS,
#                           MW=MW),
#        by=.(CAS)]
# 
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        cssmed_u:=cssfun_u(chemcas=CAS,
#                           MW=MW,
#                           probs=0.5),
#        by=.(CAS)]
# 
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        css25_u:=cssfun_u(chemcas=CAS,
#                           MW=MW,
#                           probs=0.025),
#        by=.(CAS)]
# 
# set.seed(42)
# ceetox[CAS%in%httk_cas,
#        css975_u:=cssfun_u(chemcas=CAS,
#                           MW=MW,
#                           probs=0.975),
#        by=.(CAS)]
# 
# ceetox[CAS%in%httk_cas,
#        cssmed:=httk::calc_analytic_css(chem.cas=CAS,
#                                  clint.pvalue.threshold=1,
#                                  output.units="uM",
#                                  model="3compartmentss",
#                                  species="Human",
#                                  suppress.messages=TRUE),
#        by=.(CAS)]

## ----Wambaugh2019.Fig5.2, eval = execute.vignette-----------------------------
# hlfit <- lm(data=subset(ceetox,hlpointEstimated &
#                           !is.na(log10(hlpoint)) &
#                           !is.na(log10(hl_med)) &
#                           !is.infinite(log10(hl_med))),
#             log10(hlpoint)~log10(hl_med))
# Fig5a <- ggplot(data=subset(ceetox,hlpointEstimated),aes(x=hlpoint,y=hl_med)) +
#   geom_point(size=3,alpha=0.5) +
#   geom_errorbar(aes(ymax = hl_975, ymin=hl_25),alpha=0.3)+
#   scale_y_log10(label=label_log(),limits=c(1,10^6)) +
#   scale_x_log10(label=label_log(),limits=c(1,10^6)) +
#   xlab("half-life (h) Point Estimate")+
#   ylab("Bayesian half-life (h)")+
#   geom_abline(intercept = 0, slope = 1,linetype="dashed",color="blue")+
#   annotate("text", size=8,x = 1, y = 10^6, label = "A")+
#   theme_bw() +
#   theme(text  = element_text(size=18))+
#   annotate("text",x = 3*10^3, y = 10^0,size=5, label = lm_R2(hlfit,prefix=""),parse=TRUE)
# 
# 
# cssfit <- lm(data=subset(ceetox,
#                          !is.na(log10(cssmed)) &
#                          !is.na(log10(cssmed_u)) &
#                          !is.infinite(log10(cssmed_u))),
#              log10(cssmed)~log10(cssmed_u))
# Fig5b <- ggplot(data=ceetox,aes(x=cssmed,y=cssmed_u)) +
#   geom_point(size=3,alpha=0.5) +
#   geom_errorbar(aes(ymin=css25_u,ymax=css975_u),alpha=0.3)+
#   scale_y_log10(label=label_log(),limits=c(10^-3,10^5)) +
#   scale_x_log10(label=label_log(),limits=c(10^-3,10^5)) +
#   xlab(expression(paste(C[ss]," Point Estimate (uM)")))+
#   ylab(expression(paste("Bayesian ",C[ss]," (uM)")))+
#   annotate("text", size=8,x = 10^-3, y = 10^5, label = "B")+
#   theme_bw() +
#   theme(text  = element_text(size=18)) +
#   geom_abline(intercept = 0, slope = 1,linetype="dashed",color="blue")+
#   annotate("text",x = 10^2, y = 10^-3,size=5, label = lm_R2(cssfit,prefix=""),parse=TRUE)
# 
# 
# #dev.new(width=8,height=5)
# multiplot(Fig5a,Fig5b,cols=2,widths=c(1.75,1.75))

## ----Wambaugh2019.Fig6.2, eval = execute.vignette-----------------------------
# # Go back to Bayes estimates:
# chem.physical_and_invitro.data <- HTTK.data
# 
# dt_melt <- data.table::melt(ceetox[, .(Compound, CAS, css95_u, css95_v, css95_uv, cssmed)],
#                             id.vars=c("Compound","CAS", "cssmed"),
#                             variable.name="type",
#                             value.name="css95")
# 
# dt_melt[, type:=gsub(x=type, pattern="css95_uv", replacement="Both", fixed=TRUE)]
# dt_melt[, type:=gsub(x=type, pattern="css95_u", replacement="Uncertainty", fixed=TRUE)]
# dt_melt[, type:=gsub(x=type, pattern="css95_v", replacement="Variability", fixed=TRUE)]
# 
# 
# #now set the factor levels explicitly
# dt_melt[, type:=factor(type,
#                        levels=c("Uncertainty", "Variability", "Both"))]
# 
# 
# dt_melt[, Norm:=css95/cssmed]
# 
# 
# #now set the factor levels explicitly
# dt_melt[, Compound:=factor(Compound,
#                        levels=dt_melt[type=="Both",Compound][order(dt_melt[type=="Both",Norm])])]
# 
# 
# Fig6 <- ggplot(data=dt_melt) +
#   geom_point(size=2,alpha=0.7,aes(x=Compound,
#                 y=Norm,
#                 color=type,
#                 shape=type)) +
#    scale_y_log10(label=label_log(),limits=c(0.9,500)) +
#   ylab(expression(paste("Ratio of ",C[ss]," ",95^th," Percentile to Median Estimate"))) +
#   xlab("Chemicals")+
#   scale_colour_discrete(name=expression(paste(C[ss]," Varied to Reflect")))+
#   scale_shape_discrete(name=expression(paste(C[ss]," Varied to Reflect"))) +
#   theme_bw() +
#   theme(legend.position="bottom",axis.text.x = element_blank())+
#   annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 400, label = paste("Median Ratio for Uncertainty:",signif(median(subset(dt_melt,type=="Uncertainty")$Norm),3)))+
#   annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 200, label = paste("Median Ratio for Variability:",signif(median(subset(dt_melt,type=="Variability")$Norm),3)))+
#   annotate("text", size=8,x = levels(dt_melt$Compound)[length(unique(dt_melt$Compound))/2], y = 100, label = paste("Median Ratio for Both:",signif(median(subset(dt_melt,type=="Both")$Norm),3)))
# 
# #dev.new(width=10,height=6)
# print(Fig6)

## ----Wambaugh2019.Fig7,  eval = execute.vignette------------------------------
# # Head to ftp://newftp.epa.gov/COMPTOX/High_Throughput_Screening_Data/Previous_Data/ToxCast_Data_Release_Oct_2015/ to get the ToxCast/Tox21 data:
# # These are the concentrations that caused activity in exess of the background (ACC) "hits".
# #
# # # Load the ACC table into an object Tox21.acc:
# # Tox21.acc <- read.csv("INVITRODB_V2_LEVEL5/EXPORT_LVL5&6_ASID7_TOX21_151020.csv",stringsAsFactors=FALSE)
# # # Subset to the chemicals of interest:
# # Tox21.acc <- subset(Tox21.acc,casn%in%Fig1.table$CAS)
# # # We only need hits:
# # Tox21.acc <- subset(Tox21.acc,hitc==1)
# #
# # # Pare this down to just the data we need:
# # Tox21.acc <- Tox21.acc[,c("code","chid","chnm","casn","aenm","modl_acc","flags")]
# #
# # # In this vignette we use the precalculated quantiles of the ACC's
# # # to help keep the HTTK package smaller:
# #
# # wambaugh2019.tox21 <- NULL
# # for (this.chem in sort(unique(Tox21.acc$casn)))
# # {
# #   this.subset <- subset(Tox21.acc,casn==this.chem)
# #   this.row <- data.frame(casn=this.chem,
# #     med.conc =
# #       median(10^(this.subset$modl_acc)),
# #     q10.conc =
# #       quantile(10^(this.subset$modl_acc),0.1),
# #     low.conc =
# #       min(10^(this.subset$modl_acc)),
# #     q90.conc =
# #       quantile(10^(this.subset$modl_acc),0.9),
# #     high.conc =
# #       max(10^(this.subset$modl_acc)),
# #     stringsAsFactors = F)
# #   wambaugh2019.tox21  <- rbind(wambaugh2019.tox21, this.row)
# # }
# 
# chem.preds <- httkexamples::wambaugh2019.seem3
# directnhanes.preds <- httkexamples::wambaugh2019.nhanes
# Tox21.acc.numeric <- httkexamples::wambaugh2019.tox21
# 
# #Subset to those chemicals with HTS hits:
# human.hits <- subset(ceetox,CAS%in%Tox21.acc.numeric$casn)
# 
# # Now calculate the oral equivalent dose for each chemical:
# # Must make sure that CSS is in units of uM!!
# for (this.chem in human.hits$CAS)
# {
#   med.conc <-
#     Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"med.conc"]
#   q10.conc <-
#     Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"q10.conc"]
#   low.conc <-
#     Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"low.conc"]
#   q90.conc <-
#     Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"q90.conc"]
#   high.conc <-
#     Tox21.acc.numeric[Tox21.acc.numeric$casn==this.chem,"high.conc"]
#   css95.v <- human.hits[human.hits$CAS==this.chem][["css95_v"]]
#   css95.uv <- human.hits[human.hits$CAS==this.chem][["css95_uv"]]
#   human.hits[human.hits$CAS==this.chem,"HTS.Median.ACC"] <- med.conc
#   human.hits[human.hits$CAS==this.chem,"HTS.Q10.ACC"] <- q10.conc
#   human.hits[human.hits$CAS==this.chem,"HTS.Q90.ACC"] <- q90.conc
#   human.hits[human.hits$CAS==this.chem,"HTS.Low.ACC"] <- low.conc
#   human.hits[human.hits$CAS==this.chem,"HTS.High.ACC"] <- high.conc
#   human.hits[human.hits$CAS==this.chem,"HTS.Median.equivdose.v"] <- med.conc/css95.v
#   human.hits[human.hits$CAS==this.chem,"HTS.Q10.equivdose.v"] <- q10.conc/css95.v
#   human.hits[human.hits$CAS==this.chem,"HTS.Q90.equivdose.v"] <- q90.conc/css95.v
#   human.hits[human.hits$CAS==this.chem,"HTS.Low.equivdose.v"] <- low.conc/css95.v
#   human.hits[human.hits$CAS==this.chem,"HTS.High.equivdose.v"] <- high.conc/css95.v
#   human.hits[human.hits$CAS==this.chem,"HTS.Median.equivdose.uv"] <- med.conc/css95.uv
#   human.hits[human.hits$CAS==this.chem,"HTS.Q10.equivdose.uv"] <- q10.conc/css95.uv
#   human.hits[human.hits$CAS==this.chem,"HTS.Q90.equivdose.uv"] <- q90.conc/css95.uv
#   human.hits[human.hits$CAS==this.chem,"HTS.Low.equivdose.uv"] <- low.conc/css95.uv
#   human.hits[human.hits$CAS==this.chem,"HTS.High.equivdose.uv"] <- high.conc/css95.uv
# }
# 
# 
# 
# #Add the exposure predictions:
# for (this.chem in human.hits$CAS)
# {
#   print(this.chem)
# # If we have direct inferrences from NHANES, use those exposures:
#   if (this.chem %in% directnhanes.preds$CASRN)
#   {
#     human.hits[human.hits$CAS==this.chem,"Exposure.median"] <- directnhanes.preds[directnhanes.preds$CASRN==this.chem,"lP"]
#     human.hits[human.hits$CAS==this.chem,"Exposure.high"] <- directnhanes.preds[directnhanes.preds$CASRN==this.chem,"lP.max"]
# # Otherwise use the heuristics model (Wambaugh 2014):
#   } else if (this.chem %in% chem.preds$CAS) {
#     human.hits[human.hits$CAS==this.chem,"Exposure.median"] <- chem.preds[chem.preds$CAS==this.chem,"seem3"]
#     human.hits[human.hits$CAS==this.chem,"Exposure.high"] <- chem.preds[chem.preds$CAS==this.chem,"seem3.u95"]
#   }
# }
# 
# # Drop chemicals without exposure predictions:
# human.hits<- subset(human.hits,!is.na(Exposure.median))
# 
# 
# # This code puts the chemicals into order by margin between exposure and activity:
# chem.names <- unique(human.hits$Compound)
# chem.names <- chem.names[!is.na(chem.names)]
# potency <- rep(Inf,times=length(chem.names))
# names(potency) <- chem.names
# potency.u <- potency
# for (this.chem in chem.names)
# {
#   this.subset <- subset(human.hits,Compound==this.chem)
#   if (dim(this.subset)[1]>0)
#   {
#     potency[this.chem] <- this.subset$HTS.Q10.equivdose.v/this.subset$Exposure.high
#     potency.u[this.chem] <- this.subset$HTS.Q10.equivdose.uv/this.subset$Exposure.high
#   }
# }
# potency <- sort(potency)
# human.hits$Compound <- factor(human.hits$Compound, levels=names(potency))
# 
# potency.u <- potency.u[names(potency.u)[potency.u<1]]
# if (any(names(potency)[potency>1] %in% names(potency.u)))
# {
#   potency.u <- potency.u[names(potency)[potency>1]]
# 
# new.overlaps <- names(potency.u[!is.na(potency.u)])
# first.change <- new.overlaps[1]
# last.change <- new.overlaps[length(new.overlaps)]
# window.start <- names(potency)[which(names(potency)==first.change)-5]
# window.end <- names(potency)[which(names(potency)==last.change)+5]
# window.names <- names(potency)[(which(names(potency)==first.change)-5):(which(names(potency)==last.change)+5)]
# # Initialize a new plot:
# Fig7a <- ggplot(data=human.hits) +
#   geom_segment(aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=1,color="white",alpha=0.5) +
#   geom_rect(mapping=aes(xmin=window.start,xmax=window.end,ymin=10^-9,ymax=10^3),color="grey",alpha=0.5)+
#   geom_segment(aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=1,color="red",alpha=0.5) +
#   geom_point(aes(x=Compound,y=HTS.Median.equivdose.v),shape=3) +
#   geom_point(aes(x=Compound,y=HTS.Q10.equivdose.v)) +
#   theme_bw() +
#   theme(axis.text.x = element_blank()) +
#   theme(axis.title.x = element_text(size=16)) +
#   theme(axis.title.y = element_text(size=16)) +
#   scale_y_log10(label=label_log(),limits = c(10^-9,10^3)) +
#   geom_segment(aes(x=Compound,xend=Compound,y=Exposure.high,yend=Exposure.median),size=1,color="blue",alpha=0.5) +
#   geom_point(aes(x=Compound,y=Exposure.median),shape=2) +
#   ylab("Bioactive Dose & Exposure\nmg/kg BW/day\n(Variability Only)") +
#   xlab("Chemicals Ranked By Ratio Between Bioactive Dose and Exposure")+
#   annotate("text", size=8,x = names(potency)[10], y = 100, label = "a")
# 
# # Initialize a new plot:
# Fig7c <- ggplot() +
#   geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.v,yend=HTS.Q90.equivdose.v),size=3,color="grey") +
#   geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=HTS.Q10.equivdose.uv,yend=HTS.Q90.equivdose.uv),size=3,color="red",alpha=0.5) +
#   geom_point(data=human.hits,aes(x=Compound,y=HTS.Median.equivdose.uv),shape=3,size=2) +
#   geom_point(data=human.hits,aes(x=Compound,y=HTS.Q10.equivdose.uv),size=2) +
#   theme_bw() +
#   theme(axis.text.x = element_text(angle = 60, hjust = 1,size=6)) +
#   theme(axis.text.x = element_blank()) +
#   theme(axis.title.x = element_text(size=16)) +
#   theme(axis.title.y = element_text(size=16)) +
#   xlim(window.names) +
#   scale_y_log10(label=label_log(),limits = c(10^-8,2*10^2)) +
#   geom_segment(data=human.hits,aes(x=Compound,xend=Compound,y=Exposure.high,yend=Exposure.median),size=3,color="blue",alpha=0.5) +
#   geom_segment(data=subset(human.hits,Compound%in%names(potency.u)),aes(x=Compound,xend=Compound,y=10^-8,yend=Exposure.median/1.5),size=2,arrow=arrow(length=unit(0.5,"cm")))+
#   geom_point(data=human.hits,aes(x=Compound,y=Exposure.median),shape=2,size=2) +
#   xlab("") +
#   ylab("Bioactive Dose & Exposure\nmg/kg BW/day\n(Uncertainty and Variability)") +
#   annotate("text", size=8,x = window.names[1], y = 100, label = "b")
# 
# 
# #print(Fig10b)
# #dev.new(width=10,height=6)
# multiplot(Fig7a,Fig7c,cols=1,heights=c(0.5,0.5))
# 
# write.csv(subset(human.hits,Exposure.high>HTS.Q10.equivdose.uv)[,c("Compound","DSSTox_Substance_Id","Human.Clint","Human.Funbound.plasma","HTS.Q10.ACC","HTS.Q10.equivdose.v","HTS.Q10.equivdose.uv","Exposure.high")],file=paste("SupTable5-",Sys.Date(),".txt",sep=""),row.names=FALSE)
# }

## ----calc.stats,  eval = execute.vignette-------------------------------------
# Wetmore.chems <- subset(chem.physical_and_invitro.data,regexpr("Wetmore",Human.Clint.Reference)!=-1)[,c("Compound","CAS","DTXSID","Formula")]
# Wetmore.chems <- subset(Wetmore.chems,CAS%in%get_cheminfo())
# 
# for (this.cas in Wetmore.chems$CAS)
# {
#   Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.Lit"] <- get_lit_css(
#     chem.cas=this.cas,
#     output.units="uM")
#   set.seed(42)
#   Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.v192"] <- calc_mc_css(
#     chem.cas=this.cas,
#     invitro.mc.arg.list=list(
#       fup.meas.cv=NULL,
#       clint.meas.cv=NULL,
#       fup.censored.dist=TRUE),
#     output.units="uM")
#   set.seed(42)
#   Wetmore.chems[Wetmore.chems$CAS==this.cas,"Css.v110"] <- calc_mc_css(
#     chem.cas=this.cas,
#     output.units="uM")
# }
# 
# median(Wetmore.chems$Css.v110/Wetmore.chems$Css.Lit,na.rm=TRUE)
# 
# ceetox$Human.Clint2 <- sapply(as.character(ceetox$Human.Clint),
#                               function(x) as.numeric(strsplit(x,",")[[1]][1]))
# ceetox$Human.Fup2 <- sapply(as.character(ceetox$Human.Funbound.plasma),
#                             function(x) as.numeric(strsplit(x,",")[[1]][1]))
# write.csv(ceetox[,c(1,2,8,9,16,17,6,7,12,30,10,11,15,31,13,14,18:28)],row.names=FALSE,file=paste("Supplemental-Table4-",Sys.Date(),".txt",sep=""))
# 
# print(paste("Complete HTTK data on",sum(get_cheminfo()%in%ceetox$CAS),"new chemicals."))