## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
library(AssocBin)

## -----------------------------------------------------------------------------
data(heart)

## -----------------------------------------------------------------------------
str(heart)

## -----------------------------------------------------------------------------
heartClean <- heart
heartClean$thal <- NULL
heartClean$ca <- NULL
heartClean$slope <- NULL
heartClean <- na.omit(heartClean)
str(heartClean)

## ----fig.width=6, fig.height=6, fig.align = 'center'--------------------------
depDisplay(heartClean$sex, heartClean$num)

## ----fig.width=6, fig.height=6, fig.align = 'center'--------------------------
SexVsNum <- depDisplay(heartClean$sex, heartClean$num, xlab = "Sex", 
                       ylab = "Number of arteries >50% obstructed", 
                       pch = 20)

## -----------------------------------------------------------------------------
rbind(cbind(table(num = heartClean$num, sex = heartClean$sex), total = table(heartClean$num)),
            total = c(table(heartClean$sex), nrow(heartClean)))

## -----------------------------------------------------------------------------
str(SexVsNum, 1)

## -----------------------------------------------------------------------------
str(SexVsNum[[1]])

## -----------------------------------------------------------------------------
binChi(SexVsNum)

## ----fig.width=6, fig.height=6, fig.align = 'center'--------------------------
set.seed(1235) # more on this later
# the depDisplay function also has a method for data.frames
AgeVsNum <- depDisplay(x = heartClean, pair="age:num", xlab = "Age", 
                       ylab = "Number of arteries >50% obstructed", 
                       pch = 20, col = adjustcolor('gray50', alpha.f=0.5))

## ----fig.width=6, fig.height=6, fig.align = 'center'--------------------------
set.seed(812)
AgeVsChol <- depDisplay(heartClean$thalach, heartClean$oldpeak, 
                        xlab = "Maximum heart rate during exercise",
                        ylab = "ST wave depression during exercise",
                        pch = 20, col = adjustcolor('gray50', alpha.f=0.5))

## ----fig.width=6, fig.height=6, fig.align = 'center'--------------------------
set.seed(812)
AgeVsNum <- depDisplay(heartClean$thalach, heartClean$oldpeak, 
                       xlab = "Maximum heart rate during exercise",
                       ylab = "ST wave depression during exercise",
                       pch = 20, col = adjustcolor('gray50', alpha.f=0.5),
                       border = "black")

## -----------------------------------------------------------------------------
heartAssociations <- DepSearch(heartClean)

## -----------------------------------------------------------------------------
summary(heartAssociations)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(heartAssociations)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(heartAssociations, which = 62:66)

## -----------------------------------------------------------------------------
stopCrits <- makeCriteria(depth >= 10, # maximum depth of 10
                          expn <= 10, # smallest possible bin size of 5
                          n < 1 # don't split empty bins
                          )
stopCrits

## -----------------------------------------------------------------------------
greedyCrits <- makeCriteria(abs(expn - n)/sqrt(expn) > 4,
                            expn <= 10,
                            n < 1)
greedyCrits

## -----------------------------------------------------------------------------
conConChi <- function(bn) maxScoreSplit(bin = bn, scorer = chiScores)
# the univariate splitter requires an additional argument specifying which
# margin should be split
catConChi <- function(bn, on) uniMaxScoreSplit(bin = bn, scorer = chiScores,
                                               on = on)

## -----------------------------------------------------------------------------
heartAssociations_greedy <- DepSearch(heartClean,
                                      stopCriteria=greedyCrits,
                                      catCon=catConChi,
                                      conCon=conConChi)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(heartAssociations_greedy)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
# a final way to use depDisplay is on a depSearch object
depDisplay(heartAssociations, pair="thalach:oldpeak",
           xlab = "Maximum heart rate during exercise",
           ylab = "ST wave depression during exercise",
           pch = "+", col = adjustcolor('purple', alpha.f=0.5),
           border = "black")

## -----------------------------------------------------------------------------
thalachOldpeak <- heartAssociations$binnings[["thalach:oldpeak"]]

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
# note that plotBinning does not have access to the marginal information to plot
# quantiles and so the marginal labels give the ranks
plotBinning(thalachOldpeak, pch = 20, 
            xlab = "Maximum heart rate during exercise",
            ylab = "ST wave depression during exercise",
            showXax = TRUE, showYax = TRUE,
            fill=depthFill(thalachOldpeak))

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
x <- rnorm(1000)
y <- 2*x + rnorm(1000, sd = 0.3)
rankx <- rank(x, ties.method = "random")
ranky <- rank(y, ties.method = "random")

# set up splitting criteria: depth stop limits run time (not necessary here)
criteria <- makeCriteria(expn <= 10, n == 0, depth >= 10)
# define the stop function using these criteria
stopFn <- function(bns) stopper(bns, criteria)
# use binner to run the algorithm
xyBins <- binner(x = rankx, y = ranky, stopper = stopFn, splitter = rIntSplit)

# plot with depthfill
set.seed(2119)
plotBinning(xyBins, fill=depthFill(xyBins), pch = 20)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plotBinning(thalachOldpeak, pch = 20, 
            xlab = "Maximum heart rate during exercise",
            ylab = "ST wave depression during exercise",
            showXax = TRUE, showYax = TRUE,
            fill=residualFill(thalachOldpeak, nbr = 10))

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plotBinning(thalachOldpeak, pch = 20, 
            xlab = "Maximum heart rate during exercise",
            ylab = "ST wave depression during exercise",
            showXax = TRUE, showYax = TRUE,
            fill=residualFill(thalachOldpeak, nbr = 50,
                              resFun=binMI,
                              colrng = c("orange", "pink", "blue")))

## -----------------------------------------------------------------------------
summary(heartAssociations)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(heartAssociations)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
depDisplay(heartAssociations, pair = 1, pch = 20, 
           xlab="Study", ylab="Serum cholesterol")

## -----------------------------------------------------------------------------
heartClean$chol[heartClean$study == "switzerland"]

## -----------------------------------------------------------------------------
heartCleveland <- heartClean[heartClean$study == "cleveland",]
heartCleveland$study <- NULL
clevelandAssociations <- DepSearch(heartCleveland)
set.seed(90192)
summary(clevelandAssociations)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(clevelandAssociations, which = 1:3)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(clevelandAssociations, which = 4:9)

## ----fig.width=6, fig.height=6, fig.align='center'----------------------------
plot(clevelandAssociations, which = 10:16)

