## ----setup, include=FALSE-----------------------------------------------------
knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      # record the current time before each chunk
      now <<- Sys.time()
    } else {
      # calculate the time difference after a chunk
      res <- difftime(Sys.time(), now, units = "secs")
      # return a character string to show the time
      paste("Time for this code chunk to run:", round(res,
        2), "seconds")
    }
  }
}))
knitr::opts_chunk$set(dev = "png", dev.args = list(type = "cairo-png"), time_it=TRUE)

## ----message=FALSE------------------------------------------------------------
set.seed(1)

## ----message=FALSE------------------------------------------------------------
## Loading the TraMineR library
library(TraMineR)
## Loading the data
data(mvad)

## State properties
mvad.alphabet <- c("employment", "FE", "HE", "joblessness", "school", "training")
mvad.lab <- c("employment", "further education", "higher education", "joblessness", "school", "training")
mvad.shortlab <- c("EM","FE","HE","JL","SC","TR")

## Creating the state sequence object
mvad.seq <- seqdef(mvad[, 17:86], alphabet = mvad.alphabet, states = mvad.shortlab, 
                   labels = mvad.lab, xtstep = 6)

## ----message=FALSE------------------------------------------------------------
## Using fastcluster for hierarchical clustering
library(fastcluster)
## Distance computation
diss <- seqdist(mvad.seq, method="LCS")
## Hierarchical clustering
hc <- hclust(as.dist(diss), method="ward.D")

## ----message=FALSE------------------------------------------------------------
# Loading the WeightedCluster library
library(WeightedCluster)
# Computing cluster quality measures.
clustqual <- as.clustrange(hc, diss=diss, ncluster=10)
clustqual

## ----fig.width=8, fig.height=10-----------------------------------------------
seqdplot(mvad.seq, group=clustqual$clustering$cluster6, border=NA)

## ----message=FALSE------------------------------------------------------------
# Add the clustering solution to the dataset
mvad$clustering <- clustqual$clustering$cluster6
# The first argument is a formula for the association between clustering and covariates of interest
# The function estimates separate logistic regressions for each cluster and compute the corresponding AMEs
# with their confidence interval
rarcatout <- rarcat(clustering ~ funemp + gcse5eq, data=mvad, diss=diss, robust=FALSE)
rarcatout

## ----message=FALSE------------------------------------------------------------
# # Loading the parallel library
library(future)
plan(multisession)

## ----message=FALSE------------------------------------------------------------
# Evaluate the validity of the original analysis and the reliability of its
# findings by applying RARCAT with 50 bootstrap replications.
# As in the original analysis, hierarchical clustering with Ward method is implemented.
# The number of clusters is fixed to 6 here.
rarcatout <- rarcat(clustering ~ funemp + gcse5eq, data = mvad, diss = diss, robust = TRUE, R = 100, 
                    kmedoid = FALSE, hclust.method = "ward.D", 
                    fixed = TRUE, ncluster = 6)
rarcatout

## ----message=FALSE------------------------------------------------------------
# Bootstrap replicates of the typology and its association with the variable funemp.
# As in the original analysis, hierarchical clustering with Ward method is implemented.
# Also, an optimal clustering solution with n between 2 and 10 is evaluated each time by
# maximizing the HC index.

# As we previously initalized parallel computing, it is used in these computations
vary.rarcat <- rarcat(clustering ~ funemp, data = mvad, diss = diss, R = 50,
                       kmedoid=FALSE, hclust.method = "ward.D",
                       fixed = FALSE, ncluster = 10, cqi = "HC")
vary.rarcat

## ----fig.width=8, fig.height=10-----------------------------------------------
# Histogram of the estimated AMEs for all individuals and all bootstraps
plot(rarcatout, covar="funempyes")

## ----message=FALSE------------------------------------------------------------
summary(rarcatout)

## ----message=FALSE, fig.width=8, fig.height=10--------------------------------
plot(rarcatout, what="ranef", covar="funempyes")

## ----message=FALSE, fig.width=8, fig.height=10--------------------------------
outliers <- abs(rarcatout$observation.stdranef[, "funempyes"])>2
seqIplot(mvad.seq[outliers, ], group=mvad$clustering[outliers])

## ----message=FALSE------------------------------------------------------------
# Loading the fpc library
library(fpc)
# Cluster-wise stability assessment by bootstrap
stab <- clusterboot(diss, B = 500, distances = TRUE, clustermethod = disthclustCBI, 
                    method = "ward.D", k = 6, count = FALSE)
stab

## ----include=FALSE------------------------------------------------------------
plan(sequential)

