## ----setup, include=FALSE-----------------------------------------------------

# Chunk defaults applied to the rest of the document: 7 x 5 figures
# (width x height) and collapsed output whose lines are prefixed with "#>".
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5,
  comment = "#>",
  collapse = TRUE
)

## ----eval=F-------------------------------------------------------------------
# ## Install packages from CRAN repository
# install.packages(c("dplyr", "grmtree"))

## ----message=FALSE, warning=FALSE---------------------------------------------
library(dplyr)        # For data manipulation
library(grmtree)      # For tree-based GRM DIF Test

## ----message=FALSE------------------------------------------------------------
## Load the example data shipped with the grmtree package
data("grmtree_data", package = "grmtree")

## Take a glimpse at the raw data
glimpse(grmtree_data)

## Prepare the data: every column whose name starts with "MOS" becomes an
## ordered factor, and the categorical covariates become (unordered) factors.
resp.data <- grmtree_data %>%
  mutate(
    across(starts_with("MOS"), as.ordered),
    across(
      c(sex, residency, depressed, Education, job, smoker, multimorbidity),
      as.factor
    )
  )

## Explore the first rows of the prepared data
head(resp.data)

## Check the structure of the prepared data
glimpse(resp.data)

## Create the response matrix used as the model outcome; data.matrix()
## converts the selected columns to a numeric matrix.
## NOTE(review): assumes the first eight columns are the MOS items -- confirm.
resp.data$resp <- data.matrix(resp.data[, 1:8])

## -----------------------------------------------------------------------------
## Documentation for the forest control options
# ?grmforest.control

## Settings for the individual GRM trees; p-values are adjusted with the
## Benjamini-Hochberg ("BH") method
grm_control <- grmtree.control(
  alpha = 0.05,
  p_adjust = "BH",
  minbucket = 350
)

## Settings for the forest as a whole
forest_control <- grmforest.control(
  seed = 123,
  control = grm_control,
  n_tree = 3,               # Number of trees (kept small for vignette build time)
  mtry = sqrt(9),           # Square root of the number of covariates (9)
  sampling = "bootstrap",   # Bootstrap method; resampling also available
  sample_fraction = 0.632,
  remove_dead_trees = TRUE  # Remove any null GRMTree
)

## ----eval=FALSE---------------------------------------------------------------
# ## Fit the GRM forest
# mos_forest <- grmforest(
#   resp ~ sex + age + bmi + Education +
#   residency + depressed + job + multimorbidity + smoker,
#   data = resp.data,
#   control = forest_control
# )
# 
# ## Get the summary of the fitted forest
# summary(mos_forest)
# print(mos_forest)
# 
# ## Plot a tree in the forest
# plot(mos_forest$trees[[1]])

## ----eval=FALSE---------------------------------------------------------------
# ## Calculate the variable importance
# importance <- varimp(mos_forest, seed = 123, verbose = TRUE)
# 
# ## Print the result of the variable importance
# print(importance)

## ----eval=FALSE---------------------------------------------------------------
# ## Plot the variable importance scores (ggplot is the default)
# plot(importance)
# 
# ## Plot only the top 5 most important variables
# plot(importance, xlab = "", top_n = 5)
# 
# ## Plot the base R version
# plot(importance, use_ggplot = FALSE)
# 
# ## Custom colors
# plot(importance, col = c("green", "red"))
# 
# ## Rename the variable names in the order from the variable importance result
# names(importance) <- c("Age", "Smoking Status", "BMI",
#                        "Multimorbidity", "Sex", "Education",
#                        "Residency", "Depression", "Employment")
# 
# ## Now create the plot with informative names
# plot(importance)