## ----setup, include = FALSE---------------------------------------------------
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")

knitr::opts_chunk$set(purl = LOCAL)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
suppressPackageStartupMessages(library(SelectBoost.beta))
set.seed(1)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
sim <- simulation_DATA.beta(
  n = 300, p = 10, s = 4, beta_size = c(1.0, -0.8, 0.6, -0.5),
  corr = "ar1", rho = 0.25,
  mechanism = "mixed", mix_prob = 0.5,
  delta = function(mu, X) 0.03 + 0.02 * abs(mu - 0.5),
  alpha = function(mu, X) 0.1 + 0.05 * (mu < 0.3),
  na_rate = 0.1, na_side = "random"
)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
summary(sim$Y)
mean(is.na(sim$Y_low) | is.na(sim$Y_high))

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
head(sim$Y_low, 10)
head(sim$Y_high, 10)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
interval_width <- sim$Y_high - sim$Y_low
hist(interval_width, breaks = 30, col = "#0A6AA6", border = "white",
     main = "Distribution of simulated interval widths",
     xlab = "Y_high - Y_low")

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
pseudo_y <- ifelse(
  is.na(sim$Y_low) | is.na(sim$Y_high),
  ifelse(is.na(sim$Y_low), sim$Y_high, sim$Y_low),
  0.5 * (sim$Y_low + sim$Y_high)
)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
single <- compare_selectors_single(sim$X, pseudo_y, include_enet = FALSE)
head(single$table)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
freq <- compare_selectors_bootstrap(
  sim$X, pseudo_y, B = 15, include_enet = FALSE, seed = 321
)
head(freq)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
summary_tab <- compare_table(single$table, freq)
head(summary_tab)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
plot_compare_coeff(single$table)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
plot_compare_freq(freq)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
fb <- fastboost_interval(
  sim$X, sim$Y_low, sim$Y_high,
  func = function(X, y) betareg_glmnet(X, y, choose = "bic", prestandardize = TRUE),
  B = 30, seed = 99
)
sort(fb$freq, decreasing = TRUE)[1:5]

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
comp_id <- complete.cases(sim$Y_low) & complete.cases(sim$Y_high)

sb_interval <- sb_beta_interval(
  sim$X[comp_id, ],
  Y_low = sim$Y_low[comp_id],
  Y_high = sim$Y_high[comp_id],
  B = 30,
  step.num = 0.4,
  sample = "uniform"
)
attr(sb_interval, "interval")
head(sb_interval)

## ----cache=TRUE, eval=LOCAL---------------------------------------------------
summary(sb_interval)
autoplot.sb_beta(sb_interval)

