## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options: collapse source and output into one block and
# prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----include=FALSE------------------------------------------------------------
library(Ckmeans.1d.dp)

## ----setup--------------------------------------------------------------------
# Silence warnings and messages in every subsequent chunk, then attach the
# package this walkthrough demonstrates.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(TidyConsultant)

## -----------------------------------------------------------------------------
# Load the `insurance` example dataset into the workspace.
data("insurance")

## -----------------------------------------------------------------------------
# Pipe the whole dataset into `diagnose()`; the result is auto-printed.
insurance %>% diagnose()

## -----------------------------------------------------------------------------
# Pipe the whole dataset into `diagnose_numeric()`; the result is auto-printed.
insurance %>% diagnose_numeric()

## -----------------------------------------------------------------------------
# Run `diagnose_category()` over every column with `max_distinct = 7`, and
# print the result at unlimited width so no columns are truncated.
insurance %>%
  diagnose_category(
    everything(),
    max_distinct = 7
  ) %>%
  print(width = Inf)

## -----------------------------------------------------------------------------
# Apply `determine_distinct()` to all columns; the result is auto-printed.
insurance %>% determine_distinct(everything())

## -----------------------------------------------------------------------------
# Store the output of `auto_cor()` (called with `sparse = TRUE`) as `cors`,
# then display it.
cors <- insurance %>%
  auto_cor(sparse = TRUE)

cors

## -----------------------------------------------------------------------------
# Run `auto_anova()` across all columns using the "first_level" baseline and
# keep the result as `anovas1`.
anovas1 <- insurance %>%
  auto_anova(everything(), baseline = "first_level")

# Print up to 50 rows of the result.
print(anovas1, n = 50)

## -----------------------------------------------------------------------------

# Same `auto_anova()` call as before, now with `sparse = TRUE` and a p-value
# threshold of 0.1; the result is kept as `anovas2`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
anovas2 <- insurance %>%
  auto_anova(
    everything(),
    baseline = "first_level",
    sparse = TRUE,
    pval_thresh = 0.1
  )

# Print up to 50 rows of the result.
anovas2 %>%
  print(n = 50)


## -----------------------------------------------------------------------------
# Dummy-encode `insurance` with `remove_most_frequent_dummy = TRUE` and keep the
# result as `insurance1`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
insurance1 <- insurance %>%
  create_dummies(remove_most_frequent_dummy = TRUE)

## -----------------------------------------------------------------------------
# Build a formula object from `insurance1` with `charges` as the target, store
# it as `charges_form`, and display it.
charges_form <- insurance1 %>%
  tidy_formula(target = charges)

charges_form

## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
# insurance1 %>%
#   auto_variable_contributions(formula = charges_form)

## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
# insurance1 %>%
#   auto_model_accuracy(formula = charges_form, include_linear = T)

## -----------------------------------------------------------------------------
# Bin the `charges` column of `insurance1` using the default `bin_cols()`
# settings, keep the result as `insurance_bins`, and display it.
insurance_bins <- insurance1 %>%
  bin_cols(charges)

insurance_bins

## -----------------------------------------------------------------------------
# Summarise the binned data; the result is auto-printed.
insurance_bins %>% bin_summary()

## -----------------------------------------------------------------------------
# Recode `smoker` via `set_fct()` with "yes" as the first level. This
# deliberately overwrites `insurance` in place, so later chunks see the
# recoded column.
insurance <- insurance %>%
  set_fct(smoker, first_level = "yes")

# Dummy-encode only the character columns, with `remove_first_dummy = TRUE`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
insurance_dummies <- insurance %>%
  create_dummies(where(is.character), remove_first_dummy = TRUE)

# Inspect the encoded data. The call is written with parentheses to match the
# other `diagnose()` call in this script.
insurance_dummies %>%
  diagnose()


## -----------------------------------------------------------------------------
# Build a formula object from `insurance_dummies` with `smoker` as the target,
# store it as `smoker_form`, and display it.
smoker_form <- insurance_dummies %>%
  tidy_formula(target = smoker)

smoker_form

## -----------------------------------------------------------------------------
# Fit `tidy_xgboost()` on `insurance_dummies` using `smoker_form` and keep the
# fitted object as `smoker_xgb_classif`.
# NOTE(review): no seed is set anywhere in this script before the fit; if
# `tidy_xgboost()` samples rows or columns (as `sample_size` and `mtry`
# suggest), results may differ between runs. TODO confirm.
smoker_xgb_classif <- insurance_dummies %>%
  tidy_xgboost(
    formula = smoker_form,
    mtry = 0.5,
    trees = 100L,
    loss_reduction = 1,
    alpha = 0.1,
    sample_size = 0.7
  )



## -----------------------------------------------------------------------------
# Pass the fitted model to `tidy_predict()` with the same data it was fit on
# (`insurance_dummies`) as `newdata`, keeping the result as `insurance_fit`.
insurance_fit <- smoker_xgb_classif %>%
  tidy_predict(newdata = insurance_dummies, form = smoker_form)


## -----------------------------------------------------------------------------
# Take the name of the second-to-last column of `insurance_fit`.
# NOTE(review): presumably this is the predicted-probability column added by
# `tidy_predict()`. The lookup is purely positional, so verify the column order.
prob_preds <- names(insurance_fit)[length(names(insurance_fit)) - 1]

# Bin that column into 5 bins.
# NOTE(review): `prob_preds` is an external character vector passed bare into a
# column selection. If `bin_cols()` uses tidyselect, `all_of(prob_preds)` would
# be the unambiguous form. TODO confirm before changing.
insurance_fit1 <- insurance_fit %>%
  bin_cols(prob_preds, n_bins = 5)

# Summarise the binned predictions; the result is auto-printed.
insurance_fit1 %>% bin_summary()

## -----------------------------------------------------------------------------
# Run `eval_preds()` on the binned prediction data; the result is auto-printed.
insurance_fit1 %>% eval_preds()

## -----------------------------------------------------------------------------
# Take the name of the last column of `insurance_fit`.
# NOTE(review): presumably this is the predicted-class column added by
# `tidy_predict()`. The lookup is purely positional, so verify the column order.
class_preds <- names(insurance_fit)[length(names(insurance_fit))]

# Build a confusion matrix of `smoker` (truth) against that column. The name
# was read from `insurance_fit` but is looked up in `insurance_fit1`.
# NOTE(review): `class_preds` is an external character vector passed bare as a
# column identifier; confirm the installed yardstick version resolves it as
# intended.
conf_mat_sm <- insurance_fit1 %>%
  yardstick::conf_mat(truth = smoker, estimate = class_preds)

conf_mat_sm

