---
title: "Bayesian Optimisation"
output:
  rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Bayesian Optimisation}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Chunk defaults for the whole vignette: code is shown in the rendered output
# (echo = TRUE) but is not executed when the document is knit (eval = FALSE).
knitr::opts_chunk$set(
  echo = TRUE,
  eval = FALSE
)
```

## Intro

The [fastai](https://github.com/fastai/fastai) library simplifies training fast and accurate neural nets using modern best practices. See the fastai website to get started. The library is based on research into deep learning best practices undertaken at `fast.ai`, and includes "out of the box" support for `vision`, `text`, `tabular`, and `collab` (collaborative filtering) models.

## Bayesian Optimisation

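This example uses the Santander Customer Transaction Prediction dataset, which can be downloaded from [Kaggle](https://www.kaggle.com/c/santander-customer-transaction-prediction). The code below expects the training file `train.csv` to be in the working directory: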

```{r}
library(rBayesianOptimization)
library(magrittr)
library(fastai)

# Read the training data and drop the row identifier column.
df <- data.table::fread("train.csv")
df$ID_code <- NULL
# Store the label as character rather than as a number
# (presumably so that it is treated as a class label -- confirm).
df$target <- as.character(df$target)

# Preprocessing steps handed to TabularDataTable().
procs <- list(FillMissing(), Categorify(), Normalize())

# Every column except the label is passed as a continuous feature.
dep_var <- "target"
cont_names <- setdiff(names(df), dep_var)

# Index of the last training row: the first 80% of rows (in file order) form
# the first split and the remaining rows form the second split.
pct_80 <- round(nrow(df) * .8)

# The third positional argument is NULL (presumably the categorical column
# names -- confirm against the TabularDataTable() signature).
dls <- dataloaders(
  TabularDataTable(
    df, procs, NULL, cont_names,
    y_names = dep_var,
    splits = list(1:pct_80, (pct_80 + 1):nrow(df))
  ),
  bs = 100
)

# Objective function for BayesianOptimization(): builds and fits one tabular
# learner on the global `dls` using the proposed hyper-parameters.
#
# Arguments:
#   layer_1, layer_2, layer_3  combined into the `layers` vector of the learner.
#   lr     passed to fit_one_cycle() after the count 10
#          (presumably the learning rate and the number of epochs -- confirm).
#   wd     passed to tabular_learner() as `wd`.
#   emb_p  passed to tabular_config() as `embed_p`.
#
# Returns a list with two elements:
#   Score  the last entry of `roc_auc_score` in the fit_one_cycle() result.
#   Pred   the constant 0.
fastai_fit <- function(layer_1, layer_2, layer_3, lr, wd, emb_p) {
  net_config <- tabular_config(embed_p = emb_p, use_bn = TRUE)
  # Early-stopping callback watching `valid_loss` with a patience of 2.
  stopper <- EarlyStoppingCallback(monitor = "valid_loss", patience = 2)

  learner <- tabular_learner(
    dls,
    layers = c(layer_1, layer_2, layer_3),
    wd = wd,
    config = net_config,
    metrics = list(RocAucBinary(), accuracy()),
    cbs = list(stopper)
  )

  history <- fit_one_cycle(learner, 10, lr)

  list(
    Score = unlist(tail(history$roc_auc_score, 1)),
    Pred = 0
  )
}

# Search space for the optimiser; names must match the arguments of
# fastai_fit(). Layer sizes are unit counts, so their bounds carry the `L`
# suffix: rBayesianOptimization uses it to mark integer hyper-parameters and
# then proposes whole numbers for them instead of fractional layer widths.
search_bound_fastai <- list(
  layer_1 = c(20L, 200L),
  layer_2 = c(20L, 200L),
  layer_3 = c(20L, 200L),
  lr = c(0, 0.1),
  wd = c(0, 0.1),
  emb_p = c(0, 1)
)

# 30 pre-sampled starting points, one column per hyper-parameter and in the
# same order as the bounds. Layer sizes are rounded so that the initial grid
# holds whole numbers too; they are still drawn with runif() in the original
# order, so the random stream used for lr, wd and emb_p is unchanged.
set.seed(123)
search_grid_fastai <- data.frame(
  layer_1 = round(runif(30, 20, 200)),
  layer_2 = round(runif(30, 20, 200)),
  layer_3 = round(runif(30, 20, 200)),
  lr = runif(30, 0, 0.1),
  wd = runif(30, 0, 0.1),
  emb_p = runif(30, 0, 1)
)
head(search_grid_fastai)

# Run the optimiser with the grid above as `init_grid_dt`, 2 additional random
# initial points, 5 optimisation iterations and the "ucb" acquisition function.
set.seed(123)
bayes_fastai <- BayesianOptimization(
  FUN = fastai_fit,
  bounds = search_bound_fastai,
  init_points = 2,
  init_grid_dt = search_grid_fastai,
  n_iter = 5,
  acq = "ucb"
)

# Best hyper-parameter combination found.
bayes_fastai$Best_Par
```




