---
title: "Introduction to roclab"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Introduction to roclab}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Global chunk options: merge each chunk's source and output into one block
# and prefix every line of printed output with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

```{r setup}
library(roclab)
```

# Overview

The **roclab** package implements ROC-Optimizing Binary Classifiers, supporting
both linear and kernel models.

- **Linear models** provide various regularization penalties (ridge, lasso, alasso,
  elastic net, scad, mcp).
- **Kernel models** support flexible kernel functions (radial, polynomial, linear,
  laplace).
- **Both models** include several surrogate loss functions: hinge, hinge2 (squared
  hinge), logistic, and exponential.

For large-scale data, fitting the model is computationally prohibitive because its
loss is a U-statistic involving a double summation. To reduce this burden, the package
adopts an efficient algorithm based on an incomplete U-statistic, which approximates
the loss with a single summation. In kernel models, a Nyström low-rank approximation
is further applied to efficiently compute the kernel matrix. These approximations
substantially reduce computational cost and accelerate training, while maintaining
accuracy, making ROC-Optimizing Binary Classifiers practical for large-scale
datasets.

In addition, efficient optimization is performed using **Gradient Descent with the
Adamax update rule**—a variant of Adam based on the infinity norm—for kernel
models and for linear models with the ridge penalty. For linear models with other
penalties (i.e., those involving variable selection), **Proximal Gradient Descent
with an Adamax adaptive learning rate scheme** is employed.

# Example: Linear Model
```{r}
# Fix the RNG state so the simulated data are reproducible
set.seed(123)

# Training sample: 1200 observations, 20% of them in the positive class
n_lin_train <- 1200
n_pos_lin <- round(0.2 * n_lin_train)
n_neg_lin <- n_lin_train - n_pos_lin

# Two Gaussian predictors per observation: negatives have mean -1, positives
# have mean +1. Negatives are drawn first, then positives.
X_neg_train_lin <- matrix(rnorm(2 * n_neg_lin, mean = -1), ncol = 2)
X_pos_train_lin <- matrix(rnorm(2 * n_pos_lin, mean = 1), ncol = 2)
X_train_lin <- rbind(X_neg_train_lin, X_pos_train_lin)
y_train_lin <- rep(c(-1, 1), times = c(n_neg_lin, n_pos_lin))

# Fit a linear model with lambda = 0.1
fit_lin <- roclearn(X_train_lin, y_train_lin, lambda = 0.1)

# Print a summary of the fitted model
summary(fit_lin)

# Test sample: 300 observations generated the same way as the training
# sample (20% positives, same class means, negatives drawn first)
n_test_lin <- 300
n_pos_test_lin <- round(0.2 * n_test_lin)
n_neg_test_lin <- n_test_lin - n_pos_test_lin

X_neg_test_lin <- matrix(rnorm(2 * n_neg_test_lin, mean = -1), ncol = 2)
X_pos_test_lin <- matrix(rnorm(2 * n_pos_test_lin, mean = 1), ncol = 2)
X_test_lin <- rbind(X_neg_test_lin, X_pos_test_lin)
y_test_lin <- rep(c(-1, 1), times = c(n_neg_test_lin, n_pos_test_lin))

# Decision scores for the test observations
pred_score_lin <- predict(fit_lin, X_test_lin, type = "response")
head(pred_score_lin)

# Predicted class labels in {-1, 1} for the test observations
pred_class_lin <- predict(fit_lin, X_test_lin, type = "class")
head(pred_class_lin)

# Test-set AUC
auc(fit_lin, X_test_lin, y_test_lin)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_lin, pred_score_lin)
```


# Example: Kernel Model
```{r}
# Fix the RNG state so the simulated data are reproducible
set.seed(123)

# Training sample: 1200 points generated in polar coordinates. The radius is
# the square root of a uniform draw on [0.05, 1] and the angle is uniform on
# [0, 2 * pi]; all radii are drawn before the angles.
n_ker <- 1200
r_train_ker <- sqrt(runif(n_ker, min = 0.05, max = 1))
theta_train_ker <- runif(n_ker, min = 0, max = 2 * pi)

# Convert to Cartesian coordinates. Points with radius below 0.5 are labelled
# +1 and all other points -1.
X_train_ker <- cbind(
  r_train_ker * cos(theta_train_ker),
  r_train_ker * sin(theta_train_ker)
)
y_train_ker <- ifelse(r_train_ker < 0.5, 1, -1)

# Fit a kernel model with a radial kernel and lambda = 0.1
fit_ker <- kroclearn(
  X_train_ker, y_train_ker,
  lambda = 0.1,
  kernel = "radial"
)

# Print a summary of the fitted model
summary(fit_ker)

# Test sample: 300 points generated the same way as the training sample
n_test_ker <- 300
r_test_ker <- sqrt(runif(n_test_ker, min = 0.05, max = 1))
theta_test_ker <- runif(n_test_ker, min = 0, max = 2 * pi)
X_test_ker <- cbind(
  r_test_ker * cos(theta_test_ker),
  r_test_ker * sin(theta_test_ker)
)
y_test_ker <- ifelse(r_test_ker < 0.5, 1, -1)

# Decision scores for the test observations
pred_score_ker <- predict(fit_ker, X_test_ker, type = "response")
head(pred_score_ker)

# Predicted class labels in {-1, 1} for the test observations
pred_class_ker <- predict(fit_ker, X_test_ker, type = "class")
head(pred_class_ker)

# Test-set AUC
auc(fit_ker, X_test_ker, y_test_ker)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_ker, pred_score_ker)
```


# Cross-Validation
```{r}
# 5-fold CV for linear models on the simulated training data. The grid
# passed as lambda.vec holds 20 candidate values, evenly spaced on the log
# scale between 0.01 and 5.
cvfit_lin <- cv.roclearn(
  X_train_lin, y_train_lin,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 20)),
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_lin)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the linear model
# cross-validated on the simulated data
plot(cvfit_lin)
```

```{r}
# 5-fold CV for kernel models (radial kernel) on the simulated training data.
# The grid passed as lambda.vec holds 20 candidate values, evenly spaced on
# the log scale between 0.01 and 5.
cvfit_ker <- cv.kroclearn(
  X_train_ker, y_train_ker,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 20)),
  kernel = "radial",
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_ker)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the kernel model
# cross-validated on the simulated data
plot(cvfit_ker)
```


# Real Data Example 1: Ionosphere
```{r}
library(mlbench)
data(Ionosphere)

# Prepare data: every column except the label column "Class" is a predictor;
# observations of class "bad" are coded +1 and all others -1
X_iono <- Ionosphere[, setdiff(names(Ionosphere), "Class")]
y_iono <- ifelse(Ionosphere$Class == "bad", 1, -1)

# Random split: 200 rows for training, the remaining rows for testing
set.seed(123)
train_idx <- sample(seq_len(nrow(X_iono)), size = 200)
X_train_iono <- X_iono[train_idx, ]
y_train_iono <- y_iono[train_idx]
X_test_iono <- X_iono[-train_idx, ]
y_test_iono <- y_iono[-train_idx]

# Fit a linear model with lambda = 0.1. approx = TRUE is requested explicitly
# (presumably the incomplete U-statistic approximation described in the
# Overview -- confirm in ?roclearn).
fit_iono_lin <- roclearn(
  X_train_iono, y_train_iono,
  lambda = 0.1,
  approx = TRUE
)
summary(fit_iono_lin)

# Decision scores for the test observations
pred_score_iono_lin <- predict(fit_iono_lin, X_test_iono, type = "response")
head(pred_score_iono_lin)

# Predicted class labels in {-1, 1} for the test observations
pred_class_iono_lin <- predict(fit_iono_lin, X_test_iono, type = "class")
head(pred_class_iono_lin)

# Test-set AUC
auc(fit_iono_lin, X_test_iono, y_test_iono)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_iono, pred_score_iono_lin)
```

```{r}
# 5-fold CV for linear models on the Ionosphere training split. lambda.vec
# holds 10 candidate values, evenly spaced on the log scale between 0.01
# and 5.
cvfit_iono_lin <- cv.roclearn(
  X_train_iono,
  y_train_iono,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  approx = TRUE,
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_iono_lin)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the linear model
# cross-validated on the Ionosphere training split
plot(cvfit_iono_lin)
```

```{r}
# Fit a kernel model (radial kernel, lambda = 0.1) on the Ionosphere training
# split, with approx = TRUE requested explicitly
fit_iono_ker <- kroclearn(
  X_train_iono, y_train_iono,
  lambda = 0.1,
  kernel = "radial",
  approx = TRUE
)
summary(fit_iono_ker)

# Decision scores for the test observations
pred_score_iono_ker <- predict(fit_iono_ker, X_test_iono, type = "response")
head(pred_score_iono_ker)

# Predicted class labels in {-1, 1} for the test observations
pred_class_iono_ker <- predict(fit_iono_ker, X_test_iono, type = "class")
head(pred_class_iono_ker)

# Test-set AUC
auc(fit_iono_ker, X_test_iono, y_test_iono)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_iono, pred_score_iono_ker)
```

```{r}
# 5-fold CV for kernel models (radial kernel) on the Ionosphere training
# split. lambda.vec holds 10 candidate values, evenly spaced on the log scale
# between 0.01 and 5.
cvfit_iono_ker <- cv.kroclearn(
  X_train_iono,
  y_train_iono,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  kernel = "radial",
  approx = TRUE,
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_iono_ker)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the kernel model
# cross-validated on the Ionosphere training split
plot(cvfit_iono_ker)
```


# Real Data Example 2: Spam (larger dataset)
```{r}
library(kernlab)
data(spam)

# Prepare data: every column except the label column "type" is a predictor;
# messages of type "spam" are coded +1 and all others -1
X_spam <- spam[, setdiff(names(spam), "type")]
y_spam <- ifelse(spam$type == "spam", 1, -1)

# Random split: 3000 rows for training, the remaining rows for testing
set.seed(123)
train_idx <- sample(seq_len(nrow(X_spam)), size = 3000)
X_train_spam <- X_spam[train_idx, ]
y_train_spam <- y_spam[train_idx]
X_test_spam <- X_spam[-train_idx, ]
y_test_spam <- y_spam[-train_idx]

# Fit a linear model with lambda = 0.1
fit_spam_lin <- roclearn(X_train_spam, y_train_spam, lambda = 0.1)
summary(fit_spam_lin)

# Decision scores for the test observations
pred_score_spam_lin <- predict(fit_spam_lin, X_test_spam, type = "response")
head(pred_score_spam_lin)

# Predicted class labels in {-1, 1} for the test observations
pred_class_spam_lin <- predict(fit_spam_lin, X_test_spam, type = "class")
head(pred_class_spam_lin)

# Test-set AUC
auc(fit_spam_lin, X_test_spam, y_test_spam)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_spam, pred_score_spam_lin)
```

```{r}
# 5-fold CV for linear models on the spam training split. lambda.vec holds
# 10 candidate values, evenly spaced on the log scale between 0.01 and 5.
cvfit_spam_lin <- cv.roclearn(
  X_train_spam,
  y_train_spam,
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_spam_lin)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the linear model
# cross-validated on the spam training split
plot(cvfit_spam_lin)
```

```{r}
# Fit a kernel model (radial kernel, lambda = 0.1) on the spam training split
fit_spam_ker <- kroclearn(
  X_train_spam, y_train_spam,
  lambda = 0.1,
  kernel = "radial"
)
summary(fit_spam_ker)

# Decision scores for the test observations
pred_score_spam_ker <- predict(fit_spam_ker, X_test_spam, type = "response")
head(pred_score_spam_ker)

# Predicted class labels in {-1, 1} for the test observations
pred_class_spam_ker <- predict(fit_spam_ker, X_test_spam, type = "class")
head(pred_class_spam_ker)

# Test-set AUC
auc(fit_spam_ker, X_test_spam, y_test_spam)

# Test-set ROC curve, drawn from the true labels and the decision scores
plot_roc(y_test_spam, pred_score_spam_ker)
```

```{r}
# 5-fold CV for kernel models (radial kernel) on the spam training split.
# lambda.vec holds 10 candidate values, evenly spaced on the log scale
# between 0.01 and 5.
cvfit_spam_ker <- cv.kroclearn(
  X_train_spam,
  y_train_spam,
  kernel = "radial",
  lambda.vec = exp(seq(log(0.01), log(5), length.out = 10)),
  nfolds = 5
)

# Summarize the cross-validation result
summary(cvfit_spam_ker)
```

```{r, fig.width=7, fig.height=6}
# Plot the cross-validation AUC across lambda values for the kernel model
# cross-validated on the spam training split
plot(cvfit_spam_ker)
```


# Conclusion

The **roclab** package implements ROC-Optimizing Binary Classifiers through
both **linear models** (with regularization penalties) and **kernel models**
(with various kernel functions). It supports multiple surrogate loss functions
and incorporates scalable options to efficiently handle large datasets.

