---
title: "Examples of joint grid discretization"
author: "Jiandong Wang, Sajal Kumar, and Joe Song"
date: 2025-12-12
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Examples of joint grid discretization}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}

# Global knitr chunk options for this vignette: `collapse` and `comment`
# control how chunk output is rendered alongside the source.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

### History

Updated: 2025-12-12; 2022-01-27; 2022-01-17; 2020-09-13; 2020-08-01

Created: 2020-03-17


## Example 1. Nonlinear curves using kmeans+silhouette and Ball+BIC clustering with a fixed number of clusters

```{r clustering nonlinear patterns by fixed numbers of clusters, out.width="40%", fig.show="hold", fig.cap="Example 1. Nonlinear curves using kmeans+silhouette and Ball+BIC clustering with a fixed number of clusters."}
# Attach the package with library() so that a missing installation stops the
# vignette right here with a clear error; require() would only return FALSE
# with a warning and let the chunk fail later on an unrelated
# "could not find function" error.
library(GridOnClusters)

# Three variables that are deterministic functions of x. The added noise
# terms use sd = 0, so they contribute exactly zero.
x <- rnorm(500)
y <- sin(x) + rnorm(500, sd = 0)
z <- cos(x) + rnorm(500, sd = 0)
data <- cbind(x, y, z)

# Candidate numbers of clusters handed to the clustering step.
ks <- 2:20

# kmeans+silhouette clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split"
)
plot(res)

# kmeans+silhouette clustering, DP exact likelihood grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP exact likelihood"
)
plot(res)

# Ball+BIC clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way"
)
plot(res)
```

## Example 2. Nonlinear curves and patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r clustering nonlinear patterns by varying numbers of clusters, out.width="40%", fig.show="hold", fig.cap="Example 2. Using a range for the number of kmeans+silhouette and Ball+BIC clusters"}
# x is standard normal; y is a smooth function of x; z is a noisy step that
# is 0 for x in [-0.5, 0.5] and 1 outside that interval.
x <- rnorm(100)
y <- log1p(abs(x))
z <- as.numeric(abs(x) > 0.5) + rnorm(100, 0, 0.1)
data <- cbind(x, y, z)

# Candidate numbers of clusters.
ks <- 2:5

# Disabled alternative: Ball+BIC with Sort+split.
#res = discretize.jointly(data, k=ks, cluster_method = "Ball+BIC",
#                         grid_method = "Sort+split", min_level = 1)
#plot(res)

# kmeans+silhouette clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# Disabled alternative: kmeans+silhouette with DP exact likelihood.
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette", 
#                         grid_method = "DP exact likelihood", min_level = 1) 
#plot(res)

# Ball+BIC clustering, DP exact likelihood grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)
```

## Example 3. Using the partition around medoids clustering method

```{r Example 3 using PAM for clustering, out.width="40%", fig.show="hold", fig.cap="Example 3. Using the partition around medoids clustering method."}
# Supply cluster labels produced by a method other than the built-in
# clustering options of discretize.jointly().
x <- rnorm(100)
y <- log1p(abs(x))
z <- sin(x)
data <- cbind(x, y, z)

# Pre-cluster the data using partition around medoids (PAM) with 4 clusters.
cluster_label <- cluster::pam(
  x = data, diss = FALSE, metric = "euclidean", k = 4
)$clustering

# Sort+split grid on the PAM labels. Plot it before `res` is reassigned;
# previously this result was overwritten without ever being shown.
res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "Sort+split",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & Sort+split"
)

# DP exact likelihood grid on the same labels, titled to match the method
# actually used (it was previously mislabeled as "Sort+split").
res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & DP exact likelihood"
)
```

## Example 4. Random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r Example 4, out.width="40%", fig.show="hold", fig.cap="Example 4. Random patterns using kmeans+silhouette and Ball+BIC clustering with a range."}
# Candidate numbers of clusters, base sample size, and base spread.
ks <- 2:20
n <- 40 * 10
sd <- 60 * 4

# First 2n points: wide, centered at the origin.
x <- rnorm(2 * n, sd = sd)
y <- rnorm(2 * n, sd = sd)

# Next 2n points: one third of the spread, shifted up by 200 in y.
x <- c(x, rnorm(2 * n, sd = sd / 3))
y <- c(y, rnorm(2 * n, sd = sd / 3) + 200)

data <- cbind(x, y)

# Ball+BIC clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
```

## Example 5. Multi-cluster random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters

```{r Example 5 bivariate, out.width="40%", fig.show="hold", fig.cap="Example 5. Multi-cluster random patterns using kmeans+silhouette and Ball+BIC clustering with a range."}
# Points per component and candidate numbers of clusters.
n <- 50 * 8
ks <- 2:20

# Each component is an n-by-2 matrix filled column-wise: the first rnorm()
# draw is column 1 and the second is column 2.
X.C1 <- matrix(
  c(rnorm(n, mean = 5, sd = 2), rnorm(n, mean = 0, sd = 40)),
  ncol = 2
)

# NOTE(review): X.C2 is generated but not included in `data` below --
# confirm whether leaving it out is intentional.
X.C2 <- matrix(
  c(rnorm(n, mean = 70, sd = 1), rnorm(n, mean = 0, sd = 1)),
  ncol = 2
)

X.C3 <- matrix(
  c(rnorm(n, mean = 150, sd = 30), rnorm(n, mean = 0, sd = 30)),
  ncol = 2
)

# Stack the first and third components row-wise.
data <- rbind(X.C1, X.C3)

# Ball+BIC clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, Sort+split grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, DP approx likelihood 1-way grid; this is the
# only call in the chunk that also passes cutoff = 1.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1,
  cutoff = 1
)
plot(res)

# Ball+BIC clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
```

## Example 6. Exclusive or.

```{r Example 6. Exclusive or, out.width="40%", fig.show="hold", fig.cap="Example 6. Exclusive or. Dim1 ⊕ Dim2, Dim3 and Dim4 are random"}
# Points per group and candidate numbers of clusters.
n <- 100
ks <- 2:10

# Group labels 1..4, n points each, in the same order as the draws below.
label <- rep(c(1, 2, 3, 4), each = n)

# X1 is centered at 0 for groups 1-2 and at 10 for groups 3-4.
X1 <- c(
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 10, sd = 2)
)

# X2 is centered at 10 for groups 1 and 3 and at 0 for groups 2 and 4.
X2 <- c(
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 0, sd = 2)
)

# X3 and X4 are drawn without reference to the group label.
X3 <- rnorm(4 * n, mean = 20, sd = 10)

X4 <- rnorm(4 * n, mean = 3, sd = 20)

data <- cbind(X1, X2, X3, X4)

# Disabled alternatives kept for reference.
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette", 
#                         grid_method = "DP approx likelihood", min_level = 1) 
#plot(res)
#
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette", 
#                         grid_method = "DP Compressed majority", min_level = 1) 
#plot(res)

#res = discretize.jointly(data, k=ks, cluster_method = "Ball+BIC", 
#                         grid_method = "DP exact likelihood", min_level = 1) 
#plot(res)

# Ball+BIC clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Same grid method, but with the known group labels supplied directly.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Disabled alternatives kept for reference.
#res = discretize.jointly(data, k=ks, cluster_label = label,
#                         grid_method = "Sort+split", min_level = 1) 
#plot(res)
#
#res = discretize.jointly(data, k=ks, cluster_method = "kmeans+silhouette", 
#                         grid_method = "Sort+split", min_level = 1) 
#plot(res)

```

<!-- ## Example 7. Toy dataset from paper -->

<!-- ```{r Example 7. Toy dataset, out.width="40%", fig.show="hold", fig.cap="Example 7. Toy dataset from paper"} -->
<!-- ks <- 2:10 -->

<!-- X1 = c(1,1,2,2,3,4,5,5,6,12) -->

<!-- X2 = c(7,6,7,6,7,1,2,1,1,5) -->

<!-- data = cbind(X1, X2) -->

<!-- res = discretize.jointly(data, k=ks, cluster_method = "Ball+BIC", -->
<!--                          grid_method = "DP exact likelihood", min_level = 1)  -->
<!-- plot(res) -->

<!-- ``` -->

<!-- ##Example 8. The testing dataset provided by Ruby, most of the points overlap with each other in this dataset -->

<!-- ```{r Example 8. Ruby\'s dataset, out.width="40%", fig.show="hold", fig.cap="Example 8. The testing dataset provided by Ruby, most of the points overlap with each other in this dataset."} -->
<!-- file = system.file("extdata", "TestforGridOnCluster.Rdata", package = "GridOnClusters") -->
<!-- load(file) -->
<!-- input = cbind(E1,E2) -->
<!-- res = discretize.jointly(data = input, k = c(2:9), min_level = 1,  -->
<!--                          cluster_method = "Ball+BIC",  -->
<!--                          grid_method = "DP approx likelihood") -->
<!-- plot(res) -->

<!-- res = discretize.jointly(data = input, k = c(2:9), min_level = 1,  -->
<!--                          cluster_method = "Ball+BIC", grid_method = "Sort+split") -->
<!-- plot(res) -->
<!-- ``` -->

## Example 7. Three well separated clusters

```{r Example 7. Three well separated rounds., out.width="40%", fig.show="hold", fig.cap="Example 7. Three rounds well separated on the x axis"}
# Base group size; ks is defined here but the calls below pass labels only.
n <- 20
ks <- 2:10

# Known group labels: n points in group 1, 5n each in groups 2 and 3.
label <- rep(c(1, 2, 3), times = c(n, 5 * n, 5 * n))

# Groups are centered at 0, 15, and 35 along X1 and all at 0 along X2.
X1 <- c(
  rnorm(n, mean = 0, sd = 1),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(5 * n, mean = 35, sd = 3)
)
X2 <- c(
  rnorm(n, mean = 0, sd = 1),
  rnorm(5 * n, mean = 0, sd = 3),
  rnorm(5 * n, mean = 0, sd = 3)
)

data <- cbind(X1, X2)

# Known labels, DP exact likelihood grid.
res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Known labels, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Known labels, DP exact likelihood grid with entropy = TRUE.
res.entropy <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)


```

## Example 8. Four spheres with varying centers and radii

```{r Example 8. Four spheres with varying centers and radius, out.width="40%", fig.show="hold", fig.cap="Example 8. Four spheres with different centers and radii"}
# Base group size and candidate numbers of clusters.
n <- 200
ks <- 2:10

# Generating group of each point (sizes n, 5n, n, 3n). The calls in this
# chunk cluster with Ball+BIC and do not pass these labels.
label <- rep(c(1, 2, 3, 4), times = c(n, 5 * n, n, 3 * n))

# Four groups with different centers and spreads.
X1 <- c(
  rnorm(n, mean = 0, sd = 3),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(n, mean = 5, sd = 2),
  rnorm(3 * n, mean = 15, sd = 4)
)

X2 <- c(
  rnorm(n, mean = 0, sd = 4),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(n, mean = 10, sd = 2),
  rnorm(3 * n, mean = 0, sd = 4)
)

data <- cbind(X1, X2)

# Ball+BIC clustering, DP exact likelihood grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, DP exact likelihood grid with entropy = TRUE.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res)


```

## Example 9. A small dense sphere overlapping a large sphere

Here we evaluate whether a method can recognize the two spheres.

```{r Example 9. Two overlapping spheres, out.width="40%", fig.show="hold", fig.cap="Example 9. A small dense sphere overlapping a large sphere."}
# Base group size and candidate numbers of clusters.
n <- 200
ks <- 2:10

# Known group labels: n points in group 1 and 5n points in group 2.
label <- rep(c(1, 2), times = c(n, 5 * n))

# Group 1 is tight (sd = 1) around the origin; group 2 is wider (sd = 3)
# and centered at 6 along X1.
X1 <- c(rnorm(n, mean = 0, sd = 1), rnorm(5 * n, mean = 6, sd = 3))
X2 <- c(rnorm(n, mean = 0, sd = 1), rnorm(5 * n, mean = 0, sd = 3))

data <- cbind(X1, X2)

# Known labels, DP exact likelihood grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Known labels, DP approx likelihood 1-way grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Known labels, DP exact likelihood grid with entropy = TRUE.
res.entropy <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)
```
