## ----message=FALSE------------------------------------------------------------
library(PINSPlus)
data(AML2004)
data <- as.matrix(AML2004$Gene)

## -----------------------------------------------------------------------------
system.time(result <- PerturbationClustering(data = data, verbose = FALSE))

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data)

## -----------------------------------------------------------------------------
result$k

## -----------------------------------------------------------------------------
result$cluster

## -----------------------------------------------------------------------------
condition <- seq(unique(AML2004$Group[, 2]))
names(condition) = unique(AML2004$Group[, 2])
plot(prcomp(AML2004$Gene)$x, col = result$cluster, 
     pch = condition[AML2004$Group[, 2]], main = "AML2004")
legend("bottomright", legend = paste("Cluster ", sort(unique(result$cluster)), sep = ""),
        fill = sort(unique(result$cluster)))
legend("bottomleft", legend = names(condition), pch = condition)

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data, kMax = 5,
#                                  clusteringMethod = "kmeans")

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data, kMax = 5,
#                                  clusteringMethod = "pam")

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data, kMax = 5,
#                                  clusteringMethod = "hclust")

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(
#     data = data,
#     clusteringMethod = "kmeans",
#     clusteringOptions = list(nstart = 100, iter.max = 500),
#     verbose = FALSE
# )

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data,
#     clusteringFunction = function(data, k){
#     # this function must return a vector of cluster
#     kmeans(x = data, centers = k, nstart = k*10, iter.max = 2000)$cluster
# })

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data,
#                                  perturbMethod = "noise",
#                                  perturbOptions = list(noise = 1.23))

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data,
#                                  perturbMethod = "noise",
#                                  perturbOptions = list(noisePercent = 10))

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data,
#                                  perturbMethod = "subsampling",
#                                  perturbOptions = list(percent = 80))

## ----eval=FALSE---------------------------------------------------------------
# result <- PerturbationClustering(data = data, perturbFunction = function(data){
#     rowNum <- nrow(data)
#     colNum <- ncol(data)
#     epsilon <-
#         matrix(
#             data = rnorm(rowNum * colNum, mean = 0, sd = 1.23456),
#             nrow = rowNum, ncol = colNum
#         )
# 
#     list(
#         data = data + epsilon,
#         ConnectivityMatrixHandler = function(connectivityMatrix, iter, k) {
#             connectivityMatrix
#         }
#     )
# })

## ----eval=FALSE---------------------------------------------------------------
# sampleNum <- 50000 # Number of samples
# geneNum <- 5000 # Number of genes
# subtypeNum <- 3 # Number of subtypes
# 
# # Generate expression matrix
# exprs <- matrix(rnorm(sampleNum*geneNum, 0, 1), nrow = sampleNum, ncol = geneNum)
# rownames(exprs) <- paste0("S", 1:sampleNum) # Assign unique names for samples
# 
# # Generate subtypes
# group <- sort(rep(1:subtypeNum, sampleNum/subtypeNum + 1)[1:sampleNum])
# names(group) <- rownames(exprs)
# 
# # Make subtypes separate
# for (i in 1:subtypeNum) {
#   exprs[group == i, 1:100 + 100*(i-1)] <- exprs[group == i, 1:100 + 100*(i-1)] + 2
# }
# 
# # Plot the data
# library(irlba)
# exprs.pca <- irlba::prcomp_irlba(exprs, n = 2)$x
# plot(exprs.pca, main = "PCA")

## ----eval=FALSE---------------------------------------------------------------
# set.seed(1)
# t1 <- Sys.time()
# result <- PerturbationClustering(data = exprs.pca, ncore = 1)
# t2 <- Sys.time()

## ----eval=FALSE---------------------------------------------------------------
# t2-t1

## ----eval=FALSE---------------------------------------------------------------
# result$k

## ----eval=FALSE---------------------------------------------------------------
# subtype <- result$cluster

## ----eval=FALSE---------------------------------------------------------------
# if (!require("mclust")) install.packages("mclust")
# library(mclust)
# ari <- mclust::adjustedRandIndex(subtype, group)
# 

## ----eval=FALSE---------------------------------------------------------------
# 
# colors <- as.numeric(as.character(factor(subtype)))
# 
# plot(exprs.pca, col = colors, main = "Cluster assigments for simulation data")
# 
# legend("topright", legend = paste("ARI:", ari))
# 
# legend("bottomright", fill = unique(colors),
#     legend = paste("Group ",
#                    levels(factor(subtype)), ": ",
#                    table(subtype)[levels(factor(subtype))], sep = "" )
#     )

## ----eval=FALSE---------------------------------------------------------------
# # Load the kidney cancer carcinoma data
# data(KIRC)
# # SubtypingOmicsData`'s input data must be a list of
# # numeric matrices that have the same number of rows:
# dataList <- list (as.matrix(KIRC$GE), as.matrix(KIRC$ME), as.matrix(KIRC$MI))
# names(dataList) <- c("GE", "ME", "MI")
# # Run `SubtypingOmicsData`:
# result <- SubtypingOmicsData(dataList = dataList)

## ----eval=FALSE---------------------------------------------------------------
# result <- SubtypingOmicsData(
#     dataList = dataList,
#     clusteringMethod = "kmeans",
#     clusteringOptions = list(nstart = 50)
# )

## ----eval=FALSE---------------------------------------------------------------
# library(survival)
# cluster1=result$cluster1;cluster2=result$cluster2
# a <- intersect(unique(cluster2), unique(cluster1))
# names(a) <- intersect(unique(cluster2), unique(cluster1))
# a[setdiff(unique(cluster2), unique(cluster1))] <-
#     seq(setdiff(unique(cluster2), unique(cluster1))) + max(cluster1)
# colors <- a[levels(factor(cluster2))]
# coxFit <- coxph(
#      Surv(time = Survival, event = Death) ~ as.factor(cluster2),
#      data = KIRC$survival,
#      ties = "exact"
# )
# mfit <- survfit(Surv(Survival, Death == 1) ~ as.factor(cluster2), data = KIRC$survival)
# plot(
#      mfit, col = colors, main = "Survival curves for KIRC, level 2",
#      xlab = "Days", ylab = "Survival",lwd = 2
# )
# legend("bottomright",
#     legend = paste(
#         "Cox p-value:", round(summary(coxFit)$sctest[3], digits = 5), sep = ""
#     )
# )
# legend(
#     "bottomleft",
#     fill = colors,
#     legend = paste("Group ", levels(factor(cluster2)), ": ",
#         table(cluster2)[levels(factor(cluster2))], sep =""
#     )
# )

