# Vignette-wide litedown option: `warning = FALSE` is set once here, before any
# other code runs (presumably it hides warnings in the rendered output --
# confirm against the litedown documentation).
litedown::reactor(warning = FALSE) # vignette setting

library(easybio)
library(Seurat)
library(data.table)

# The pbmc.markers dataset ships with easybio; preview its first rows
head(pbmc.markers, n = 6L)

# Match the cluster markers against the reference (n = 50, species "Human")
marker_matched <- matchCellMarker2(
  marker = pbmc.markers,
  n = 50,
  spc = "Human"
)

# Show the first two candidate cell types listed for each cluster
marker_matched[, head(.SD, n = 2L), by = "cluster"]

# Keep only the first candidate per cluster and turn it into a named vector
# (names = cluster ids, values = cell names)
cl2cell_auto <- local({
  first_hit <- marker_matched[, head(.SD, n = 1L), by = "cluster"]
  setNames(first_hit[["cell_name"]], first_hit[["cluster"]])
})
print("Initial automated annotation:")
cl2cell_auto

# Plot the first six candidates of every cluster
marker_matched[, head(.SD, n = 6L), by = "cluster"] |>
  plotPossibleCell(min.uniqueN = 2)

# Take a closer look at clusters 1, 5, and 7, keeping the top 2 cell types.
# The same query is run twice, differing only in `cis`:
# `cis = TRUE` yields what this script calls the local evidence ...
local_evidence <- marker_matched |>
  check_marker(cl = c(1, 5, 7), topcellN = 2, cis = TRUE)
print(local_evidence)

# ... and `cis = FALSE` yields what it calls the canonical markers.
canonical_markers <- marker_matched |>
  check_marker(cl = c(1, 5, 7), topcellN = 2, cis = FALSE)
print(canonical_markers)

# For this example to be runnable, we need a Seurat object.
# We build a minimal synthetic one here. In your real workflow, you would use
# your own srt object.

# Fix the RNG seed so the simulated counts and cluster labels (and therefore
# the rendered plot) are identical on every build of the vignette.
set.seed(42)

n_cells <- 50L     # number of simulated cells
cluster_ids <- 0:8 # cluster labels present in pbmc.markers

# One row per marker gene, one column per simulated cell; abs() keeps the
# simulated expression values non-negative.
marker_genes <- unique(pbmc.markers$gene)
counts <- matrix(
  abs(rnorm(length(marker_genes) * n_cells, mean = 1, sd = 2)),
  nrow = length(marker_genes),
  ncol = n_cells,
  dimnames = list(marker_genes, paste0("cell_", seq_len(n_cells)))
)
srt <- CreateSeuratObject(counts = counts)

# Assign clusters that match the pbmc.markers data. Shuffling a recycled 0-8
# vector (instead of sampling with replacement) guarantees that every cluster,
# including clusters 1, 5, and 7 plotted below, contains at least one cell.
srt$seurat_clusters <- sample(rep_len(cluster_ids, n_cells))
Idents(srt) <- "seurat_clusters"


# Now, let's plot the evidence for clusters 1, 5, and 7
matchCellMarker2(marker = pbmc.markers, n = 50, spc = "Human") |>
  check_marker(cl = c(1, 5, 7), topcellN = 2, cis = TRUE) |>
  plotSeuratDot(srt = srt)

# Based on our exploration, we finalize the annotations.
# Each formula pairs one or more cluster ids (left-hand side) with a cell-type
# label (right-hand side); the ids 0-8 each appear exactly once across the six
# entries below.
# NOTE(review): how finsert() turns these formulas into the result vector is
# defined in easybio -- confirm there before reordering or merging entries.
cl2cell_final <- finsert(
  list(
    c(3) ~ "B cell",
    c(8) ~ "Megakaryocyte",
    c(7) ~ "DC",
    c(1, 5) ~ "Monocyte",
    c(0, 2, 4) ~ "Naive CD8+ T cell",
    c(6) ~ "Natural killer cell"
  ),
  len = 9 # Ensure vector length covers all clusters (0-8)
)
print("Final curated annotation:")
# Auto-print the curated mapping so it can be compared with the automated one
cl2cell_final

# A hand-written reference: each list name is a cell type and each element
# holds the marker genes for that cell type
custom_ref_list <- list(
  `T-cell` = c("CD3D", "CD3E", "CD3G"),
  `B-cell` = c("CD79A", "MS4A1"),
  Myeloid = c("LYZ", "CST3", "AIF1")
)
print(custom_ref_list)

# Convert the list into a table with columns "cell_name" and "marker"
custom_ref_df <- custom_ref_list |>
  list2dt(col_names = c("cell_name", "marker"))
head(custom_ref_df, n = 6L)

# Match the cluster markers again, this time against the custom reference
marker_custom <- matchCellMarker2(
  marker = pbmc.markers,
  ref = custom_ref_df,
  n = 50
)
# Note that the cell_name column now contains our custom cell types
marker_custom[, head(.SD, n = 2L), by = "cluster"]

# Look up markers for the "Monocyte" and "Neutrophil" cell types in Human.
# NOTE(review): `number = 5` and `min.count = 1` presumably cap the markers
# returned per cell type and filter by supporting count -- confirm in the
# get_marker() documentation.
get_marker(spc = "Human", cell = c("Monocyte", "Neutrophil"), number = 5, min.count = 1)

# Plot the distribution of the marker CD68 (presumably across the reference
# cell types -- confirm in the plotMarkerDistribution() documentation).
plotMarkerDistribution(mkr = "CD68")