## ----setup, include=FALSE-----------------------------------------------------
# Chunk defaults for the whole document: collapse = TRUE and "#>" as the
# comment option.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# The code below attaches bigmemory and calls as.big.matrix(). When the
# package is not installed, print a notice and call knitr::knit_exit().
if (!requireNamespace("bigmemory", quietly = TRUE)) {
  cat("This vignette requires the 'bigmemory' package.\n")
  knitr::knit_exit()
}

library(bigKNN)
library(bigmemory)

## ----helpers, include=FALSE---------------------------------------------------
# Build a long-format table of neighbours, one row per (query, rank) pair.
#
# index      Matrix with one row per query; each entry is a position in
#            `ref_ids`.
# query_ids  Labels for the queries; element i labels row i of `index`.
# ref_ids    Labels looked up through the entries of `index`.
# distance   Optional matrix laid out like `index`; when given, a `distance`
#            column rounded to 5 significant digits is added.
#
# Returns a data frame with columns query, rank, neighbor (and distance when
# supplied), or NULL when `query_ids` is empty.
neighbor_table <- function(index, query_ids, ref_ids, distance = NULL) {
  per_query <- vector("list", length(query_ids))

  for (row in seq_along(query_ids)) {
    # The single query label is recycled across all ranks of this row.
    block <- data.frame(
      query = query_ids[row],
      rank = seq_len(ncol(index)),
      neighbor = ref_ids[index[row, ]],
      row.names = NULL
    )
    if (!is.null(distance)) {
      block$distance <- signif(distance[row, ], 5)
    }
    per_query[[row]] <- block
  }

  # Stack the per-query pieces in query order.
  do.call(rbind, per_query)
}

## ----create-data--------------------------------------------------------------
# Toy reference set: eight labelled rows (r1..r8) with three numeric columns.
reference_points <- data.frame(
  id = sprintf("r%d", 1:8),
  x1 = c(0.0, 0.2, 1.0, 1.2, 3.0, 3.2, 4.0, 4.2),
  x2 = c(0.0, 0.1, 0.9, 1.0, 3.0, 3.1, 4.0, 4.1),
  x3 = c(0.5, 0.4, 1.2, 1.1, 2.8, 2.9, 3.8, 3.9)
)

# Three labelled query rows (q1..q3) with the same three numeric columns.
query_points <- data.frame(
  id = sprintf("q%d", 1:3),
  x1 = c(0.1, 1.1, 3.1),
  x2 = c(0.0, 1.0, 3.0),
  x3 = c(0.45, 1.15, 2.85)
)

# Drop the id column: the reference becomes a big.matrix, the queries stay an
# ordinary numeric matrix.
reference <- as.big.matrix(
  as.matrix(reference_points[, c("x1", "x2", "x3")])
)
query_matrix <- as.matrix(query_points[, c("x1", "x2", "x3")])

# Show both data frames.
reference_points
query_points

## ----exact-truth--------------------------------------------------------------
# Ask knn_bigmatrix() for k = 3 neighbours of every row of query_matrix among
# the rows of `reference`, using the Euclidean metric. Later chunks pass
# `exact` to recall_against_exact() as the baseline.
# NOTE(review): exclude_self = FALSE presumably because the queries are a
# separate matrix rather than rows of `reference` — confirm against the
# knn_bigmatrix() documentation.
exact <- knn_bigmatrix(
  reference,
  query = query_matrix,
  k = 3,
  metric = "euclidean",
  exclude_self = FALSE
)

# Print the raw result, then its index/distance components as a long table
# labelled with the ids from query_points and reference_points.
exact
neighbor_table(
  exact$index,
  query_ids = query_points$id,
  ref_ids = reference_points$id,
  distance = exact$distance
)

## ----approx-top3--------------------------------------------------------------
# Hand-written "approximate" neighbour indices: one row per query, three
# positions into the reference rows per query.
approx_top3 <- matrix(
  c(
    8, 3, 1,
    7, 4, 2,
    6, 2, 5
  ),
  nrow = 3,
  byrow = TRUE
)

# Show the approximate neighbours with query and reference ids attached.
neighbor_table(
  approx_top3,
  ref_ids = reference_points$id,
  query_ids = query_points$id
)

## ----recall-------------------------------------------------------------------
# Score the hand-written approximate neighbours against the exact result
# with recall_against_exact() at k = 3.
recall_before <- recall_against_exact(exact, approx_top3, k = 3)

# Print the full recall object, then its per_query component next to the
# query ids.
recall_before
data.frame(
  query = query_points$id,
  recall = recall_before$per_query,
  row.names = NULL
)

## ----candidate-pool-----------------------------------------------------------
# Wider candidate set: each row starts with the same three indices as
# approx_top3 and adds two more candidates per query.
candidate_pool <- matrix(
  c(
    8, 3, 1, 2, 6,
    7, 4, 2, 3, 1,
    6, 2, 5, 7, 1
  ),
  nrow = 3,
  byrow = TRUE
)

# Show the five candidates per query with ids attached.
neighbor_table(
  candidate_pool,
  ref_ids = reference_points$id,
  query_ids = query_points$id
)

## ----rerank-------------------------------------------------------------------
# Pass the five candidates per query from candidate_pool to
# rerank_candidates_bigmatrix() with the Euclidean metric and top_k = 3.
# NOTE(review): assumes candidate_index entries are 1-based row numbers of
# `reference`, matching how neighbor_table() uses them to index ref_ids —
# confirm against the rerank_candidates_bigmatrix() documentation.
reranked <- rerank_candidates_bigmatrix(
  reference,
  query = query_matrix,
  candidate_index = candidate_pool,
  metric = "euclidean",
  top_k = 3
)

# Print the raw result, then its index/distance components as a labelled
# long table.
reranked
neighbor_table(
  reranked$index,
  query_ids = query_points$id,
  ref_ids = reference_points$id,
  distance = reranked$distance
)

## ----compare-before-after-----------------------------------------------------
# Recall of the reranked result against the same exact baseline, at k = 3.
recall_after <- recall_against_exact(exact, reranked, k = 3)

# Two-row summary: the `overall` component of the recall object before and
# after reranking.
data.frame(
  stage = c("Approximate top-3", "Reranked top-3 from 5 candidates"),
  overall_recall = c(recall_before$overall, recall_after$overall),
  row.names = NULL
)

## ----rerank-limit-------------------------------------------------------------
# Same rerank call as before, but the candidates are approx_top3 itself:
# three candidates per query with top_k = 3.
# NOTE(review): presumably reranking can then only reorder those three
# candidates, not introduce new ones — confirm against the
# rerank_candidates_bigmatrix() documentation.
reranked_limited <- rerank_candidates_bigmatrix(
  reference,
  query = query_matrix,
  candidate_index = approx_top3,
  metric = "euclidean",
  top_k = 3
)

# Recall of the limited rerank against the exact baseline, at k = 3.
recall_after_limited <- recall_against_exact(exact, reranked_limited, k = 3)

# Two-row summary: overall recall of the approximate top-3 versus the rerank
# restricted to those same three candidates.
data.frame(
  stage = c("Approximate top-3", "Reranked top-3 from same 3 candidates"),
  overall_recall = c(recall_before$overall, recall_after_limited$overall),
  row.names = NULL
)

