## ----setup, include=FALSE-----------------------------------------------------
# Chunk defaults for the rest of the document: collapse = TRUE and "#>" as the
# prefix for printed output.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# bigmemory is probed with requireNamespace() instead of being assumed. When
# it is missing, print a notice and ask knitr to stop knitting early.
if (!requireNamespace("bigmemory", quietly = TRUE)) {
  cat("This vignette requires the 'bigmemory' package.\n")
  knitr::knit_exit()
}

library(bigKNN)
library(bigmemory)

## ----helpers, include=FALSE---------------------------------------------------
# Flatten a k-nearest-neighbour result into one long data frame.
#
# Arguments:
#   result    - list-like object providing `k` (neighbours per query) and the
#               matrices `index` and `distance`, one row per query.
#   query_ids - labels for the queries, in the same order as the rows of
#               `result$index` / `result$distance`.
#
# Returns a data frame with columns `query`, `rank`, `neighbor` and
# `distance` (rounded to 5 significant digits), with the rows of each query
# kept together in rank order. Returns NULL when `query_ids` is empty.
knn_table <- function(result, query_ids) {
  # Rows contributed by a single query, identified by its row position.
  rows_for_query <- function(pos) {
    data.frame(
      query = query_ids[pos],
      rank = seq_len(result$k),
      neighbor = result$index[pos, ],
      distance = signif(result$distance[pos, ], 5),
      row.names = NULL
    )
  }

  pieces <- vector("list", length(query_ids))
  for (pos in seq_along(query_ids)) {
    pieces[[pos]] <- rows_for_query(pos)
  }

  # Stack the per-query pieces in query order.
  do.call(rbind, pieces)
}

# Extract the matches of one query from flattened radius-search output.
#
# Arguments:
#   index     - neighbour indices of all queries, concatenated into one vector.
#   distance  - distances parallel to `index`.
#   offset    - slice boundaries: the matches of query `i` occupy positions
#               offset[i] .. offset[i + 1] - 1 of `index` and `distance`.
#   query_ids - labels for the queries; `query_ids[i]` labels the output rows.
#   i         - position of the query to extract.
#
# Returns a data frame with columns `query`, `neighbor` and `distance`
# (rounded to 5 significant digits). A query without matches yields a
# zero-row data frame with the same columns.
radius_slice_table <- function(index, distance, offset, query_ids, i) {
  start <- as.integer(offset[i])
  end <- as.integer(offset[i + 1L] - 1L)

  if (start > end) {
    # Every column must have length zero here: data.frame() cannot recycle a
    # length-one `query` against zero-length columns and would stop with
    # "arguments imply differing number of rows". Slicing with 0L keeps the
    # type of `query_ids` while producing no rows.
    return(data.frame(
      query = query_ids[0L],
      neighbor = integer(0),
      distance = numeric(0)
    ))
  }

  # The guard above ensures start <= end, so start:end never counts downwards.
  data.frame(
    query = query_ids[i],
    neighbor = as.integer(index[start:end]),
    distance = signif(as.numeric(distance[start:end]), 5),
    row.names = NULL
  )
}

## ----create-data--------------------------------------------------------------
# Row counter 1..160 that drives every column of the reference data.
i <- seq_len(160)

# Deterministic 160 x 4 reference matrix: x1 is the row number, x2..x4 cycle
# with periods 7, 11 and 13 respectively.
reference_matrix <- cbind(
  x1 = i,
  x2 = (i %% 7) + 1,
  x3 = (i %% 11) + 0.5,
  x4 = (i %% 13) + 2
)

# big.matrix version of the same data; this is what the search calls receive.
reference <- as.big.matrix(reference_matrix)

# Four query points, each a reference row (5, 50, 120, 151) shifted by a small
# per-column offset so the query is near, but not equal to, that row.
dense_query <- rbind(
  reference_matrix[5, ] + c(0.2, 0.0, 0.1, 0.0),
  reference_matrix[50, ] + c(-0.3, 0.2, 0.0, 0.1),
  reference_matrix[120, ] + c(0.4, -0.1, 0.2, 0.0),
  reference_matrix[151, ] + c(0.1, 0.2, -0.2, 0.3)
)

# Labels "q1".."q4" used by the table helpers.
query_ids <- paste0("q", seq_len(nrow(dense_query)))

# Show the reference dimensions and the query points.
dim(reference_matrix)
dense_query

## ----build-plan---------------------------------------------------------------
# Build a Euclidean search plan for `reference` with a 64KB memory budget and
# two threads; progress reporting is switched off. This plan is reused by the
# later search calls through their `plan` argument.
plan <- knn_plan_bigmatrix(
  reference,
  metric = "euclidean",
  memory_budget = "64KB",
  num_threads = 2L,
  progress = FALSE
)

# Print the plan.
plan

## ----plan-comparison----------------------------------------------------------
# Same settings as `plan`, but with a 4KB memory budget.
plan_small <- knn_plan_bigmatrix(
  reference,
  metric = "euclidean",
  memory_budget = "4KB",
  num_threads = 2L,
  progress = FALSE
)

# Same settings as `plan`, but with a 1MB memory budget.
plan_large <- knn_plan_bigmatrix(
  reference,
  metric = "euclidean",
  memory_budget = "1MB",
  num_threads = 2L,
  progress = FALSE
)

# Tabulate the `memory_budget` and `block_size` fields of the three plans,
# ordered small, default, large.
data.frame(
  memory_budget = c(plan_small$memory_budget, plan$memory_budget, plan_large$memory_budget),
  block_size = c(plan_small$block_size, plan$block_size, plan_large$block_size),
  row.names = NULL
)

## ----planned-search-----------------------------------------------------------
# Search for the 3 nearest reference rows of every query, reusing `plan`.
# exclude_self = FALSE is passed explicitly; the queries are separate points
# rather than rows of `reference`.
planned_knn <- knn_bigmatrix(
  reference,
  query = dense_query,
  k = 3,
  plan = plan,
  exclude_self = FALSE
)

# Print the raw result, then the long-format table built by knn_table().
planned_knn
knn_table(planned_knn, query_ids = query_ids)

## ----stream-knn---------------------------------------------------------------
# Destination big.matrix objects: one row per query and 3 columns, matching
# the k = 3 requested below. Indices are stored as integer, distances as
# double.
index_store <- big.matrix(nrow(dense_query), 3, type = "integer")
distance_store <- big.matrix(nrow(dense_query), 3, type = "double")

# Same search as the planned call, with the destination matrices handed over
# through xpIndex / xpDistance.
streamed_knn <- knn_stream_bigmatrix(
  reference,
  query = dense_query,
  xpIndex = index_store,
  xpDistance = distance_store,
  k = 3,
  plan = plan,
  exclude_self = FALSE
)

# Convert the returned index and distance to ordinary matrices for display;
# distances are rounded to 4 decimals.
bigmemory::as.matrix(streamed_knn$index)
round(bigmemory::as.matrix(streamed_knn$distance), 4)

## ----stream-knn-compare-------------------------------------------------------
# Compare the streamed result with the in-memory one: indices must match
# exactly (identical), distances up to numeric tolerance (all.equal).
identical(bigmemory::as.matrix(streamed_knn$index), planned_knn$index)
all.equal(bigmemory::as.matrix(streamed_knn$distance), planned_knn$distance)

## ----stream-radius-counts-----------------------------------------------------
# Count the matches within radius 2.2 before running the radius search.
# NOTE(review): presumably one count per query row -- confirm against the
# bigKNN documentation.
radius_counts <- count_within_radius_bigmatrix(
  reference,
  query = dense_query,
  radius = 2.2,
  plan = plan,
  exclude_self = FALSE
)

# Show the counts and their sum; the sum sizes the output matrices allocated
# for the streamed radius search.
radius_counts
total_matches <- sum(radius_counts)
total_matches

## ----stream-radius------------------------------------------------------------
# Single-column destinations: index and distance get one row per match
# (total_matches rows); the offset store gets one more row than there are
# entries in radius_counts.
radius_index_store <- big.matrix(total_matches, 1, type = "integer")
radius_distance_store <- big.matrix(total_matches, 1, type = "double")
radius_offset_store <- big.matrix(length(radius_counts) + 1L, 1, type = "double")

# Radius search with the same radius (2.2) used for the counts, with the
# destination matrices passed through xpIndex / xpDistance / xpOffset.
streamed_radius <- radius_stream_bigmatrix(
  reference,
  query = dense_query,
  xpIndex = radius_index_store,
  xpDistance = radius_distance_store,
  xpOffset = radius_offset_store,
  radius = 2.2,
  plan = plan,
  exclude_self = FALSE
)

# Print the result object and its n_match field.
streamed_radius
streamed_radius$n_match

## ----stream-radius-offsets----------------------------------------------------
# Flatten the single-column results into plain vectors so they can be indexed
# by position.
radius_offset <- as.vector(bigmemory::as.matrix(streamed_radius$offset))
radius_index <- as.vector(bigmemory::as.matrix(streamed_radius$index))
radius_distance <- as.vector(bigmemory::as.matrix(streamed_radius$distance))

# Show the offsets, then use them to slice out the matches of queries 1 and 2.
radius_offset
radius_slice_table(radius_index, radius_distance, radius_offset, query_ids, 1)
radius_slice_table(radius_index, radius_distance, radius_offset, query_ids, 2)

## ----sparse-queries-----------------------------------------------------------
# Re-express the dense queries as a sparse Matrix object.
# NOTE(review): Matrix is called via `Matrix::` without a requireNamespace()
# guard like the one used for bigmemory -- confirm it is a hard dependency.
sparse_query <- Matrix::Matrix(dense_query, sparse = TRUE)

# Same k = 3 planned search, now with the sparse query.
sparse_knn <- knn_bigmatrix(
  reference,
  query = sparse_query,
  k = 3,
  plan = plan,
  exclude_self = FALSE
)

# Compare with the dense-query result: indices must match exactly, distances
# up to numeric tolerance.
identical(sparse_knn$index, planned_knn$index)
all.equal(sparse_knn$distance, planned_knn$distance)

