## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(BinaryReplicates)
library(tidyverse)
set.seed(42)

## -----------------------------------------------------------------------------
theta <- .4
p <- q <- .22
n <- 50
ni <- sample(2:6, n, replace = TRUE)
ti <- rbinom(n, 1, theta)
si <- rbinom(n, ni, ti*(1-q) + (1-ti)*p)
synth_data <- data.frame(ni = ni, si = si, ti=ti)

## -----------------------------------------------------------------------------
fit_em <- EMFit(ni, si)
fit_em$parameters_hat

## -----------------------------------------------------------------------------
fit <- BayesianFit(ni, si, chains = 4, iter = 5000, refresh = 0)
print(fit, pars = c("theta", "p", "q"))

## -----------------------------------------------------------------------------
synth_data <- synth_data %>%
  mutate(Y_A = average_scoring(ni, si),
         Y_M = median_scoring(ni, si),
         Y_L = likelihood_scoring(ni, si, list(theta = theta, p = p, q = q)),
         Y_MAP = MAP_scoring(ni, si, fit_em),
         Y_B = bayesian_scoring(ni, si, fit))

## -----------------------------------------------------------------------------
credint(fit)

## -----------------------------------------------------------------------------
credint(fit, level = 0.95)

## -----------------------------------------------------------------------------
theta_A <- prevalence_estimate(synth_data$Y_A)
theta_M <- prevalence_estimate(synth_data$Y_M)
theta_MAP <- prevalence_estimate(synth_data$Y_MAP)
theta_B <- bayesian_prevalence_estimate(fit)

cat("True prevalence:    ", theta, "\n")
cat("Average-based:      ", round(theta_A, 3), "\n")
cat("Median-based:       ", round(theta_M, 3), "\n")
cat("MAP-based:          ", round(theta_MAP, 3), "\n")
cat("Bayesian:           ", round(theta_B, 3), "\n")

## -----------------------------------------------------------------------------
v_L <- .35
v_U <- 1 - v_L
synth_data <- synth_data %>%
  mutate(T_A = classify_with_scores(Y_A, v_L, v_U),
         T_M = classify_with_scores(Y_M, v_L, v_U),
         T_L = classify_with_scores(Y_L, v_L, v_U),
         T_MAP = classify_with_scores(Y_MAP, v_L, v_U),
         T_B = classify_with_scores(Y_B, v_L, v_U))

## -----------------------------------------------------------------------------
confusion <- synth_data %>%
  mutate(
    Status = ifelse(ti == 1, "T=1", "T=0"),
    Average = T_A, Median = T_M, Likelihood = T_L, MAP = T_MAP, Bayesian = T_B) %>%
  pivot_longer(cols = c(Average, Median, Likelihood, MAP, Bayesian),
               names_to = "Method", values_to = "Decision") %>%
  group_by(Status, Method, Decision) %>%
  summarise(count = n(), .groups = "keep") %>%
  ungroup() %>%
  mutate(count = as.integer(count)) %>%
  pivot_wider(names_from = Decision, values_from = count, values_fill = 0)
confusion

## -----------------------------------------------------------------------------
new_data <- data.frame(ni = rep(8, 9), si = 0:8)

## -----------------------------------------------------------------------------
new_data <- new_data %>% 
  mutate(Y_B = predict_scores(ni, si, fit),
         T_B = classify_with_scores(Y_B, v_L, v_U))
new_data

