## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(causaldef)
library(stats)

# Helper for plot resizing
if (!exists("deparse1", envir = baseenv())) {
  deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) {
    paste(deparse(expr, width.cutoff, ...), collapse = collapse)
  }
}

## ----lalonde-data-------------------------------------------------------------
data("nsw_benchmark")

# Define Source: Experimental Sample
source_data <- subset(nsw_benchmark, sample_id %in% c("nsw_treated", "nsw_control"))

# Define Target: CPS Control Group (Broader population)
target_data <- subset(nsw_benchmark, sample_id == "cps_control")

# Covariates available for transport
transport_vars <- c("age", "education", "black", "hispanic", "married", "nodegree", "re74", "re75")

# Comparison of demographics
print(summary(source_data[, c("age", "education", "re74")]))
print(summary(target_data[, c("age", "education", "re74")]))

## ----transport-calc-----------------------------------------------------------
# Create causal specification for the SOURCE
source_spec <- causal_spec(
  data = source_data,
  treatment = "treat",
  outcome = "re78",
  covariates = transport_vars
)

# Compute Transport Deficiency
trans_def <- transport_deficiency(
  source_spec,
  target_data = target_data,
  transport_vars = transport_vars,
  method = "iptw",
  n_boot = 50 # Low for vignette speed
)

print(trans_def)
plot(trans_def, type = "shift")

## ----rhc-setup----------------------------------------------------------------
data("rhc")

# Preprocessing
if (is.factor(rhc$swang1)) rhc$treat <- as.numeric(rhc$swang1) - 1 else rhc$treat <- rhc$swang1
if (is.factor(rhc$dth30)) rhc$outcome <- as.numeric(rhc$dth30) - 1 else rhc$outcome <- rhc$dth30

# Variables for adjustment
covariates <- c("age", "sex", "race", "aps1", "cat1") 

spec_rhc <- causal_spec(
  data = rhc,
  treatment = "treat",
  outcome = "outcome",
  covariates = covariates
)

## ----policy-eval--------------------------------------------------------------
# Estimate propensity scores for adjustment
ps_model <- glm(treat ~ age + sex + race + aps1 + cat1, data = rhc, family = binomial)
rhc$ps <- predict(ps_model, type = "response")

# Define policies
policy_all <- rep(1, nrow(rhc))
policy_risk <- ifelse(rhc$aps1 > 50, 1, 0)

# Estimate Value (Inverse Propensity Weighted)
# Value = Mean of Y under policy. We want to MINIMIZE mortality.
# Equivalent to Maximizing Survival (1 - Y).
# Let's compute expected mortality.

get_policy_value <- function(policy, treat, outcome, ps) {
  # IPW estimator for policy value
  # Weight = I(A = \pi(X)) / P(A|X)
  w <- (treat == policy) / ifelse(policy == 1, ps, 1 - ps)
  mean(w * outcome) # Expected Mortality
}

val_all <- get_policy_value(policy_all, rhc$treat, rhc$outcome, rhc$ps)
val_risk <- get_policy_value(policy_risk, rhc$treat, rhc$outcome, rhc$ps)

cat("Estimated Mortality (Treat All):", round(val_all, 3), "\n")
cat("Estimated Mortality (Risk-Based):", round(val_risk, 3), "\n")

## ----safety-floor-------------------------------------------------------------
# 1. Estimate Deficiency of the dataset
defom <- estimate_deficiency(spec_rhc, methods = "iptw", n_boot = 0)
delta <- defom$estimates["iptw"]

# 2. Compute Safety Floor
# Utility range is [0, 1] (Mortality 0 or 1)
bounds <- policy_regret_bound(defom, utility_range = c(0, 1), method = "iptw")

print(bounds)