## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults: evaluate every chunk, and suppress messages and
# warnings in the rendered output.
knitr::opts_chunk$set(eval = TRUE, message = FALSE, warning = FALSE)

## ----setup, include = FALSE---------------------------------------------------
library(edfinr)
library(dplyr)
library(ggplot2)

## ----eval = FALSE-------------------------------------------------------------
# library(edfinr)
# library(tidyverse)

## ----example-1----------------------------------------------------------------
# Download the "skinny" dataset for a single year and a single state.
# `yr` is given as a string, the same form used for single years and year
# ranges in the other get_finance_data() calls in this script.
ky_sy16 <- get_finance_data(yr = "2016", geo = "KY")

# Inspect the structure of the returned data.
glimpse(ky_sy16)

## ----example-2----------------------------------------------------------------
# Download the "full" dataset, which adds detailed expenditure data, for a
# single year and state.
ky_full_sy16 <- get_finance_data(
  yr = "2016",
  geo = "KY",
  dataset_type = "full"
)

# List the variables that appear only in the "full" dataset. Comparing column
# names against the "skinny" download for the same year and state (ky_sy16)
# avoids hard-coded column positions, which silently return NA or the wrong
# columns if the number or order of columns ever changes.
setdiff(names(ky_full_sy16), names(ky_sy16))

## ----example-3----------------------------------------------------------------
# Get data for multiple states across multiple years. Both arguments are
# strings: a "start:end" year range (2018 through 2022) and a comma-separated
# list of state codes.
sec_data <- get_finance_data(
  yr = "2018:2022",
  geo = "AL,AR,FL,GA,KY,LA,MS,MO,OK,SC,TN,TX"
)

# Get the most recent year of data for all states. The year is passed as a
# string for consistency with the other single-year calls in this script.
us_sy22 <- get_finance_data(yr = "2022", geo = "all")

## ----analysis-1---------------------------------------------------------------
# Download 2022 data for Connecticut.
ct_sy22 <- get_finance_data(geo = "CT", yr = "2022")

# Scatter local against total per-pupil revenue, one point per row of ct_sy22.
# Point color encodes urbanicity; point area encodes enrollment.
ggplot(
  ct_sy22,
  aes(x = rev_local_pp, y = rev_total_pp, color = urbanicity, size = enroll)
) +
  geom_point(alpha = 0.6) +
  # Area-proportional point sizes, with comma-formatted legend labels.
  scale_size_area(max_size = 10, labels = scales::label_comma()) +
  # Both axes are dollar amounts.
  scale_x_continuous(labels = scales::label_dollar()) +
  scale_y_continuous(labels = scales::label_dollar()) +
  labs(
    title = "Connecticut Districts' Local vs. Total Revenue Per-Pupil, SY2021-22",
    x = "Local Revenue Per-Pupil",
    y = "Total Revenue Per-Pupil",
    color = "Urbanicity",
    size = "Enrollment"
  ) +
  theme_bw()

## ----analysis-2---------------------------------------------------------------
# Compare revenue sources across urbanicity categories.
# Step 1: express each row's local, state, and federal revenue as a share of
#         its total revenue.
# Step 2: within each urbanicity group, take the simple (unweighted) mean of
#         those shares, count the rows, and total the enrollment.
revenue_analysis <- ct_sy22 |>
  mutate(
    pct_local = rev_local / rev_total,
    pct_state = rev_state / rev_total,
    pct_federal = rev_fed / rev_total
  ) |>
  group_by(urbanicity) |>
  summarise(
    # Produces avg_pct_local, avg_pct_state, avg_pct_federal (NAs ignored).
    across(
      c(pct_local, pct_state, pct_federal),
      \(share) mean(share, na.rm = TRUE),
      .names = "avg_{.col}"
    ),
    n_districts = n(),
    enrollment = sum(enroll, na.rm = TRUE)
  )

print(revenue_analysis)

