## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>", 
  fig.height = 5, 
  fig.width = 7, 
  out.width = "80%", 
  fig.show = "hold", 
  fig.align = "center"
)

## ----reallib, include = FALSE-------------------------------------------------
library(streamsampler)
df <- streamsampler::streamdat

## ----fakelib, eval = FALSE----------------------------------------------------
# library(streamsampler)
# library(dataRetrieval)

## ----eval=FALSE---------------------------------------------------------------
# daily <- dataRetrieval::readNWISdv(
#   siteNumbers = "01481500",
#   parameterCd = "00060",
#   startDate = "2007-10-01",
#   endDate = "2023-09-30"
# )
# 
# qw <- dataRetrieval::readNWISdv(
#   siteNumbers = "01481500",
#   parameterCd = "00095",
#   startDate = "2007-10-01",
#   endDate = "2023-09-30"
# )
# 
# daily <- daily[, -c(1,2,5)]
# colnames(daily) <- c("date", "q")
# 
# qw <- qw[, -c(1,2,5)]
# colnames(qw) <- c("date", "sc")
# 
# df <- merge(daily, qw, by = "date", all.x = TRUE, all.y = TRUE)

## ----dim----------------------------------------------------------------------
dim(df)

## ----evalq--------------------------------------------------------------------
# Subset for discharge record
q_dates <- df[!is.na(df$q), "date"]
results <- eval_dates(
  dates = q_dates, 
  rec_start = as.Date("2007-10-01"), 
  rec_end = as.Date("2023-09-30"), 
  by = "day"
)
print(results)

## ----evalqw-------------------------------------------------------------------
# Subset for sc record
qw_dates <- df[!is.na(df$sc), "date"]
sc_completeness <- eval_dates(
  dates = qw_dates, 
  rec_start = as.Date("2007-10-01"), 
  rec_end = as.Date("2023-09-30"), 
  by = "day"
)
print(sc_completeness)

## ----evalex-------------------------------------------------------------------
ex_sample_dates <- seq.Date(
  from = as.Date("2020-01-01"), 
  to = as.Date("2022-12-31"), 
  by = "9 days"
)
eval_dates(
  dates = ex_sample_dates, 
  rec_start = as.Date("2020-01-01"), 
  rec_end = as.Date("2022-12-31"), 
  by = "week"
)

## ----gapsqw-------------------------------------------------------------------
qw_dates <- as.Date(df[!is.na(df$sc), "date"])
qw_gaps <- find_gaps(dates = qw_dates)
head(qw_gaps)


## ----viewgaps-----------------------------------------------------------------
gap_start <- which(
  df$date == qw_gaps[1, "start"]
)
gap_end <- which(
  df$date == qw_gaps[1, "end"]
)
df[(gap_start - 1):(gap_end + 1), ]

## ----evalsign-----------------------------------------------------------------
q_data <- df[!is.na(df$q), "q"]
eval_sign(values = q_data)

## ----statsqw------------------------------------------------------------------
df_stats <- qw_stats(
  dates = df$date, 
  values = df$sc, 
  rec_start = as.Date("2007-10-01"), 
  rec_end = as.Date("2023-09-30"), 
  by = "day"
)
df_stats

## ----summmonth----------------------------------------------------------------
season_summary <- summarize_seasons(
  dates = df$date, 
  values = df$q, 
  season_start = 10, 
  n_seasons = 4
)

head(season_summary$monthly)

## ----summseason---------------------------------------------------------------
head(season_summary$seasonal)

## ----sznavgplot---------------------------------------------------------------
df_seasons <- season_summary$seasonal
df_seasons <- df_seasons[order(df_seasons$adj_year, df_seasons$season), ]
plot(
  df_seasons$avg_value, 
  type = "l", 
  xaxt = "n", 
  xlab = "Water Year", 
  ylim = c(0, 1200)
)
axis(
  1, 
  at = seq(1, length(df_seasons$adj_year), by = 4), 
  labels = unique(df_seasons$adj_year), 
  las = 2
)

## ----rollmeanq----------------------------------------------------------------
roll_q <- rollmean_date(
  dates = df$date, 
  values = df$q, 
  look_behind = 29, 
  look_units = "days"
)
df[["rollmean_q"]] <- roll_q

df_rollq_seasons <- summarize_seasons(
  dates = df$date, 
  values = df$rollmean_q, 
  season_start = 10, 
  n_seasons = 4
)$seasonal

df_rollq_seasons <- df_rollq_seasons[order(df_rollq_seasons$adj_year, df_rollq_seasons$season), ]
plot(
  df_seasons$avg_value, 
  type = "l", col = "darkgray", 
  xaxt = "n", 
  xlab = "Water Year", ylab = "Discharge (cfs)", 
  ylim = c(0, 1200)
)
lines(
  df_rollq_seasons$avg_value, col = "black"
)
axis(
  1, 
  at = seq(1, length(df_rollq_seasons$adj_year), by = 4), 
  labels = unique(df_rollq_seasons$adj_year), 
  las = 2
)
legend(
  "topleft", 
  c("Q", "30-day mean (cfs)"), 
  col = c("darkgray", "black"), lty = 1
)

## ----defthresh----------------------------------------------------------------
rollq_thresh <- thresholds(
  dates = df$date, 
  values = df$rollmean_q, 
  season_start = 10, 
  n_seasons = 4, 
  half_win = 2, 
  threshold = 0.8
)
head(rollq_thresh)

## ----subsamp------------------------------------------------------------------
ss_sc <- subsample(
  dates = df$date, 
  values = df$sc,  
  thresh_ref = df$rollmean_q
)
head(ss_sc)

## ----plotss-------------------------------------------------------------------

not_selected <- ss_sc[ss_sc$selection_type == "not_selected", ]
blw_thresh <- ss_sc[ss_sc$selection_type == "below_threshold", ]
excd_thresh <- ss_sc[ss_sc$selection_type == "exceeds_threshold", ]

# Sampling across dates
plot(
  not_selected$date, not_selected$thresh_ref, 
  col = "gray", log = "y", ylim = c(50, 5000), 
  xlab = "Date", ylab = "Q (cfs)"
)
points(
  blw_thresh$date, blw_thresh$thresh_ref, 
  col = "blue", pch = 16
)
points(
  excd_thresh$date, excd_thresh$thresh_ref, 
  col = "purple", pch = 16
)
legend("topright", 
  c("not_selected", "below_threshold", "exceeds_threshold"), 
  fill = c("gray", "blue", "purple")
)

# Sampling across the threshold reference
plot(
  not_selected$thresh_ref, not_selected$value, 
  log = "x", ylim = c(0, max(ss_sc$value, na.rm = TRUE)), 
  xlim = c(50, 2000), col = "gray", 
  xlab = "Q (cfs)", ylab = "SC (uS/cm)"
)
points(
  blw_thresh$thresh_ref, blw_thresh$value, 
  col = "blue", pch = 16
)
points(
  excd_thresh$thresh_ref, excd_thresh$value, 
  col = "purple", pch = 16
)
legend("topleft", 
  c("not_selected", "below_threshold", "exceeds_threshold"), 
  fill = c("gray", "blue", "purple")
)

# Compare spread
ss_sc$q_lab <- "Discharge"
ss_sc$sc_lab <- "SC"
boxplot(
  thresh_ref ~ selection_type + q_lab, data = ss_sc, 
  at = 1:3, 
  xlim = c(0.5, 7.0),
  log = "y",
  col = "#7fc97f", ylab = "", xlab = "", xaxt = "n"
)
boxplot(
  value ~ selection_type + sc_lab, data = ss_sc, 
  add = TRUE, at = 5:7 - 0.5, xaxt = "n", 
  col = "#beaed4"
)
axis(
  1, at = c(1:3, 5:7 - 0.5), 
  labels = rep(c("below", "exceeds", "not\nsampled"), 2), 
  lwd = 0
)
legend(
  "topright", 
  c("Discharge (cfs)", "SC (uS/cm)"), 
  fill = c("#7fc97f", "#beaed4")
)

## ----plotssnew----------------------------------------------------------------
ss_sc_peaks <- subsample(
  dates = df$date, 
  values = df$sc, 
  n_samples = 1, freq = "quarter", 
  thresh_ref = df$sc, 
  threshold = 0.9, n_et_samples = 10, 
  look_behind = 14, look_ahead = 14, look_units = "days", 
  season_weights = c(1, 1, 3), season_start = 1, n_seasons = 3
)


ss_df <- merge(df, ss_sc_peaks[, c("date", "selection_type")])
not_selected <- ss_df[ss_df$selection_type == "not_selected", ]
blw_thresh <- ss_df[ss_df$selection_type == "below_threshold", ]
excd_thresh <- ss_df[ss_df$selection_type == "exceeds_threshold", ]


# Sampling across dates
plot(
  not_selected$date, not_selected$sc, 
  col = "gray", log = "y", 
  xlab = "Date", ylab = "SC (uS/cm)"
)
points(
  blw_thresh$date, blw_thresh$sc, 
  col = "blue", pch = 16
)
points(
  excd_thresh$date, excd_thresh$sc, 
  col = "purple", pch = 16
)
legend("topleft", 
  c("not_selected", "below_threshold", "exceeds_threshold"), 
  fill = c("gray", "blue", "purple")
)

# Sampling across the threshold reference
plot(
  not_selected$q, not_selected$sc, col = "gray", 
  log = "x", ylim = c(0, max(ss_sc$value, na.rm = TRUE)), 
  xlab = "Q (cfs)", ylab = "SC (uS/cm)"
)
points(
  blw_thresh$q, blw_thresh$sc, 
  col = "blue", pch = 16
)
points(
  excd_thresh$q, excd_thresh$sc, 
  col = "purple", pch = 16
)
legend("topleft", 
  c("not_selected", "below_threshold", "exceeds_threshold"), 
  fill = c("gray", "blue", "purple")
)

# Compare spread
ss_df$q_lab <- "Discharge"
ss_df$sc_lab <- "SC"
boxplot(
  q ~ selection_type + q_lab, data = ss_df, 
  # at = 1:3 - 0.2, 
  at = 1:3, 
  # boxwex = 0.25, 
  xlim = c(0.5, 7.0),
  log = "y",
  col = "#7fc97f", ylab = "", xlab = "", xaxt = "n"
  # names = c("below", "exceeds", "not\nsampled")
)
boxplot(
  sc ~ selection_type + sc_lab, data = ss_df, 
  add = TRUE, at = 5:7 - 0.5, xaxt = "n", 
  col = "#beaed4"
  # names = c("below", "exceeds", "not\nsampled")
)
axis(
  1, at = c(1:3, 5:7 - 0.5), 
  labels = rep(c("below", "exceeds", "not\nsampled"), 2), 
  lwd = 0
)
legend(
  "topright", 
  c("Discharge (cfs)", "SC (uS/cm)"), 
  fill = c("#7fc97f", "#beaed4")
)

## -----------------------------------------------------------------------------
sroutine <- subsample_routine(
  dates = df$date, 
  values = df$sc, 
  day = 15, freq = "month"
)

sroutine <- merge(df[, -c(3, 4)], sroutine)

plot(
  sroutine[sroutine$selection_type == "not_selected", "date"], 
  sroutine[sroutine$selection_type == "not_selected", "value"], 
  col = "gray", log = "y", 
  xlab = "Date", 
  ylab = "SC (uS/cm)"
)
points(
  sroutine[sroutine$selection_type == "routine", "date"], 
  sroutine[sroutine$selection_type == "routine", "value"], 
  col = "blue", pch = 16
)
legend("topleft", 
  c("Not Selected", "Routine"), 
  fill = c("gray", "blue")
)

plot(
  sroutine$q[sroutine$selection_type == "not_selected"],
  sroutine$value[sroutine$selection_type == "not_selected"],
  pch = 21, col = "gray",
  xlab = "Q (cfs)", ylab = "SC (uS/cm)",
  main = "Subsampled Daily Data",
  log = "x"
)
points(
  sroutine$q[sroutine$selection_type != "not_selected"],
  sroutine$value[sroutine$selection_type != "not_selected"],
  pch = 16, cex = 1.5,
  col = c(
    "routine" = "blue"
  )[sroutine$selection_type[sroutine$selection_type != "not_selected"]]
)
legend(
  "topright",
  legend = c("Not Selected", "Routine"),
  col = c("gray", "blue"),
  pch = c(21, 16),
  bty = "n"
)

