## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults for every chunk that follows in this document.
knitr::opts_chunk$set(
  eval = FALSE,     # display the example code without executing it
  comment = "#>",   # prefix placed in front of printed output lines
  collapse = TRUE   # merge a chunk's source and output into one block
)

## ----setup, include=FALSE-----------------------------------------------------
# library(eyeris)
# library(dplyr)
# library(DBI)

## ----basic-creation-----------------------------------------------------------
# # load your EyeLink eye-tracking data ASC file
# eyeris_data <- load_asc("path/to/your/data.asc")
# 
# # preprocess and epoch your data with eyeris glassbox functions
# processed_data <- eyeris_data %>%
#   glassbox() %>%
#   epoch(
#     events = "TRIAL_START_{trial_type}_{trial_number}",
#     limits = c(-0.5, 2.0),
#     label = "trial_epochs"
#   )
# 
# # Enable eyeris database alongside CSV files
# bidsify(
#   processed_data,
#   bids_dir = "~/my_eyetracking_study",
#   participant_id = "001",
#   session_num = "01",
#   task_name = "memory_task",
#   csv_enabled = TRUE,    # still create CSV files
#   db_enabled = TRUE,     # while also creating your eyeris project database
#   db_path = "study_database"  # creates study_database.eyerisdb
# )

## ----cloud-workflow-----------------------------------------------------------
# bidsify(
#   processed_data,
#   bids_dir = "~/my_eyetracking_study",
#   participant_id = "001",
#   session_num = "01",
#   task_name = "memory_task",
#   csv_enabled = FALSE,   # skip CSV creation
#   db_enabled = TRUE,     # use an eyeris project database only
#   db_path = "study_database"
# )

## ----batch-processing---------------------------------------------------------
# subjects <- c("001", "002", "003", "004", "005")
# data_dir <- "~/raw_eyetracking_data"
# bids_dir <- "~/processed_study_data"
# 
# for (subject_id in subjects) {
#   cat("Processing subject", subject_id, "\n")
# 
#   subject_data <- file.path(
#       data_dir,
#       paste0("sub-", subject_id),
#       "eye",
#       paste0("sub-", subject_id, ".asc")
#     ) %>%
#     glassbox() %>%
#     epoch(
#       events = "STIMULUS_{condition}_{trial}",
#       limits = c(-1, 3),
#       label = "stimulus_response"
#     )
# 
#   # then add to eyeris database (which automatically handles subject cleanup)
#   bidsify(
#     subject_data,
#     bids_dir = bids_dir,
#     participant_id = subject_id,
#     session_num = "01",
#     task_name = "attention_task",
#     csv_enabled = FALSE,
#     db_enabled = TRUE,
#     db_path = "attention_study_db"
#   )
# }

## ----connection---------------------------------------------------------------
# con <- eyeris_db_connect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db"  # will look for attention_study_db.eyerisdb
# )
# 
# # be sure to always disconnect when done (or use on.exit to ensure cleanup)
# on.exit(eyeris_db_disconnect(con))

## ----exploration--------------------------------------------------------------
# # first get a comprehensive summary of your eyeris project database
# summary_info <- eyeris_db_summary(
#   "~/processed_study_data",
#   "attention_study_db"
# )
# 
# summary_info$subjects      # all subjects in database
# summary_info$data_types    # available data types
# summary_info$sessions      # session information
# summary_info$tasks         # task names
# summary_info$total_tables  # total number of tables
# 
# # list all available tables
# all_tables <- eyeris_db_list_tables(con)
# print(all_tables)
# 
# # filter tables by data type
# timeseries_tables <- eyeris_db_list_tables(con, data_type = "timeseries")
# confounds_tables <- eyeris_db_list_tables(con, data_type = "run_confounds")
# 
# # filter tables by subject
# subject_001_tables <- eyeris_db_list_tables(con, subject = "001")

## ----simple-extraction--------------------------------------------------------
# # extract ALL data for ALL subjects (returns a named list)
# all_data <- eyeris_db_collect("~/processed_study_data", "attention_study_db")
# 
# # view available data types
# names(all_data)
# 
# # access specific data types
# timeseries_data <- all_data$timeseries
# events_data <- all_data$events
# confounds_data <- all_data$run_confounds

## ----targeted-extraction------------------------------------------------------
# # extract data for specific subjects only
# subset_subjects <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   subjects = c("001", "002", "003")
# )
# 
# # extract specific data types only
# behavioral_data <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   data_types = c("events", "epochs", "confounds_summary")
# )
# 
# # extract data for specific sessions and tasks
# session_01_data <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   sessions = "01",
#   tasks = "attention_task"
# )

## ----binocular-extraction-----------------------------------------------------
# # extract data from both eyes
# binocular_data <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db"
# )
# 
# # the function automatically combines left and right eye data
# # check if you have binocular data
# unique(binocular_data$timeseries$eye_suffix)  # should show "eyeL" and "eyeR"
# 
# # extract data for specific eye only
# left_eye_data <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   eye_suffixes = "eyeL"
# )

## ----epoch-extraction---------------------------------------------------------
# # extract specific epoch data
# trial_epochs <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   data_types = c("epochs", "confounds_events", "confounds_summary"),
#   epoch_labels = "stimulus_response"  # match your epoch label
# )
# 
# # multiple epoch types
# multiple_epochs <- eyeris_db_collect(
#   bids_dir = "~/processed_study_data",
#   db_path = "attention_study_db",
#   data_types = "epochs",
#   epoch_labels = c("stimulus_response", "baseline_period")
# )

## ----output-formats-----------------------------------------------------------
# list_format <- eyeris_db_collect(
#   "~/processed_study_data",
#   "attention_study_db"
# )
# 
# # access individual data types
# pupil_data <- list_format$timeseries
# trial_data <- list_format$epochs

## ----sql-queries--------------------------------------------------------------
# # first connect to your eyeris project database
# con <- eyeris_db_connect("~/processed_study_data", "attention_study_db")
# 
# # write your custom SQL query
# custom_query <- "
#   SELECT subject_id, session_id, task_name,
#          AVG(pupil_raw_deblink_detransient_interpolate_lpfilt_z) as mean_pupil,
#          COUNT(*) as n_samples
#   FROM timeseries_001_01_attention_task_run01_eyeL
#   WHERE pupil_clean IS NOT NULL
#   GROUP BY subject_id, session_id, task_name
# "
# 
# results <- DBI::dbGetQuery(con, custom_query)
# print(results)
# 
# # a complex cross-table query
# complex_query <- "
#   SELECT e.subject_id,
#          e.matched_event,
#          e.text_unique,
#          AVG(t.pupil_raw_deblink_detransient_interpolate_lpfilt_z) as mean_pupil_in_epoch,
#          c.blink_rate_hz
#   FROM epochs_001_01_attention_task_run01_stimulus_response_eyeL e
#   JOIN timeseries_001_01_attention_task_run01_eyeL t
#     ON e.subject_id = t.subject_id
#     AND t.time_orig BETWEEN e.epoch_start AND e.epoch_end
#   JOIN run_confounds_001_01_attention_task_run01_eyeL c
#     ON e.subject_id = c.subject_id
#   GROUP BY e.subject_id, e.matched_event, e.text_unique, c.blink_rate_hz
# "
# 
# complex_results <- DBI::dbGetQuery(con, complex_query)
# print(complex_results)

## ----individual-tables--------------------------------------------------------
# # read a specific table directly
# specific_table <- eyeris_db_read(
#   con = con,
#   table_name = "timeseries_001_01_attention_task_run01_eyeL"
# )
# 
# # read from eyeris database with filters
# filtered_data <- eyeris_db_read(
#   con = con,
#   data_type = "events",
#   subject = "001",
#   session = "01",
#   task = "attention_task"
# )
# 
# # read epoch data from eyeris database with specific epoch label
# epoch_data <- eyeris_db_read(
#   con = con,
#   data_type = "epochs",
#   subject = "001",
#   epoch_label = "stimulus_response"
# )

## ----analysis-example-1-------------------------------------------------------
# # extract all timeseries data
# pupil_data <- eyeris_db_collect(
#   "~/processed_study_data",
#   "attention_study_db",
#   data_types = "timeseries"
# )$timeseries
# 
# # analyze pupil responses by subject and session
# pupil_summary <- pupil_data %>%
#   filter(!is.na(pupil_clean)) %>%
#   group_by(subject_id, session_id) %>%
#   summarise(
#     mean_pupil = mean(pupil_clean),
#     sd_pupil = sd(pupil_clean),
#     samples_per_subject = n(),
#     .groups = 'drop'
#   )
# 
# print(pupil_summary)
# 
# # compare to loading individual CSV files (which should be much slower!)
# # csv_files <- list.files("~/processed_study_data",
# #                        pattern = "timeseries.*\\.csv$",
# #                        recursive = TRUE, full.names = TRUE)
# # csv_data <- map_dfr(csv_files, read_csv)

## ----analysis-example-2-------------------------------------------------------
# # extract confounds data for quality control
# confounds_data <- eyeris_db_collect(
#   "~/processed_study_data",
#   "attention_study_db",
#   data_types = c("run_confounds", "confounds_summary")
# )
# 
# # identify subjects with poor data quality
# quality_control <- confounds_data$run_confounds %>%
#   group_by(subject_id, session_id) %>%
#   summarise(
#     mean_blink_rate = mean(blink_rate_hz, na.rm = TRUE),
#     mean_prop_invalid = mean(prop_invalid, na.rm = TRUE),
#     mean_gaze_variance = mean(gaze_x_var_px, na.rm = TRUE),
#     .groups = 'drop'
#   ) %>%
#   mutate(
#     high_blink_rate = mean_blink_rate > 0.5,  # arbitrary thresholds
#     high_invalid_data = mean_prop_invalid > 0.3,
#     high_gaze_variance = mean_gaze_variance > 10000,
#     exclude_subject = high_blink_rate | high_invalid_data | high_gaze_variance
#   )
# 
# # then view the subjects recommended for exclusion
# exclude_list <- quality_control %>%
#   filter(exclude_subject) %>%
#   select(subject_id, session_id, exclude_subject)
# 
# print(exclude_list)

## ----performance-comparison---------------------------------------------------
# # benchmark database approach
# system.time({
#   db_data <- eyeris_db_collect(
#     "~/processed_study_data",
#     "attention_study_db",
#     subjects = c("001", "002", "003", "004", "005"),
#     data_types = "timeseries"
#   )
# })
# 
# # benchmark CSV approach
# # system.time({
# #   csv_files <- list.files("~/processed_study_data",
# #                          pattern = "timeseries.*\\.csv$",
# #                          recursive = TRUE, full.names = TRUE)
# #   csv_data <- map_dfr(csv_files[1:5], read_csv)  # only first 5 subjects
# # })
# 
# # memory usage comparison
# object.size(db_data)  # database extraction
# # object.size(csv_data)  # CSV loading
# 
# # file size comparison
# db_file_size <- file.size("~/processed_study_data/derivatives/attention_study_db.eyerisdb")
# csv_total_size <- sum(file.size(list.files("~/processed_study_data",
#                                           pattern = "\\.csv$",
#                                           recursive = TRUE,
#                                           full.names = TRUE)))
# 
# cat("Database file size:", round(db_file_size / 1024^2, 2), "MB\n")
# cat("Total CSV file size:", round(csv_total_size / 1024^2, 2), "MB\n")
# cat("Storage efficiency:", round(db_file_size / csv_total_size * 100, 1), "% of CSV size\n")

## ----best-practices-----------------------------------------------------------
# # 1. Always use descriptive database names
# bidsify(data, db_path = "study_name_pilot_2024")  # good
# # bidsify(data, db_path = "my-project")           # default, not descriptive
# 
# # 2. Use database-only mode for large studies
# bidsify(data, csv_enabled = FALSE, db_enabled = TRUE)  # efficient
# 
# # 3. Create separate databases for different experiments
# bidsify(data, db_path = "experiment_1_attention")
# bidsify(data, db_path = "experiment_2_memory")
# 
# # 4. Always disconnect from databases
# con <- eyeris_db_connect("~/data", "study_db")
# # ... do your work ...
# # then disconnect ...
# eyeris_db_disconnect(con)
# 
# # or use on.exit for automatic cleanup
# process_data <- function() {
#   con <- eyeris_db_connect("~/data", "study_db")
#   on.exit(eyeris_db_disconnect(con))
# 
#   # ... your analysis code here ...
# 
#   results <- eyeris_db_collect("~/data", "study_db")
#   return(results)
# }

## ----cloud-optimization-------------------------------------------------------
# # 1. Use database-only workflow to minimize I/O costs
# process_cloud_data <- function(subject_list, input_bucket, output_bucket) {
#   for (subject in subject_list) {
#     # ... for demo purposes only -- download raw data ...
#     download_from_cloud(subject, input_bucket)
# 
#     # ... process and add to database (no CSV files) ...
#     eyeris_data <- glassbox(local_file) %>%
#       epoch(...)
# 
#     bidsify(
#       eyeris_data,
#       bids_dir = "local_processing",
#       participant_id = subject,
#       csv_enabled = FALSE,  # skip CSV for cloud efficiency
#       db_enabled = TRUE,
#       db_path = "cloud_study_db"
#     )
# 
#     # clean up local files
#     unlink(local_file)
#   }
# 
#   # upload final database back to your cloud
#   upload_to_cloud("cloud_study_db.eyerisdb", output_bucket)
# }
# 
# # 2. Use database for distributed analysis
# analyze_cloud_data <- function() {
#   # download only the database file
#   download_from_cloud("cloud_study_db.eyerisdb")
# 
#   # extract only the data you need
#   analysis_data <- eyeris_db_collect(
#     "local_processing",
#     "cloud_study_db",
#     data_types = c("epochs", "confounds_summary"),
#     subjects = target_subjects
#   )
# 
#   # run analysis on extracted subset
#   results <- run_statistical_analysis(analysis_data)
# 
#   return(results)
# }

## ----error-handling-----------------------------------------------------------
# # safe eyeris project database operations with error handling
# safe_extract <- function(bids_dir, db_path, ...) {
#   tryCatch({
#     data <- eyeris_db_collect(bids_dir, db_path, ...)
#     return(data)
#   }, error = function(e) {
#     cat("Error extracting data:", e$message, "\n")
# 
#     # first check if eyeris project database exists
#     db_file <- file.path(bids_dir, "derivatives", paste0(db_path, ".eyerisdb"))
#     if (!file.exists(db_file)) {
#       cat("eyeris project database file not found:", db_file, "\n")
#       return(NULL)
#     }
# 
#     # try connecting to your eyeris project database
#     con <- tryCatch({
#       eyeris_db_connect(bids_dir, db_path)
#     }, error = function(e2) {
#       cat("Cannot connect to eyeris project database:", e2$message, "\n")
#       return(NULL)
#     })
# 
#     if (!is.null(con)) {
#       # list available tables for debugging
#       tables <- eyeris_db_list_tables(con)
#       cat("Available tables:\n")
#       print(tables)
#       eyeris_db_disconnect(con)
#     }
# 
#     return(NULL)
#   })
# }
# 
# # example usage
# data <- safe_extract("~/my_study", "study_database",
#                     subjects = c("001", "002"),
#                     data_types = "timeseries")

## ----csv-to-database----------------------------------------------------------
# # if you have existing eyeris-derived CSV files and want to migrate to a database
# migrate_csv_to_database <- function(bids_dir, db_path) {
#   # ... find all CSV files ...
#   csv_files <- list.files(bids_dir, pattern = "\\.csv$",
#                          recursive = TRUE, full.names = TRUE)
# 
#   # ... connect to an eyeris database ...
#   con <- eyeris_db_connect(bids_dir, db_path)
#   on.exit(eyeris_db_disconnect(con))
# 
#   for (csv_file in csv_files) {
#     cat("Processing:", basename(csv_file), "\n")
# 
#     # parse filename to extract metadata
#     # ... (which of course depends on your CSV naming convention) ...
#     filename_parts <- parse_bids_filename(basename(csv_file))
# 
#     # ... read CSV data ...
#     csv_data <- read.csv(csv_file)
# 
#     # ... then write to the eyeris project database ...
#     write_eyeris_data_to_db(
#       data = csv_data,
#       con = con,
#       data_type = filename_parts$data_type,
#       sub = filename_parts$subject,
#       ses = filename_parts$session,
#       task = filename_parts$task,
#       # ... other parameters
#     )
#   }
# 
#   cat("Migration complete!\n")
# }

## ----database-to-csv----------------------------------------------------------
# # export specific data back to CSV format (if needed)
# export_database_subset <- function(bids_dir, db_path, output_dir) {
# 
#   # ... extract data from the eyeris project database ...
#   data <- eyeris_db_collect(bids_dir, db_path,
#                              subjects = c("001", "002"),
#                              data_types = c("timeseries", "events"))
# 
#   # ... create an output directory ...
#   dir.create(output_dir, recursive = TRUE)
# 
#   # ... then export each data type ...
#   for (data_type in names(data)) {
#     filename <- file.path(output_dir, paste0(data_type, "_subset.csv"))
#     write.csv(data[[data_type]], filename, row.names = FALSE)
#     cat("Exported:", filename, "\n")
#   }
# }

## ----session-info-------------------------------------------------------------
# sessionInfo()