## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE,
  fig.width = 8,
  fig.height = 6,
  out.width = "70%"
)

## ----setup--------------------------------------------------------------------
# # package loading
# library(EcoCleanR)
# library(dplyr)

## -----------------------------------------------------------------------------
# # provide example species name
# species_name <- "Mexacanthina lugubris"

## -----------------------------------------------------------------------------
# ec_geographic_map(ecodata,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )

## -----------------------------------------------------------------------------
# comparison <- ec_worms_synonym(species_name,
#   ecodata,
#   scientificName = "scientificName"
# )
# print(comparison)
# # compare the columns to know if any taxa found that is not a synonym in WoRMS data base, filter bad taxa from ecodata using dplyr::filter()

## -----------------------------------------------------------------------------
# ecodata_cl <- ec_filter_by_uncertainty(ecodata,
#   uncertainty_col = "coordinateUncertaintyInMeters",
#   percentile = 0.95,
#   ask = FALSE,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )
# str(ecodata_cl[, 1:3])
# ### plot the map
# ec_geographic_map(ecodata_cl,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )

## -----------------------------------------------------------------------------
# ecodata_cl$flag_precision <- ec_flag_precision(ecodata_cl,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )
# 
# # filter the flag - flag_cordinate_precision
# ecodata_cl <- ecodata_cl %>%
#   filter(flag_precision != 1)
# str(ecodata_cl[1:3])

## ----heavy-processing-0, eval = FALSE-----------------------------------------
# # This is a heavy processing step, won’t execute during vignette building.
# direction <- "east"
# buffer <- 25000
# ocean <- "pacific"
# ecodata_cl$flag_non_region <- ec_flag_non_region(direction,
#   ocean,
#   buffer,
#   ecodata_cl,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )
# str(ecodata_cl[, 1:3])
# # filter flagged records
# ecodata_cl <- ecodata_cl %>%
#   filter(flag_non_region != 1)
# ### map view to see accepted records
# ec_geographic_map(ecodata_cl,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )

## ----heavy-processing-1, eval = FALSE-----------------------------------------
# # This is a heavy processing step, won’t execute during vignette building.
# # get the unique combination of coordiantes
# ecodata_unique <- ecodata_cl[, c("decimalLatitude", "decimalLongitude")]
# ecodata_unique <- base::unique(ecodata_unique)
# # It is recommended to check what layers available in sdm_predictors and correct name.
# # available_layers <- list_layers() # returns something like c("BO_sstmean", "BO_sstmax", ...)
# # provide layers as input to env_layers variable
# env_layers <- c("BO_sstmean", "BO_sstmin", "BO_sstmax")
# 
# ### extraction env layers
# ecodata_unique <- ec_extract_env_layers(ecodata_unique,
#   env_layers = env_layers,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )
# # A warning message if layers are in saved in cache.
# 
# ### impute env var values those were missing after extraction
# ecodata_unique <- ec_impute_env_values(
#   ecodata_unique,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude",
#   radius_km = 10,
#   iter = 3
# )
# 
# ### omit the coordinate which couldn't get any env values after imputation
# ecodata_unique <- na.omit(ecodata_unique)

## ----heavy-processing-2, eval = FALSE-----------------------------------------
# # This is a heavy processing step, won’t execute during vignette building.
# # Instead of executing it here, we will use a pre-saved cleaned file.
# ecodata_unique$flag_outliers <- ec_flag_outlier(ecodata_unique,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude",
#   env_layers,
#   itr = 50,
#   k = 3,
#   geo_quantile = 0.99,
#   maha_quantile = 0.99
# )$outlier
# 
# ### these unique combinations of coordiantes, environmental variables and outliers will be mergeed to main ecodata_cl file
# ecodata_cl <- ecodata_cl %>%
#   left_join(ecodata_unique[, c("decimalLatitude", "decimalLongitude", "flag_outliers", env_layers)],
#     by = c("decimalLatitude", "decimalLongitude")
#   )

## -----------------------------------------------------------------------------
# # pre-saved file ecodata_with_outliers instead of using ecodata_cl
# ### map view to see records with outlier probability
# ec_geographic_map_w_flag(ecodata_with_outliers,
#   flag_column = "outliers",
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )

## -----------------------------------------------------------------------------
# ### mapview to visualize accepted data
# ec_geographic_map(ecodata_cleaned,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude"
# )

## -----------------------------------------------------------------------------
# env_layers <- c("BO_sstmean", "BO_sstmax", "BO_sstmin")
# data("ecodata_cleaned")
# summary_table <- ec_var_summary(ecodata_cleaned,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude",
#   env_layers
# )
# head(summary_table)
# 
# ec_plot_var_range(ecodata_with_outliers,
#   summary_df = summary_table,
#   latitude = "decimalLatitude",
#   longitude = "decimalLongitude",
#   env_layers = env_layers
# )

