## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults applied to every chunk of the document.
knitr::opts_chunk$set(
  collapse = TRUE,
  tidy = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  fig.width = 9,
  fig.height = 7
)
# Fix the RNG state so repeated renders start from the same seed.
set.seed(888L)
# Directory holding the pre-computed .rds objects read by the hidden chunks.
# It can be overridden with the BIRDDOG_DATA_DIR environment variable so the
# document can be rebuilt on another machine; when the variable is unset or
# empty, the original author-local path is used.
local_data <- Sys.getenv("BIRDDOG_DATA_DIR", unset = "")
if (!nzchar(local_data)) {
  local_data <- "~/Sync/birddog/data-biogas/rawfiles-direct"
}
has_local_data <- dir.exists(local_data)
# Evaluate chunks only when the data directory is actually present.
knitr::opts_chunk$set(eval = has_local_data)
# When local data is absent (e.g. CRAN), no chunk runs; the pre-built HTML ships instead.

## ----eval = FALSE-------------------------------------------------------------
# # stable version (CRAN)
# install.packages("birddog")
# 
# # development version (GitHub)
# # install.packages("remotes")
# remotes::install_github("roneyfraga/birddog")

## -----------------------------------------------------------------------------
library(birddog)

## ----eval = FALSE-------------------------------------------------------------
# library(openalexR)
# 
# # Fetch works from OpenAlex API
# url_api <- "https://api.openalex.org/works?page=1&filter=primary_location.source.id:s121026525"
# 
# openalexR::oa_request(query_url = url_api) |>
#   openalexR::oa2df(entity = "works") |>
#   birddog::read_openalex(format = "api") ->
#   M
# 
# # Or from a CSV export
# M <- birddog::read_openalex("path/to/openalex-export.csv", format = "csv")

## ----eval = FALSE-------------------------------------------------------------
# # BibTeX
# M <- birddog::read_wos("path/to/savedrecs.bib", format = "bib")
# 
# # RIS
# M <- birddog::read_wos("path/to/savedrecs.ris", format = "ris")
# 
# # Plain text
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-plain-text")
# 
# # Tab-delimited
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-tab-delimited")

## ----eval = FALSE-------------------------------------------------------------
# # Download from OpenAlex (~15 min)
# query_oa <- "( biogas )"
# 
# openalexR::oa_fetch(
#   entity = "works",
#   title_and_abstract.search = query_oa,
#   verbose = TRUE
# ) ->
#   papers
# 
# M <- birddog::read_openalex(papers, format = "api")

## ----eval = FALSE-------------------------------------------------------------
# # Pre-computed dataset
# url_m <- "https://roneyfraga.com/volume/keep_it/biogas-data/M.rds"
# M <- readRDS(url(url_m))

## ----include = FALSE----------------------------------------------------------
# Load the saved `M` object (M.rds) from the local data directory.
M <- local_data |>
  file.path("M.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Print a compact overview of `M`.
M |>
  dplyr::glimpse()

## ----eval = FALSE-------------------------------------------------------------
# net <- birddog::sniff_network(M, type = "direct citation")

## ----include = FALSE----------------------------------------------------------
# Load the saved network object (net.rds) from the local data directory.
net <- local_data |>
  file.path("net.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Activate the node data, keep five columns, and sort by TC, highest first.
tidygraph::activate(net, nodes) |>
  dplyr::select(name, AU, PY, TI, TC) |>
  dplyr::arrange(dplyr::desc(TC))

## ----eval = FALSE-------------------------------------------------------------
# comps <- birddog::sniff_components(net)

## ----include = FALSE----------------------------------------------------------
# Load the saved components object (comps.rds) from the local data directory.
comps <- local_data |>
  file.path("comps.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Render the first five rows of the components table with gt.
gt::gt(dplyr::slice_head(comps$components, n = 5))

## ----eval = FALSE-------------------------------------------------------------
# groups <- birddog::sniff_groups(
#   comps,
#   algorithm = "fast_greedy",
#   min_group_size = 30,
#   seed = 888L
# )

## ----include = FALSE----------------------------------------------------------
# Load the saved groups object (groups.rds) from the local data directory.
groups <- local_data |>
  file.path("groups.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Render the `aggregate` element of `groups` as a gt table.
gt::gt(groups$aggregate)

## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_attributes <- birddog::sniff_groups_attributes(
#   groups,
#   growth_rate_period = 2010:2024,
#   show_results = FALSE
# )
# 

## ----include = FALSE----------------------------------------------------------
# Load the saved attributes object (groups_attributes.rds).
groups_attributes <- local_data |>
  file.path("groups_attributes.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Print the attributes table stored in the loaded object.
groups_attributes$attributes_table

## -----------------------------------------------------------------------------
# Compute the per-group keywords from `groups` (this step is run live).
groups_keywords <- groups |>
  birddog::sniff_groups_keywords()

# Show only the rows for groups c1g1, c1g2 and c1g3 as a gt table.
gt::gt(
  dplyr::filter(groups_keywords, group %in% c("c1g1", "c1g2", "c1g3"))
)

## ----eval = FALSE-------------------------------------------------------------
# # ~30 min
# groups_terms <- birddog::sniff_groups_terms(groups, algorithm = "phrase")
# 

## ----include = FALSE----------------------------------------------------------
# Load the saved terms object (groups_terms.rds) from the local data directory.
groups_terms <- local_data |>
  file.path("groups_terms.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Render the first three rows of the terms table with gt.
gt::gt(dplyr::slice_head(groups_terms$terms_table, n = 3))

## ----eval = FALSE-------------------------------------------------------------
# # ~20 min
# groups_hubs <- birddog::sniff_groups_hubs(groups)
# 

## ----include = FALSE----------------------------------------------------------
# Load the saved hubs table (groups_hubs.rds) from the local data directory.
groups_hubs <- local_data |>
  file.path("groups_hubs.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Drop rows whose zone is "noHub", round Zi and Pi to two decimals, sort by
# zone and then Zi (both descending), keep the first 15 rows, and render them
# with the `name` column turned into a link to openalex.org.
dplyr::filter(groups_hubs, zone != "noHub") |>
  dplyr::mutate(Zi = round(Zi, 2), Pi = round(Pi, 2)) |>
  dplyr::arrange(dplyr::desc(zone), dplyr::desc(Zi)) |>
  dplyr::slice_head(n = 15) |>
  gt::gt() |>
  gt::text_transform(
    fn = \(x) {
      glue::glue('<a href="https://openalex.org/{x}" target="_blank">{x}</a>')
    },
    locations = gt::cells_body(columns = name)
  )

## ----eval = FALSE-------------------------------------------------------------
# # ~1.5 min
# groups_cct <- birddog::sniff_citations_cycle_time(
#   groups,
#   scope = "groups",
#   start_year = 2000,
#   end_year = 2024
# )
# 
# groups_cct$plots[["c1g3"]]

## ----include = FALSE----------------------------------------------------------
# Load the saved citations-cycle-time object (groups_cct.rds).
groups_cct <- local_data |>
  file.path("groups_cct.rds") |>
  readRDS()

## ----eval = FALSE-------------------------------------------------------------
# groups_entropy <- birddog::sniff_entropy(
#   groups,
#   scope = "groups",
#   start_year = 2000,
#   end_year = 2024
# )
# 
# groups_entropy$plots[["c1g3"]]

## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_cumulative <- birddog::sniff_groups_cumulative(groups)
# 

## ----include = FALSE----------------------------------------------------------
# Load the saved cumulative object (groups_cumulative.rds).
groups_cumulative <- local_data |>
  file.path("groups_cumulative.rds") |>
  readRDS()

## ----eval = FALSE-------------------------------------------------------------
# suppressMessages({
#   groups_cumulative_trajectories <- birddog::sniff_groups_trajectories(groups_cumulative)
# })
# 
# birddog::plot_group_trajectories_2d(
#   groups_cumulative_trajectories,
#   group = "c1g3",
#   label_vertical_position = -2
# )
# 
# birddog::plot_group_trajectories_3d(
#   groups_cumulative_trajectories,
#   group = "c1g3"
# )
# 

## ----eval = FALSE, warning = FALSE--------------------------------------------
# traj_data <- birddog::detect_main_trajectories(
#   groups_cumulative_trajectories,
#   group = "c1g3"
# )
# 
# traj_filtered <- birddog::filter_trajectories(
#   traj_data$trajectories,
#   top_n = 3
# )
# 
# birddog::plot_group_trajectories_lines_2d(
#   traj_data = traj_data,
#   traj_filtered = traj_filtered,
#   title = "c1g3"
# )
# 
# birddog::plot_group_trajectories_lines_3d(
#   traj_data = traj_data,
#   traj_filtered = traj_filtered,
#   group_id = "c1g3"
# )

## ----eval = FALSE-------------------------------------------------------------
# # ~11 min
# groups_cumulative_citations <- birddog::sniff_groups_cumulative_citations(
#   groups,
#   min_citations = 2
# )
# 

## ----eval = FALSE-------------------------------------------------------------
# 
# groups_key_route <- birddog::sniff_key_route(groups, scope = "groups")
# 
# groups_key_route[["c1g3"]]$plot
# 
# groups_key_route[["c1g3"]]$data |>
#   dplyr::select(-name) |>
#   gt::gt()

## ----include = FALSE----------------------------------------------------------
# Load the saved key-route data for group c1g3 (key_route_c1g3_data.rds).
key_route_c1g3_data <- local_data |>
  file.path("key_route_c1g3_data.rds") |>
  readRDS()

## -----------------------------------------------------------------------------
# Keep three columns (renaming `name` to `document` and `TI` to `title`) and
# render them with the `document` column turned into a link to openalex.org.
dplyr::select(key_route_c1g3_data, document = name, name2, title = TI) |>
  gt::gt() |>
  gt::text_transform(
    fn = \(x) {
      glue::glue('<a href="https://openalex.org/{x}" target="_blank">{x}</a>')
    },
    locations = gt::cells_body(columns = document)
  )

## ----eval = FALSE-------------------------------------------------------------
# # Prepare STM data (~30 min)
# groups_stm_prepare <- birddog::sniff_groups_stm_prepare(
#   groups,
#   group_to_stm = "c1g3"
# )

## ----include = FALSE----------------------------------------------------------
# Load the saved STM preparation object (groups_stm_prepare.rds).
groups_stm_prepare <- local_data |>
  file.path("groups_stm_prepare.rds") |>
  readRDS()

## ----eval = FALSE-------------------------------------------------------------
# groups_stm_prepare$plots[['metrics_by_k']]
# groups_stm_prepare$plots[['exclusivity_vs_coherence']]

## ----eval = FALSE-------------------------------------------------------------
# # Run STM (~35 sec)
# groups_stm_run <- birddog::sniff_groups_stm_run(
#   groups_stm_prepare,
#   k_topics = 17,
#   n_top_documents = 20
# )

## ----eval = FALSE-------------------------------------------------------------
# groups_stm_run$topic_proportion |>
#   dplyr::mutate(topic_proportion = round(topic_proportion, 3)) |>
#   gt::gt()
# 
# groups_stm_run$top_documents |>
#   dplyr::group_by(topic) |>
#   dplyr::arrange(dplyr::desc(gamma)) |>
#   dplyr::slice_head(n = 3) |>
#   dplyr::select(-DI) |>
#   gt::gt() |>
#   gt::text_transform(
#     locations = gt::cells_body(columns = document),
#     fn = function(x) {
#       glue::glue('<a href="https://openalex.org/{x}" target="_blank">{x}</a>')
#     }
#   )

## -----------------------------------------------------------------------------
# Record the R version and loaded packages used for this render.
utils::sessionInfo()

