## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for this vignette: merge each chunk's source and
# output into a single block and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(ORscraper)

## -----------------------------------------------------------------------------
# Load the example inputs shipped with ORscraper: the contents of the
# package's extdata directory (handed to read_pdf_files()) and the gene list
# spreadsheet.
#
# readxl is only needed to read the spreadsheet, so check for it up front and
# fail with an actionable message instead of an error from deep inside `::`.
if (!requireNamespace("readxl", quietly = TRUE)) {
  stop(
    "The readxl package is required for this vignette, ",
    "install it with install.packages('readxl').",
    call. = FALSE
  )
}

# mustWork = TRUE makes system.file() raise an error when the bundled data is
# missing; without it the call silently returns "" and the failure only shows
# up later as an obscure read error.
InputPath <- system.file("extdata", package = "ORscraper", mustWork = TRUE)
files <- ORscraper::read_pdf_files(InputPath)

genes_file <- system.file(
  "extdata", "Genes.xlsx",
  package = "ORscraper",
  mustWork = TRUE
)
genes <- readxl::read_excel(genes_file)

# Distinct values of the GEN column; passed to the extraction helpers below.
mutations <- unique(genes$GEN)

## -----------------------------------------------------------------------------
# Read the content of the first entry of `files` (the result of
# read_pdf_files() above). The remaining chunks of this vignette all operate
# on this single `lines` object.
lines <- ORscraper::read_pdf_content(files[1])  # Example with the first file
# Show the first few elements as a quick look at what was read.
head(lines)

## -----------------------------------------------------------------------------
# Extract four labelled fields from `lines`. Every call gets an empty starting
# value, the content read from the report, and a pattern string for the field
# label (presumably a regular expression matched against each line -- confirm
# in the ORscraper documentation).
diagnostic <- NULL
gender <- NULL
tumor_cell_percentage <- NULL
quality <- NULL

diagnostic <- ORscraper::extract_values_start_end(
  diagnostic, lines, ".*Diagnóstico:\\s"
)
gender <- ORscraper::extract_values_start_end(
  gender, lines, ".*Sexo:\\s*"
)
tumor_cell_percentage <- ORscraper::extract_values_start_end(
  tumor_cell_percentage, lines, ".*% células tumorales:\\s"
)
quality <- ORscraper::extract_values_start_end(
  quality,
  lines,
  ".*CALIDAD DE LA MUESTRA /LIMITACIONES PARA SU ANÁLISIS:\\s"
)

## -----------------------------------------------------------------------------
# Extract four more fields with extract_intermediate_values(). Each call gets
# an empty starting value, the report content in `lines`, and the literal label
# text to look for (how the value is located relative to the label is defined
# by ORscraper -- see its documentation).
NHC_Data <- NULL
NB_values <- NULL
dates <- NULL
textDiag <- NULL

NHC_Data <- ORscraper::extract_intermediate_values(NHC_Data, lines, "NHC:")
NB_values <- ORscraper::extract_intermediate_values(
  NB_values, lines, "biopsia:"
)
dates <- ORscraper::extract_intermediate_values(dates, lines, "Fecha:")
textDiag <- ORscraper::extract_intermediate_values(
  textDiag, lines, "de la muestra:"
)

## -----------------------------------------------------------------------------
# Run the table extraction once on `lines` with the gene list, then split the
# returned object into its first five components by position. The names on the
# left are the labels this vignette gives to each position.
TableValues <- ORscraper::extract_values_from_tables(lines, mutations)

mutateGenes <- TableValues[[1L]]
pathogenity <- TableValues[[2L]]
frequencies <- TableValues[[3L]]
codifications <- TableValues[[4L]]
changes <- TableValues[[5L]]

## -----------------------------------------------------------------------------
# Extract fusions from the same report content, using the same gene list that
# was passed to the table extraction.
fusions <- ORscraper::extract_fusions(lines, mutations)

## ----eval=FALSE---------------------------------------------------------------
# search_pathogenity <- search_ncbi_clinvar(pathogenity, mutateGenes, codifications)

## -----------------------------------------------------------------------------
# Apply filter_pathogenic_only() to the gene names, the changes and the
# frequencies in turn, always with the same `pathogenity` values as the first
# argument (presumably the filter keeps only the entries flagged pathogenic --
# confirm in the ORscraper documentation).
pathogenic_mutations <- ORscraper::filter_pathogenic_only(
  pathogenity, mutateGenes
)
pathogenic_changes <- ORscraper::filter_pathogenic_only(
  pathogenity, changes
)
pathogenic_frequencies <- ORscraper::filter_pathogenic_only(
  pathogenity, frequencies
)

## -----------------------------------------------------------------------------
# Classify the biopsy values extracted earlier via the "biopsia:" label.
biopsies_identifiers <- ORscraper::classify_biopsy(NB_values)

## -----------------------------------------------------------------------------
# Derive chip identifiers from the full `files` object (all files, not only
# the first one used for `lines`).
chips <- ORscraper::extract_chip_id(files)

