## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered document: collapse source and output into
# one block and prefix printed output lines with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----setup--------------------------------------------------------------------
library(ggrecipes)

## -----------------------------------------------------------------------------
# Correlations in mtcars, split on the "vs" column
# (V-shaped vs straight engines)
gg_splitcorr(data = mtcars, split = "vs", prefix = "Engine Type: ")

## -----------------------------------------------------------------------------
# Same comparison drawn with the "point" style
gg_splitcorr(
  style = "point",
  data = mtcars,
  split = "vs",
  prefix = "Engine Type: "
)

## ----fig.height = 3.5---------------------------------------------------------
# Synthetic data - bacterial strain growth rates
# 13 strains x 2 conditions x 3 replicates = 78 rows.
set.seed(123)  # keep the simulated values (and the figure) reproducible

growth_data <- data.frame(
  strain = rep(paste0("Strain", 1:13), each = 6),
  condition = rep(c("Control", "Treated"), each = 3, times = 13)
)

# Draw every growth rate from the distribution of that row's own condition.
# `condition` alternates in runs of three, so simply concatenating a block of
# "Control" draws and a block of "Treated" draws would attach roughly half of
# the values to the wrong label.
is_control <- growth_data$condition == "Control"
growth_data$growth_rate <- rnorm(
  n = nrow(growth_data),
  mean = ifelse(is_control, 0.85, 0.45),  # Control vs Treated mean
  sd = ifelse(is_control, 0.12, 0.10)     # Control vs Treated sd
)

# Default gg_rankshift() call: growth rate per strain, grouped by condition
gg_rankshift(data = growth_data, id = "strain", group = "condition",
             value = "growth_rate")

## ----fig.height = 3.5---------------------------------------------------------
# "bar" style with custom fills, rank-change colours and sizing options
gg_rankshift(
  data = growth_data, id = "strain",
  group = "condition", value = "growth_rate",
  style = "bar",
  decreasing = TRUE,
  fill = c("#e41a1c", "#377eb8"),
  rank_change_colors = c(increase = "#1b9e77",
                         decrease = "#d95f02",
                         no_change = "#7570b3"),
  panel_ratio = 0.65,
  point_size = 2.5,
  line_width = 1
)

## ----fig.height = 3.5, fig.width = 4------------------------------------------
# Reload mtcars, then bin hp and mpg into five labelled intervals each
data(mtcars)
mtcars[["horsepower"]] <- cut(
  mtcars[["hp"]],
  breaks = 5,
  labels = c("Very Low", "Low", "Medium", "High", "Very High")
)
mtcars[["miles per gallon"]] <- cut(
  mtcars[["mpg"]],
  breaks = 5,
  labels = c("Very Low", "Low", "Medium", "High", "Very High")
)

# Binned horsepower against binned fuel economy
gg_conf(
  data = mtcars,
  x = "horsepower",
  y = "miles per gallon"
)

## ----fig.height = 3.5, fig.width = 4------------------------------------------
# Custom styling
gg_conf(
  data = mtcars,
  x = "horsepower",
  y = "miles per gallon",
  fill = "lightcoral",
  point_size_range = c(5, 20),
  show_grid = FALSE
)

## ----fig.height = 3.5---------------------------------------------------------
# Same styling, faceted by the "vs" column
gg_conf(
  data = mtcars,
  x = "horsepower",
  y = "miles per gallon",
  fill = "lightcoral",
  point_size_range = c(5, 20),
  facet_x = "vs"
)

## ----fig.height = 4-----------------------------------------------------------
# Example data: gene prioritization criteria
# One row per gene; each "*_crit" column holds "Yes" where the criterion is
# met and NA otherwise. check.names = FALSE keeps the spaces in column names.
gene_data <- data.frame(
  gene = c(
    "BRCA1", "TP53", "EGFR", "KRAS", "MYC",
    "PTEN", "APC", "CDKN2A", "RB1", "VHL"
  ),
  `Missense Variant_crit` = c(
    "Yes", "Yes", "Yes", NA, "Yes",
    "Yes", NA, "Yes", NA, "Yes"
  ),
  `eQTL_crit` = c(
    "Yes", "Yes", NA, "Yes", "Yes",
    "Yes", "Yes", "Yes", "Yes", NA
  ),
  `pQTL_crit` = c(
    "Yes", NA, "Yes", "Yes", NA,
    "Yes", "Yes", NA, "Yes", "Yes"
  ),
  `GWAS Hit_crit` = c(
    "Yes", "Yes", "Yes", "Yes", NA,
    "Yes", "Yes", "Yes", NA, NA
  ),
  `Loss of Function_crit` = c(
    NA, "Yes", NA, NA, "Yes",
    "Yes", "Yes", NA, "Yes", NA
  ),
  `High Conservation_crit` = c(
    "Yes", "Yes", "Yes", "Yes", "Yes",
    "Yes", "Yes", "Yes", "Yes", "Yes"
  ),
  `mRNA DE_crit` = c(
    "Yes", NA, "Yes", NA, NA,
    "Yes", "Yes", "Yes", NA, "Yes"
  ),
  `Prot DE_crit` = c(
    NA, "Yes", NA, NA, NA,
    "Yes", NA, NA, NA, "Yes"
  ),
  check.names = FALSE
)

# Per-gene count of criteria met (NA entries are ignored by na.rm = TRUE)
crit_cols <- names(gene_data)[grepl("_crit$", names(gene_data))]
gene_data$Total <- rowSums(gene_data[crit_cols] == "Yes", na.rm = TRUE)

head(gene_data)

## -----------------------------------------------------------------------------
# Criteria plot over every column matching "_crit$", without tile text
gg_criteria(data = gene_data, id = "gene", criteria = "_crit$",
            show_text = FALSE)

## ----fig.width = 6.85, fig.height = 5-----------------------------------------
# Same plot with the "Total" column added as a barplot
gg_criteria(
  data = gene_data, id = "gene", criteria = "_crit$",
  show_text = FALSE,
  bar_column = "Total",
  bar_fill = "#A6CEE3",
  tile_fill = c(Yes = "#A6CEE3", No = "white"),
  panel_ratio = 0.7
)

## -----------------------------------------------------------------------------
# Example: VHH Variant Analysis
# Colour per one-letter amino-acid code, grouped by side-chain chemistry
aa_colors <- c(
  # Acidic (red)
  D = "#E60A0A", E = "#E60A0A",
  # Basic (blue)
  K = "#145AFF", R = "#145AFF",
  # Histidine (purple)
  H = "#8282D2",
  # Polar uncharged (orange)
  S = "#FA9600", T = "#FA9600",
  # Polar amides (cyan)
  N = "#00DCDC", Q = "#00DCDC",
  # Cysteine (yellow)
  C = "#E6E600",
  # Glycine (light gray)
  G = "#EBEBEB",
  # Proline (tan)
  P = "#DC9682",
  # Alanine (gray)
  A = "#C8C8C8",
  # Hydrophobic (green)
  V = "#0F820F", I = "#0F820F", L = "#0F820F", M = "#0F820F",
  # Aromatic (dark blue/purple)
  F = "#3232AA", W = "#B45AB4", Y = "#3232AA"
)

# One row per variant: "*_mut" columns hold the substituted residue (NA when
# the position is unchanged), followed by three numeric measurements.
vhh_variants <- data.frame(
  variant = c(
    "WT", "Mut1", "Mut2", "Mut3", "Mut4", "Mut7",
    "Mut5", "Mut6", "Mut8", "Mut9", "Mut10", "Mut11"
  ),
  Q5_mut  = c(NA, "H", NA, NA, NA, NA, "H", "H", "H", "D", NA, "H"),
  S55_mut = c(NA, NA, "P", NA, NA, "P", "P", NA, "P", "P", NA, NA),
  N73_mut = c(NA, NA, NA, "E", NA, NA, NA, "E", "E", NA, "E", NA),
  K80_mut = c(NA, "L", NA, NA, "S", "V", NA, NA, NA, "L", "S", NA),
  F99_mut = c(NA, NA, "L", NA, NA, NA, NA, NA, NA, "W", NA, "W"),
  KD_nM      = c(45, 18, 5.2, 38, 42, 20, 3.8, 15, 3.2, 4.5, 40, 22),
  yield_mg_L = c(12, 11.8, 10, 13, 11, 10, 10, 12, 10, 7.8, 12.5, 8.5),
  Tm_C       = c(68.5, 67.8, 68, 72.3, 35, 66, 67.5, 70, 70.5, 72, 38, 74)
)

## -----------------------------------------------------------------------------
# Mutation tiles coloured by residue, with three measurement barplots
gg_criteria(
  data = vhh_variants, id = "variant", criteria = "_mut$",
  bar_column = c("KD_nM", "yield_mg_L", "Tm_C"),
  panel_ratio = 2,
  # Tile appearance
  tile_fill = aa_colors,
  tile_width = 0.70, tile_height = 0.70,
  border_color = "grey40", border_width = 0.4,
  # Text and legend
  show_text = TRUE,
  text_size = 10,
  show_legend = FALSE
)

## -----------------------------------------------------------------------------
# Simulated SNP genotypes and phenotypes for 12 individuals
set.seed(123)

# Columns are drawn in the order written, so the seed above fixes every value.
snp_data <- data.frame(
  id = sprintf("P%03d", 1:12),
  # Genotype columns
  rs1234_geno = sample(
    x = c("0/0", "0/1", "1/1", NA),
    size = 12, replace = TRUE,
    prob = c(0.4, 0.4, 0.15, 0.05)
  ),
  rs5678_geno = sample(
    x = c("0/0", "0/1", "0/2", "1/1", "1/2", "2/2", NA),
    size = 12, replace = TRUE,
    prob = c(0.25, 0.25, 0.1, 0.15, 0.15, 0.05, 0.05)
  ),
  rs9012_geno = sample(
    x = c("0|0", "0|1", "1|1", "0/1", "1/2", NA),
    size = 12, replace = TRUE,
    prob = c(0.2, 0.2, 0.15, 0.2, 0.15, 0.1)
  ),
  rs3456_geno = sample(
    x = c("0/0", "0/1", "1/1", NA),
    size = 12, replace = TRUE,
    prob = c(0.45, 0.35, 0.15, 0.05)
  ),
  rs7890_geno = sample(
    x = c("0/0", "0/1", "0/2", "1/3", "2/2", NA),
    size = 12, replace = TRUE,
    prob = c(0.3, 0.25, 0.15, 0.1, 0.15, 0.05)
  ),
  rs2468_geno = sample(
    x = c("0|0", "0|1", "1|1", "1|2", NA),
    size = 12, replace = TRUE,
    prob = c(0.3, 0.35, 0.2, 0.1, 0.05)
  ),

  # Phenotype columns, used for the optional bar plots
  Age = sample(25:75, size = 12, replace = TRUE),
  BMI = round(rnorm(12, mean = 26, sd = 4), digits = 1),
  Insulin = round(rnorm(12, mean = 12, sd = 3), digits = 1)
)

head(snp_data)

## -----------------------------------------------------------------------------
# Genotype plot over every column matching "_geno$"
gg_geno(data = snp_data, id = "id", geno = "_geno$")

## ----fig.width = 7------------------------------------------------------------
# Same plot with a legend and three phenotype barplots
gg_geno(
  data = snp_data, id = "id", geno = "_geno$",
  bar_column = c("Age", "BMI", "Insulin"),
  bar_fill = c("#c77d77", "#e0b46e", "#c7bc77"),
  panel_ratio = 1,
  text_size = 10,
  show_legend = TRUE
)

## -----------------------------------------------------------------------------
# Synthetic peptide-mapping example
# Reference sequence (128 residues), split over three literals for line length
ref_seq <- paste0(
  "QVQLVESGGGLVQAGGSLRLSCAASGFTFSSYAMGWFRQAPGKEREFVAAINSGGST",
  "YYPDSVKGRFTISRDNAKNTVYLQMNSLKPEDTAVYYCAADLRGTTVNNYWGQGTQV",
  "TVSSEQKLISEEDL"
)

# One row per peptide: identifier, sequence (each a substring of ref_seq),
# rt_min and intensity.
df_peptides <- data.frame(
  id = c(
    "Pep_1004", "Pep_1010", "Pep_1007", "Pep_1011", "Pep_1009",
    "Pep_1005", "Pep_1013", "Pep_1003", "Pep_1001", "Pep_1012",
    "Pep_1006", "Pep_1008", "Pep_1002"
  ),
  sequence = c(
    "QAPGKER",
    "GRFTISR",
    "GTTVNNYWGQGTQVTVSSEQKLISEEDL",
    "GRFTISRDNAKNTVYLQMNSLK",
    "EREFVAAINSGGSTYYPDSVK",
    "QAPGKEREFVAAINSGGSTYYPDSVKGR",
    "NTVYLQMNSLKPEDTAVYYCAADLR",
    "LSCAASGFTFSSYAMGWFRQAPGKER",
    "QVQLVESGGGLVQAGGSLR",
    "PEDTAVYYCAADLRGTTVNNYWGQGTQVTVSSEQKLISEEDL",
    "FTISRDNAKNTVYLQMNSLKPEDTAVYYCAADLR",
    "LSCAASGFTFSSYAMGWFRQAPGK",
    "LSCAASGFTFSSYAMGWFR"
  ),
  rt_min = c(
    10, 28.5, 34.4, 34.4, 36, 36.5, 40.8,
    42.5, 42.8, 43.3, 44.1, 44.8, 46.7
  ),
  intensity = c(
    2769840, 2248170, 2172370, 1698280, 2202810,
    983267, 659246, 1064906, 1988932, 1438544,
    639990, 1017811, 1112824
  ),
  stringsAsFactors = FALSE
)

head(df_peptides)

## -----------------------------------------------------------------------------
# Coverage map of the peptides on the reference, wrapped at 70
gg_seq(
  data = df_peptides,
  ref = ref_seq,
  wrap = 70
)

## -----------------------------------------------------------------------------
# Adds peptide IDs, per-residue colours and two highlighted position sets
gg_seq(
  data = df_peptides, ref = ref_seq, name = "id",
  wrap = 70,
  color = c(C = "red", K = "blue", R = "#468c2d"),
  highlight = list(
    "#ffb4b4" = c(27:33, 51:57, 96:107),
    "#70bcfa" = c(1, 43, 64, 75, 86)
  )
)

## ----fig.height = 5-----------------------------------------------------------
# Adds two more highlight sets plus text annotations
gg_seq(
  data = df_peptides, ref = ref_seq, name = "id",
  wrap = 80,
  color = c(C = "red", K = "blue", R = "#468c2d"),
  highlight = list(
    "#ffb4b4" = c(27:33, 51:57, 96:107),  # CDR regions
    "#70bcfa" = c(1, 43, 64, 75, 86),     # N-terminus and lysines
    "#d68718" = c(105:106),               # Liability site
    "#94d104" = c(119:128)                # c-Myc tag
  ),
  annotate_defaults = list(face = "bold"),
  annotate = list(
    # Region labels
    list(label = "CDR1", pos = 30),
    list(label = "CDR2", pos = 54),
    list(label = "CDR3", pos = 101),
    # Single-position labels, rotated by 90
    list(label = "N-term", pos = 1, angle = 90, vjust = 1),
    list(label = "K43", pos = 43, angle = 90),
    list(label = "K64", pos = 64, angle = 90),
    list(label = "K75", pos = 75, angle = 90),
    list(label = "K86", pos = 86, angle = 90),
    list(label = "Liability", pos = 106, angle = 90),
    list(label = "c-Myc tag", pos = 124)
  )
)

## -----------------------------------------------------------------------------
# -----------------------------------------------------------------------
# Example with Clustal alignment file
# -----------------------------------------------------------------------
# Write a small two-block Clustal alignment (WT plus six mutants) to a
# temporary ".aln" file
clustal_file <- tempfile(fileext = ".aln")
writeLines(
  text = c(
    "CLUSTAL W (1.83) multiple sequence alignment",
    "",
    "WT              EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
    "Mutant1         EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
    "Mutant2         EQKLISEEDLMKTAYIAKQRQRSFVKSHFSRQLERIEKKWEAHFDDLHP",
    "Mutant3         EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLER----IEAHFDDLHP",
    "Mutant4         EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQAERIEKKIEAHFDDLHP",
    "Mutant5         EQKLISEEDLAKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDRHP",
    "Mutant6         EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
    "                *********** ***************** * ******* *******:**",
    "",
    "WT              DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "Mutant1         DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "Mutant2         DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "Mutant3         DIVALSGHTFGKTHGAGKQSS------",
    "Mutant4         DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "Mutant5         DIVALSGHTFGKTHGAGKQSSHHHHHH",
    "Mutant6         DRVALSGHTFAKTHGAGKQSS------",
    "                * ******** **********      "
  ),
  con = clustal_file
)

# Plot Clustal alignment
# The reference below is 76 residues long (49 + 27): the c-Myc tag
# (EQKLISEEDL) covers positions 1-10 and the six histidines cover 71-76.
gg_seqdiff(
  clustal = clustal_file,
  ref = paste0("EQKLISEEDLMKTAYIAKQRQISFVKSHFSRQLERIEKKIEAHFDDLHP",
               "DIVALSGHTFGKTHGAGKQSSHHHHHH"),
  color = c(K = "#285bb8", R = "#285bb8",    # Basic
            E = "#a12b20", D = "#a12b20",    # Acidic
            W = "#9b59b6", F = "#9b59b6",    # Aromatic
            H = "#f39c12"),                  # Histidine
  highlight = list(
    # Both tags share one colour, so they go into a single entry: a list
    # with two identically named elements is easy to mis-handle by name.
    "#94d104" = c(1:10, 71:76),  # N-terminal c-Myc tag + C-terminal His-tag
    "#FFE0B2" = 30:45            # Active site
  ),
  annotate = list(
    list(label = "c-Myc", pos = 5),
    list(label = "Active site", pos = 37),
    list(label = "6xHis", pos = 74)
  ),
  wrap = 60
)

## -----------------------------------------------------------------------------
# Clean up
unlink(clustal_file)

# -----------------------------------------------------------------------
# Example with DNA sequences - gene structure with regulatory elements
# -----------------------------------------------------------------------
# 130-nt reference; the position ranges below follow from the segment lengths
dna_ref <- paste0(
  "TATAAA",                       # TATA box (promoter), 1-6
  "ATGCGATCGATCGATCGTAGCTAGCT",   # Exon 1, 7-32
  "GTAAGTATCGATCGAT",             # Intron 1, 33-48
  "ACGTACGTACGTAGCTAGCTAGCTAC",   # Exon 2, 49-74
  "GTACGTACGTACGTAC",             # Intron 2, 75-90
  "GTACGTACGTAGCTAGCTAGCTACGT",   # Exon 3, 91-116
  "ACGTACGTAAATAA"                # 3'UTR with poly-A signal, 117-130
)

# Every variant carries exactly one substitution relative to dna_ref, so
# sub() (first match only) is used throughout; gsub() could silently edit a
# second site when the pattern repeats.
dna_df <- data.frame(
  sequence = c(
    dna_ref,
    sub("TATAAA", "TATATA", dna_ref),   # position 5, TATA box
    sub("GTAAGT", "ATAAGT", dna_ref),   # position 33, first base of intron 1
    # Position 46, at the 3' end of intron 1. The earlier pattern "CGATAG"
    # does not occur in dna_ref, which left this row identical to WT.
    sub("CGATAC", "CAATAC", dna_ref),
    sub("ATG", "AAG", dna_ref),         # position 8, first ATG
    sub("AATAA$", "AACAA", dna_ref),    # position 128, end of the 3'UTR
    sub("GCGATCGATCGATCG", "GCGATCAATCGATCG", dna_ref),  # position 15, exon 1
    # Position 55, exon 2. The same 14-mer occurs again across the
    # intron 2 / exon 3 boundary, so replacing all matches would also
    # change a base outside exon 2.
    # NOTE(review): this is a substitution, not a length-changing frameshift,
    # despite the id below - confirm the intended label.
    sub("ACGTACGTACGTAG", "ACGTACATACGTAG", dna_ref)
  ),
  id = c("WT", "Promoter_mut", "Splice_donor",
         "Splice_acceptor", "Start_codon", "PolyA_mut",
         "Exon1_missense", "Exon2_frameshift")
)

# Variant-vs-reference plot with the gene structure highlighted and labelled
gg_seqdiff(
  data = dna_df, ref = dna_ref, name = "id",
  wrap = 80,
  color = c(G = "#4e8fb5", C = "#845cab"),
  highlight = list(
    "#FFE0B2" = 1:6,                     # TATA box (promoter)
    "#C8E6C9" = c(7:32, 49:74, 91:116),  # Exons
    "#FFCCBC" = 117:130                  # 3'UTR with poly-A
  ),
  annotate = list(
    # Promoter and start codon
    list(label = "TATA", pos = 1, angle = 90),
    list(label = "ATG", pos = 7, angle = 90, color = "red"),
    # Exon 1 and the boundaries of intron 1
    list(label = "Exon1", pos = 19),
    list(label = "GT", pos = 33, angle = 90, size = 2.5),
    list(label = "GA", pos = 46, angle = 90, size = 2.5),
    # Exon 2 and the boundaries of intron 2
    list(label = "Exon2", pos = 61),
    list(label = "GT", pos = 75, angle = 90, size = 2.5),
    list(label = "AC", pos = 89, angle = 90, size = 2.5),
    # Exon 3 and the poly-A signal
    list(label = "Exon3", pos = 103),
    list(label = "AATAAA", pos = 125, angle = 90, color = "blue")
  )
)

## -----------------------------------------------------------------------------
# Example table: six samples (three replicates per condition) with one
# "<organ>_val" column per organ
bio_data <- data.frame(
  id = paste0("sample_", 1:6),
  condition = rep(c("Control", "Treated"), each = 3),
  replicate = rep(1:3, times = 2),

  Blood_val = c(4.8, 5.2, 4.5, 4.1, 4.3, 4.0),
  Heart_val = c(1.9, 2.1, 2.0, 1.6, 1.8, 1.7),
  Lung_val = c(3.5, 3.8, 3.2, 3.0, 3.1, 2.9),
  Liver_val = c(14.2, 15.1, 13.8, 11.5, 12.0, 11.2),
  Spleen_val = c(9.1, 8.7, 9.4, 7.2, 7.5, 7.0),
  Kidney_val = c(125.0, 112.8, 121.9, 111.1, 102.4, 103.0),
  Tumor_val = c(22.5, 24.1, 23.3, 28.2, 29.5, 27.8),
  Muscle_val = c(0.7, 0.6, 0.8, 0.5, 0.4, 0.6),
  Bone_val = c(1.4, 1.6, 1.5, 1.1, 1.2, 1.0)
)

head(bio_data)

## ----fig.height = 2.5---------------------------------------------------------
# Biodistribution plot over the "_val" columns, grouped by condition
gg_biodist(
  bio_data,
  id = "organ", value = "_val", group = "condition",
  y_label = "%ID/g",
  point_size = 1.25
)

## ----fig.height = 2.5---------------------------------------------------------
# Same plot with "Tumor" and "Kidney" passed to `separate`
gg_biodist(
  bio_data,
  id = "organ", value = "_val", group = "condition",
  separate = c("Tumor", "Kidney"),
  y_label = "%ID/g",
  point_size = 1.25
)

## ----fig.height = 2.5---------------------------------------------------------
# Customization: zero point size, error bars and custom fill colours
gg_biodist(
  bio_data,
  id = "organ", value = "_val", group = "condition",
  separate = c("Tumor", "Kidney"),
  y_label = "%ID/g",
  point_size = 0,
  error_bars = TRUE,
  fill_colors = c("#e41a1c", "#377eb8")
)


## -----------------------------------------------------------------------------
# Basic example: five variants, one (ka, kd) measurement each
kinetic_data <- data.frame(
  id = c("WT", "Mut1", "Mut2", "Mut3", "Mut4"),
  ka = c(1.2e5, 2.5e5, 2e5, 8.0e4, 1.8e5),
  kd = c(1.5e-3, 2.0e-3, 1.5e-3, 1.2e-3, 1.8e-3)
)

gg_kdmap(
  data = kinetic_data,
  show_anno = TRUE
)

## -----------------------------------------------------------------------------
# With replicates: "WT" appears three times and "Mut1" twice
kinetic_rep <- data.frame(
  id = c("WT", "WT", "WT", "Mut1", "Mut1", "Mut2", "Mut3", "Mut4"),
  ka = c(1.2e5, 1.5e5, 1.1e5, 2.5e5, 2.4e5, 2e5, 8.0e4, 1.8e5),
  kd = c(1.5e-3, 1.6e-3, 1.4e-3, 2.0e-3, 1.9e-3, 1.5e-3, 1.2e-3, 1.8e-3)
)

head(kinetic_rep)

# Fill mapped to the "id" column
gg_kdmap(data = kinetic_rep, fill = "id", show_anno = TRUE)

# Add labels and highlight reference
# NOTE(review): this call passes exactly the same arguments as the one above;
# the label / reference arguments appear to be missing - confirm.
gg_kdmap(data = kinetic_rep, fill = "id", show_anno = TRUE)

# Customize iso-KD lines
# NOTE(review): again identical to the first replicate call; no iso-KD
# customisation is actually passed - confirm the intended arguments.
gg_kdmap(data = kinetic_rep, fill = "id", show_anno = TRUE)

## -----------------------------------------------------------------------------
# The stored plot is extended below with ggplot2's `+`
library(ggplot2)
p <- gg_splitcorr(split = "vs", data = mtcars)

# Move the legend below the plot
p + theme(legend.position = "bottom")

## -----------------------------------------------------------------------------
# Rotate the x-axis text by 90 degrees
p + theme(axis.text.x = element_text(angle = 90))

## -----------------------------------------------------------------------------
# Title and caption, with the title's vertical justification adjusted
p +
  labs(title = "Correlation comparison", caption = "Data: mtcars") +
  theme(plot.title = element_text(vjust = 3))

## -----------------------------------------------------------------------------
# Fixed-ratio coordinate system
p + coord_fixed(ratio = 1.5)

## -----------------------------------------------------------------------------
# Serif font at size 14 for all text
p + theme(text = element_text(size = 14, family = "serif"))

