## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options: collapse source and output into a single block
# and prefix every printed output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----fig-toy, fig.cap="Representation of a synthetic patient's records.", echo=FALSE----
# Date of birth of the synthetic patient; every record below is positioned as
# a number of days after this date.
dob <- as.Date("2003-04-19")

# One row per record: the first 3 rows are visit_occurrence records and the
# remaining 6 are drug_exposure records. `end` is start plus a duration in days.
x <- dplyr::tibble(
  start = dob + 5800 + c(0, 365, 800, 5, 85, 150, 365, 810, 825),
  end = start + c(90, 120, 30, 30, 60, 60, 150, 10, 65),
  colour = factor(
    rep(c("visit_occurrence", "drug_exposure"), times = c(3, 6)),
    levels = c("visit_occurrence", "drug_exposure")
  ),
  # record identifier, used later to group the two end points of a segment
  id = seq_along(start),
  # vertical position: visits on the upper row, drug exposures on the lower
  y = dplyr::if_else(colour == "visit_occurrence", 1.5, 1)
)

# Long format: two rows per record ("start" and "end"), with the date in `x`
x <- tidyr::pivot_longer(x, cols = c("start", "end"), values_to = "x")

# Dates reused by several layers of the toy figure; defined once so the axis
# breaks, labels, limits, dashed lines and text boxes cannot drift apart.
axis_breaks <- as.Date(c(
  "2018-07-01", "2019-01-01", "2019-07-01", "2020-01-01",
  "2020-07-01", "2021-01-01", "2021-07-01", "2022-01-01"
))
milestones <- dplyr::tibble(
  x = as.Date(c("2018-07-01", "2021-04-19", "2022-01-01")),
  y = 1.25,
  lab = c("Birth date", "18th birthday", "Extraction date")
)
# x positions of the two short vertical strokes drawn across the x axis
axis_marks <- as.Date("2018-10-01") + 5 * c(1, -1)

# Timeline of the synthetic patient: one horizontal segment per record
# (grouped by `id`), coloured by the table the record comes from.
ggplot2::ggplot(
  data = x,
  mapping = ggplot2::aes(x = x, y = y, group = id, colour = colour)
) +
  ggplot2::geom_line(linewidth = 2) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    # the y axis only separates the two record types, so hide it entirely
    axis.title.y = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    legend.title = ggplot2::element_blank(),
    legend.position = "top",
    axis.line.x = ggplot2::element_blank()
  ) +
  ggplot2::scale_x_date(
    name = "Time",
    breaks = axis_breaks,
    # The first tick is deliberately labelled with the date of birth rather
    # than with its own position. format() keeps `labels` a plain character
    # vector instead of relying on c() to reconcile a Date with strings.
    labels = c(format(dob), format(axis_breaks[-1])),
    limits = range(axis_breaks)
  ) +
  # clip = "off" lets the strokes below be drawn outside the panel area
  ggplot2::coord_cartesian(clip = "off", ylim = c(0.5, 2)) +
  # two short vertical strokes near the bottom of the panel -- presumably the
  # marker for the break in the time axis between birth and the first record
  ggplot2::annotate(
    geom = "segment",
    x = axis_marks,
    xend = axis_marks,
    y = 0.42 - 0.1,
    yend = 0.42 + 0.1
  ) +
  ggplot2::geom_vline(xintercept = milestones$x, linetype = "dashed") +
  ggplot2::geom_label(
    mapping = ggplot2::aes(x = x, y = y, label = lab),
    data = milestones,
    inherit.aes = FALSE,
    angle = 90
  )

## ----fig-obs, fig.cap="Different observation periods.", echo=FALSE------------
# Observation periods produced by each strategy for the synthetic patient.
# `start`/`end` are day offsets from `dob`; a strategy that yields several
# periods contributes several rows with the same `colour` label.
x <- dplyr::tibble(
  start = 5800 + c(0, 0, 0, 365, 800, 0, 800, 0, 800, 0, 0),
  end = 5800 + c(1032, 890, 90, 120 + 365, 30 + 800, 515, 890, 695, 1032, 1032, 775),
  # Labels use "Persistence" to agree with the `persistenceDays` argument and
  # with the cdm_name labels given to the corresponding results in this script.
  colour = c(
    "First to extraction", "First to last", rep("Inpatient", 3), "Collapse 180",
    "Collapse 180", "Collapse+Persistence 180", "Collapse+Persistence 180",
    "Collapse+Persistence 365", "Pediatric"
  )
) |>
  dplyr::mutate(
    start = dob + start,
    end = dob + end,
    id = dplyr::row_number(),
    # keep the legend in order of first appearance
    colour = factor(colour, levels = unique(colour)),
    # one row of the figure per strategy, first strategy on top
    y = nlevels(colour) - as.numeric(colour)
  ) |>
  # long format: two rows per period ("start" and "end"), with the date in `x`
  tidyr::pivot_longer(cols = c("start", "end"), values_to = "x")

# Tick positions of the time axis; defined once so breaks, labels and limits
# stay in sync.
axis_breaks <- as.Date(c(
  "2018-07-01", "2019-01-01", "2019-07-01", "2020-01-01",
  "2020-07-01", "2021-01-01", "2021-07-01", "2022-01-01"
))
# x positions of the two short vertical strokes drawn across the x axis
axis_marks <- as.Date("2018-10-01") + 5 * c(1, -1)

# One horizontal segment per observation period (grouped by `id`), one row of
# the figure and one colour per strategy.
ggplot2::ggplot(
  data = x,
  mapping = ggplot2::aes(x = x, y = y, group = id, colour = colour)
) +
  ggplot2::geom_line(linewidth = 2) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    # the y axis only separates the strategies, so hide it entirely
    axis.title.y = ggplot2::element_blank(),
    axis.text.y = ggplot2::element_blank(),
    axis.ticks.y = ggplot2::element_blank(),
    legend.title = ggplot2::element_blank(),
    legend.position = "top",
    axis.line.x = ggplot2::element_blank()
  ) +
  ggplot2::scale_x_date(
    name = "Time",
    breaks = axis_breaks,
    # The first tick is deliberately labelled with the date of birth rather
    # than with its own position. format() keeps `labels` a plain character
    # vector instead of relying on c() to reconcile a Date with strings.
    labels = c(format(dob), format(axis_breaks[-1])),
    limits = range(axis_breaks)
  ) +
  # clip = "off" lets the strokes below be drawn outside the panel area
  ggplot2::coord_cartesian(clip = "off", ylim = c(-0.5, 6.5)) +
  # two short vertical strokes near the bottom of the panel -- presumably the
  # marker for the break in the time axis between birth and the first record
  ggplot2::annotate(
    geom = "segment",
    x = axis_marks,
    xend = axis_marks,
    y = -0.87 - 0.3,
    yend = -0.87 + 0.3
  ) +
  # dashed reference lines at the same three dates used in the toy figure
  ggplot2::geom_vline(
    xintercept = as.Date(c("2018-07-01", "2021-04-19", "2022-01-01")),
    linetype = "dashed"
  )

## -----------------------------------------------------------------------------
library(omock)
library(OmopConstructor)
library(OmopSketch)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
library(visOmopResults)

# Create a mock CDM reference from the "GiBleed" dataset with a duckdb source
cdm <- mockCdmFromDataset(
  source = "duckdb",
  datasetName = "GiBleed"
)

# print the CDM reference
cdm

## -----------------------------------------------------------------------------
# Scenario "First to extract": collapseDays = Inf and persistenceDays = Inf,
# censoring at age 120, records restricted to 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = Inf,
  persistenceDays = Inf
)

# OmopSketch summaries of the freshly generated observation_period table
result1a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result1b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result1 <- mutate(bind(result1a, result1b), cdm_name = "First to extract")

## -----------------------------------------------------------------------------
# Scenario "First to last": collapseDays = Inf with persistenceDays = 0,
# censoring at age 120, records restricted to 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = Inf,
  persistenceDays = 0
)

# OmopSketch summaries of the freshly generated observation_period table
result2a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result2b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result2 <- mutate(bind(result2a, result2b), cdm_name = "First to last")

## -----------------------------------------------------------------------------
# Scenario "Inpatient": collapseDays = 0 and persistenceDays = 0, built only
# from visit_occurrence records, censoring at age 120, records restricted to
# 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  recordsFrom = "visit_occurrence",
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = 0,
  persistenceDays = 0
)

# OmopSketch summaries of the freshly generated observation_period table
result3a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result3b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result3 <- mutate(bind(result3a, result3b), cdm_name = "Inpatient")

## -----------------------------------------------------------------------------
# Scenario "Collapse 180": collapseDays = 180 with persistenceDays = 0,
# censoring at age 120, records restricted to 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = 180,
  persistenceDays = 0
)

# OmopSketch summaries of the freshly generated observation_period table
result4a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result4b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result4 <- mutate(bind(result4a, result4b), cdm_name = "Collapse 180")

## -----------------------------------------------------------------------------
# Scenario "Collapse+Persistence 180": collapseDays = 180 with
# persistenceDays = 179, censoring at age 120, records restricted to
# 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = 180,
  persistenceDays = 179
)

# OmopSketch summaries of the freshly generated observation_period table
result5a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result5b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result5 <- mutate(
  bind(result5a, result5b),
  cdm_name = "Collapse+Persistence 180"
)

## -----------------------------------------------------------------------------
# Scenario "Collapse+Persistence 365": collapseDays = 365 with
# persistenceDays = 364, censoring at age 120, records restricted to
# 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 120,
  collapseDays = 365,
  persistenceDays = 364
)

# OmopSketch summaries of the freshly generated observation_period table
result6a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result6b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result6 <- mutate(
  bind(result6a, result6b),
  cdm_name = "Collapse+Persistence 365"
)

## -----------------------------------------------------------------------------
# Scenario "Pediatric": collapseDays = Inf and persistenceDays = Inf, but
# censoring at age 18, records restricted to 1900-01-01 .. 2020-01-01.
cdm <- buildObservationPeriod(
  cdm = cdm,
  dateRange = c(as.Date("1900-01-01"), as.Date("2020-01-01")),
  censorAge = 18,
  collapseDays = Inf,
  persistenceDays = Inf
)

# OmopSketch summaries of the freshly generated observation_period table
result7a <- summariseObservationPeriod(
  observationPeriod = cdm$observation_period,
  byOrdinal = FALSE
)
result7b <- summariseInObservation(
  observationPeriod = cdm$observation_period,
  interval = "years",
  output = c("person-days", "age")
)

# overwrite cdm_name so this scenario can be told apart from the others
result7 <- mutate(bind(result7a, result7b), cdm_name = "Pediatric")

## -----------------------------------------------------------------------------
# Combine the seven scenario results into a single object
result <- bind(
  result1, result2, result3, result4,
  result5, result6, result7
)

## -----------------------------------------------------------------------------
# Table of the observation period summaries for all scenarios
result |>
  tableObservationPeriod()

## -----------------------------------------------------------------------------
# Density plot of "Records per person", coloured by scenario (cdm_name)
plotObservationPeriod(
  result = result,
  variableName = "Records per person",
  plotType = "densityplot",
  colour = "cdm_name"
) +
  # zoom the x axis up to 40 without dropping data
  ggplot2::coord_cartesian(xlim = c(NA, 40))

## -----------------------------------------------------------------------------
# Density of "Duration in days" per scenario, rescaled so that every curve
# peaks at 1.
result |>
  filterSettings(result_type == "summarise_observation_period") |>
  # keep only the density estimates of the duration variable
  filter(variable_name == "Duration in days") |>
  filter(estimate_name %in% c("density_x", "density_y")) |>
  tidy() |>
  # preserve the order of appearance of the scenarios in the legend
  mutate(cdm_name = factor(cdm_name, levels = unique(cdm_name))) |>
  # divide each scenario's density by its own maximum
  group_by(cdm_name) |>
  mutate(density_y = density_y / max(density_y)) |>
  ungroup() |>
  scatterPlot(
    x = "density_x",
    y = "density_y",
    colour = "cdm_name",
    line = TRUE,
    point = FALSE,
    ribbon = FALSE
  )

## -----------------------------------------------------------------------------
# Person-days in observation over time, one colour per scenario
result |>
  filter(variable_name == "Number person-days") |>
  plotInObservation(
    colour = "cdm_name"
  )

## -----------------------------------------------------------------------------
# Median age in observation over time, one colour per scenario
result |>
  filter(variable_name == "Median age in observation") |>
  plotInObservation(
    colour = "cdm_name"
  )

