## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults applied to the rest of the document:
#   - figures are 7 x 5
#   - source and output are collapsed into one block, with output lines
#     prefixed by "#>"
knitr::opts_chunk$set(
  fig.width = 7,
  fig.height = 5,
  collapse = TRUE,
  comment = "#>"
)
library(sumvar)
library(ggplot2)
library(dplyr)


## ----continuous---------------------------------------------------------------
# Simulated example data: 100 rows with a normally distributed `age`
# (mean 50, sd 20) and a randomly assigned `sex`. Rows labelled "male" then
# have 10 added to `age` so that the two groups differ.
set.seed(123)
df <- dplyr::mutate(
  tibble::tibble(
    age = rnorm(n = 100, mean = 50, sd = 20),
    sex = sample(x = c("male", "female"), size = 100, replace = TRUE)
  ),
  age = dplyr::case_when(
    sex == "male" ~ age + 10,
    TRUE ~ age
  )
)

# dist_sum on `age` alone, then on `age` with `sex` as a second argument
df %>% dist_sum(age)
df %>% dist_sum(age, sex)

## ----dates--------------------------------------------------------------------
# Simulated example data: 100 dates scattered around 2022-01-01 (sd of 50
# days) with a randomly assigned `group`.
# Group "A" is moved 10 days later. The shift is written back into `dates`,
# the column passed to dist_date() below; storing it in a separate column
# would leave the summarised column unshifted and the groups identical in
# distribution.
df3 <- tibble::tibble(
  dates = as.Date("2022-01-01") + rnorm(n = 100, mean = 0, sd = 50),
  group = sample(c("A", "B"), size = 100, replace = TRUE)
) %>%
  dplyr::mutate(dates = dplyr::if_else(group == "A", dates + 10, dates))

# Call dist_date
df3 %>% dist_date(dates)
df3 %>% dist_date(dates, group)

## ----categorical--------------------------------------------------------------
# Simulated example data: 200 draws (with replacement) from the levels
# "A", "B" and "C".
df2 <- tibble::tibble(
  group = sample(x = c("A", "B", "C"), size = 200, replace = TRUE)
)

# Call tab1 on the single categorical column
df2 %>% tab1(group)

## ----crosstab-----------------------------------------------------------------
# Simulated example data: 100 rows, each with a randomly drawn two-level
# `treatment` and a randomly drawn three-level `outcome`.
df_tab <- dplyr::tibble(
  treatment = sample(x = c("control", "treatment"), size = 100, replace = TRUE),
  outcome = sample(
    x = c("improved", "stable", "worse"),
    size = 100,
    replace = TRUE
  )
)

# Default call
df_tab %>% tab(treatment, outcome)

# column percentages
df_tab %>% tab(treatment, outcome, show = "col")

# with chi-squared test
df_tab %>% tab(treatment, outcome, test = "chi")

# save as tibble
result <- df_tab %>% tab(treatment, outcome)

## ----duplicate----------------------------------------------------------------
# Simulated example data: 200 rows with a sequential `id` and an `age` drawn
# from a normal distribution (mean 30, sd 50) rounded to whole numbers.
example_data <- dplyr::tibble(
  id = seq_len(200),
  age = round(rnorm(n = 200, mean = 30, sd = 50))
)

# Replace 15 randomly chosen `age` values with missing.
is.na(example_data$age) <- sample(seq_len(200), size = 15)

# Call dup on the `age` column
example_data %>% dup(age)

## ----duplicate_all------------------------------------------------------------
# Simulated example data (replaces the previous `example_data`): 200 rows with
# a rounded normal `age`, a two-level `sex` and a three-level
# `favourite_colour`.
example_data <- dplyr::tibble(
  age = round(rnorm(n = 200, mean = 30, sd = 50)),
  sex = sample(x = c("Male", "Female"), size = 200, replace = TRUE),
  favourite_colour = sample(
    x = c("Red", "Blue", "Purple"),
    size = 200,
    replace = TRUE
  )
)

# Replace 15 randomly chosen `age` values with missing.
is.na(example_data$age) <- sample(seq_len(200), size = 15)
# Replace 32 randomly chosen `sex` values with missing.
is.na(example_data$sex) <- sample(seq_len(200), size = 32)

# Call dup on the whole data frame, without naming a column
dup(example_data)

## ----explorer, eval=FALSE-----------------------------------------------------
# explorer(example_data)                      # HTML report (default)
# explorer(example_data, format = "pdf")      # PDF report
# explorer(example_data, id_var = "id")       # exclude an identifier column

