---
title: "Visualize Sports Injury Data"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{visualize-injury-data}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
library(knitr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE) # to supress R-CMD check

## to fold/hook the code
hook_output <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
  lines <- options$output.lines
  if (is.null(lines)) {
    return(hook_output(x, options))  # pass to default hook
  }
  x <- unlist(strsplit(x, "\n"))
  more <- "..."
  if (length(lines) == 1) {
    if (length(x) > lines) {
      # truncate the output, but add ....
      x <- c(head(x, lines), more)
    }
  } else {
    x <- c(if (abs(lines[1]) > 1) more else NULL,
           x[lines],
           if (length(x) > lines[abs(length(lines))]) more else NULL
    )
  }
  # paste these lines together
  x <- paste(c(x, ""), collapse = "\n")
  hook_output(x, options)
})

modern_r <- getRversion() >= "4.1.0"
```


```{r setup, message = F}
library(injurytools)
library(ggplot2)
library(dplyr)
library(gridExtra)
library(grid)
library(knitr)
```

**Example data**: we continue exploring the cohort of Liverpool Football Club male's first team players over two consecutive seasons, 2017-2018 and 2018-2019, scrapped from https://www.transfermarkt.com/ website[^visualize-note-1].

[^visualize-note-1]: These data sets are provided for illustrative purposes. We warn that they might not be accurate and could potentially include discrepancies or incomplete information compared to what actually occurred.

# A quick glance

```{r, fig.width = 13.7, fig.height = 7}
gg_photo(injd, 
         title   = "Overview of injuries:\nLiverpool FC 1st male team during 2017-2018 and 2018-2019 seasons",
         by_date = "2 month", 
         fix     = TRUE) +
  
  ## plus some lines of ggplot2 code..
  xlab("Follow-up date") + ylab("Players") + labs(caption = "source: transfermarkt.com") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 22),
        axis.text.x.bottom = element_text(size = 13, angle = 20, hjust = 1),
        axis.text.y.left   = element_text(size = 12),
        axis.title.x       = element_text(size = 20, face = "bold", vjust = -1),
        axis.title.y       = element_text(size = 20, face = "bold", vjust = 1.8),
        legend.text        = element_text(size = 20),
        plot.caption       = element_text(face = "italic", size = 12, colour = "gray10"))
```

Let's count how many injuries (red crosses in the graph) occurred and how severe they were (length of the thick black line).

```{r, warning = FALSE}
# warnings set to FALSE
df_summary        <- calc_summary(injd) 
df_summary_perinj <- calc_summary(injd, by = "injury_type") 
# injds
```

<details>

<summary>
Code for tidying up the tables
</summary>

```{r, eval = F}
df_summary |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")

df_summary_perinj |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(1:2, 9, 4, incidence_new, burden_new) |> 
  kable(col.names = c("Type of injury", "N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```
</details>

**Overall**

```{r, echo = F, eval = modern_r}
df_summary |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```

**Overall per type of injury**
```{r, echo = F, eval = modern_r}
df_summary_perinj |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(1:2, 9, 4, incidence_new, burden_new) |> 
  kable(col.names = c("Type of injury", "N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```

Let's plot the information shown in the second table in a risk matrix that displays injury incidence against injury burden.

```{r, eval = F}
# warnings set to FALSE
gg_riskmatrix(injd, 
              by = "injury_type", 
              title = "Risk matrix")
```

<details>

<summary>
Code for further plot specifications
</summary>
```{r, eval = F}
# warnings set to FALSE
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
             Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 20),
                axis.text.y.left = element_text(size = 20),
                axis.title.x = element_text(size = 15),
                axis.title.y = element_text(size = 15),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

gg_riskmatrix(injd, 
              by = "injury_type", 
              title = "Risk matrix") +
  scale_fill_manual(name = "Type of injury",
                    values = palette) +
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3
```
</details>


```{r, echo = F, fig.width = 9, fig.height = 5.8, warning = F}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
             Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 18),
                axis.title.x = element_text(size = 18),
                axis.title.y = element_text(size = 18),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

gg_riskmatrix(injd, 
              by = "injury_type", 
              title = "Risk matrix") +
  scale_fill_manual(name = "Type of injury",
                    values = palette) +
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3
```

<br/>

# Comparing injuries occurred in 17/18 vs. 18/19

We prepare two `injd` objects:

```{r, warning = F}
# warnings set to FALSE
injd1 <- cut_injd(injd, datef = 2017)
injd2 <- cut_injd(injd, date0 = 2018)
```

```{r, eval = F}
## Plot just for checking whether cut_injd() worked well
p1 <- gg_photo(injd1, fix = TRUE, by_date = "3 months")
p2 <- gg_photo(injd2, fix = TRUE, by_date = "3 months")
grid.arrange(p1, p2, ncol = 2)
```


```{r, echo = F, fig.width = 18, fig.height = 4}
p1 <- gg_photo(injd1, fix = TRUE, by_date = "3 months")
p1$layers[[3]]$aes_params$size <- 2
p2 <- gg_photo(injd2, fix = TRUE, by_date = "3 months") 
p2$layers[[3]]$aes_params$size <- 2
grid.arrange(p1, p2, ncol = 2)
```

Let's compute injury summary statistics for each season.

```{r, warning = FALSE}
# warnings set to FALSE
df_summary1 <- calc_summary(injd1, quiet = T)
df_summary2 <- calc_summary(injd2, quiet = T)
```


<details>

<summary>
Code for tidying up the tables
</summary>

```{r, eval = F}
## **Season 2017/2018**
df_summary1 |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")

## **Season 2018/2019**
df_summary2 |> 
  mutate(incidence_new = paste0(round(incidence, 2), "  (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), "  (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```
</details>

**Season 2017/2018**
```{r, echo = F, eval = modern_r}
df_summary1 |> 
  mutate(incidence_new = paste0(round(incidence, 2), " (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), " (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```

**Season 2018/2019**
```{r, echo = F, eval = modern_r}
df_summary2 |> 
  mutate(incidence_new = paste0(round(incidence, 2), "  (", round(incidence_lower, 2), ",", round(incidence_upper, 2), ")"),
         burden_new    = paste0(round(burden, 2), "  (", round(burden_lower, 2), ",", round(burden_upper, 2), ")")) |> 
  dplyr::select(2, 7, 1, incidence_new, burden_new) |> 
  kable(col.names = c("N injuries", "N days lost", "Total expo", "Incidence (95% CI)", "Burden (95% CI)"),
        caption   = "Injury incidence and injury burden are reported as 100 player-matches",
        align     = "c")
```

<br/>

## - Who were the most injured players? And the most severely affected?

Player-wise statistics can be computed by `df_summay1_pl <- calc_summary(injd1, overall = FALSE)`. Then, we plot them:

```{r}
p11 <- gg_rank(injd1, line_overall = TRUE)
p12 <- gg_rank(injd1, summary_stat = "burden", line_overall = TRUE)
p21 <- gg_rank(injd2, line_overall = TRUE)
p22 <- gg_rank(injd2, summary_stat = "burden", line_overall = TRUE) 

# grid.arrange(p11, p21, p12, p22, nrow = 2)
```


<details>

<summary>
Code for further plot specifications
</summary>

```{r, eval = F}
theme2 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 26),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 13),
                axis.title.x = element_text(size = 11, vjust = 1),
                axis.title.y = element_text(size = 22, face = "bold", vjust = 1))

p11 <- p11 + 
  xlab("Injury incidence") + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2017/2018 season") +
  scale_y_continuous(limits = c(0, 80)) + ## same x axis 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.5, "cm"))
p12 <- p12 +
  xlab("Injury burden") + 
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.65, "cm"))

p21 <- p21 + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2018/2019 season") +
  scale_y_continuous(limits = c(0, 80)) + 
  theme2 
p22 <- p22 +
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2

grid.arrange(p11, p21, p12, p22, nrow = 2)
```
</details>


```{r, echo = F, fig.width = 14, fig.height = 11.8}
theme2 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 26),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 13),
                axis.title.x = element_text(size = 11, vjust = 1),
                axis.title.y = element_text(size = 22, face = "bold", vjust = 1))

p11 <- p11 + 
  xlab("Injury incidence") + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2017/2018 season") +
  scale_y_continuous(limits = c(0, 80)) + ## same x axis 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.5, "cm"))
p12 <- p12 +
  xlab("Injury burden") + 
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2 +
  theme(plot.margin = margin(0.2, 0.2, 0.2, 0.65, "cm"))

p21 <- p21 + 
  ylab("Player-wise incidence (injuries per 100 player-match)") +
  ggtitle("2018/2019 season") +
  scale_y_continuous(limits = c(0, 80)) + 
  theme2 
p22 <- p22 +
  ylab("Player-wise burden (days lost per 100 player-match)") +
  scale_y_continuous(limits = c(0, 6110)) + 
  theme2

grid.arrange(p11, p21, p12, p22, nrow = 2)
```

<br/>

## - Which injuries were more frequent? And more burdensome?


```{r, warning = F}
# warnings set to FALSE
p1 <- gg_riskmatrix(injd1, by = "injury_type", 
                    title = "Season 2017/2018", add_contour = FALSE)
p2 <- gg_riskmatrix(injd2, by = "injury_type",
                    title = "Season 2018/2019", add_contour = FALSE)

# Print both plots side by side
# grid.arrange(p1, p2, nrow = 1)
```



<details>
<summary>
Code for further plot specifications
</summary>

```{r, eval = F}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
             Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 18),
                axis.title.x = element_text(size = 18),
                axis.title.y = element_text(size = 18),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

## Plot
p1 <- gg_riskmatrix(injd1, by = "injury_type", 
                    title = "Season 2017/2018", add_contour = T,
                    cont_max_x = 5.2, cont_max_y = 125, ## after checking the data
                    bins = 10) 
p2 <- gg_riskmatrix(injd2, by = "injury_type",
                    title = "Season 2018/2019", add_contour = T, 
                    cont_max_x = 5.2, cont_max_y = 125,
                    bins = 10)

p1 <- p1 +
  scale_x_continuous(limits = c(-0.05, 5.2)) +
  scale_y_continuous(limits = c(-0.05, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette) + 
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3
p2 <- p2 +
  scale_x_continuous(limits = c(-0.5, 5.2)) +
  scale_y_continuous(limits = c(-0.5, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette) + # keep the same color coding
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3

grid.arrange(p1, p2, ncol = 2, 
             top = textGrob("Risk matrices", gp = gpar(fontsize = 26, font = 2))) ## for the main title
```

</details>

```{r, echo = F, fig.width = 13, fig.height = 5.8, warning = FALSE, message = FALSE}
palette <- c("#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
             Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
theme3 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x.bottom = element_text(size = 18),
                axis.text.y.left = element_text(size = 18),
                axis.title.x = element_text(size = 18),
                axis.title.y = element_text(size = 18),
                legend.title = element_text(size = 15),
                legend.text = element_text(size = 15))

## Plot
p1 <- gg_riskmatrix(injd1, by = "injury_type", 
                    title = "Season 2017/2018", add_contour = T,
                    cont_max_x = 5.2, cont_max_y = 125, ## after checking the data
                    bins = 10) 
p2 <- gg_riskmatrix(injd2, by = "injury_type",
                    title = "Season 2018/2019", add_contour = T, 
                    cont_max_x = 5.2, cont_max_y = 125,
                    bins = 10)

p1 <- p1 +
  scale_x_continuous(limits = c(-0.05, 5.2)) +
  scale_y_continuous(limits = c(-0.05, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette) + # get rid off the green (pos: 4) 
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3
p2 <- p2 +
  scale_x_continuous(limits = c(-0.5, 5.2)) +
  scale_y_continuous(limits = c(-0.5, 125)) + 
  scale_fill_manual(name = "Type of injury",
                    values = palette) + # keep the same color coding
  guides(fill = guide_legend(override.aes = list(size = 5))) +
  theme3

grid.arrange(p1, p2, ncol = 2, 
             top = textGrob("Risk matrices", gp = gpar(fontsize = 26, font = 2))) ## for the main title
```

<br/>

## - How many players were injured in each month?

We will create bar plots, with each bar representing the monthly prevalence[^visualize-note-2].

[^visualize-note-2]: See the *Note* section in `?calc_prevalence()` or have a look at this section in [Estimate summary statistics](https://lzumeta.github.io/injurytools/articles/estimate-epi-measures.html#calc_prevalence) vignette, to better understand what the proportions refer to.

```{r, eval = F}
gg_prevalence(injd, time_period = "monthly",
              line_mean = TRUE)
```

<details>
<summary>
Code for further plot specifications
</summary>

```{r, eval = F}
theme4 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x = element_text(size = 13.5),
                axis.text.y = element_text(size = 18),
                legend.title = element_text(size = 20),
                legend.text = element_text(size = 20),
                strip.text = element_text(size = 20))


gg_prevalence(injd, time_period = "monthly",
              line_mean = TRUE,
              title = "Monthly prevalence of sports injuries") + 
  theme4
```
</details>


```{r, echo = F, fig.width = 12.8, fig.height = 5.2}
theme4 <- theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 20),
                axis.text.x = element_text(size = 13.5),
                axis.text.y = element_text(size = 18),
                legend.title = element_text(size = 20),
                legend.text = element_text(size = 20),
                strip.text = element_text(size = 20))


gg_prevalence(injd, time_period = "monthly",
              line_mean = TRUE,
              title = "Monthly prevalence of sports injuries") + 
  theme4
```


```{r, eval = F}
gg_prevalence(injd, time_period = "monthly",
              by = "injury_type", line_mean = TRUE)
```

<details>
<summary>
Code for further plot specifications
</summary>

```{r, eval = F}
palette2 <- c("seagreen3", "#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
              Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/


gg_prevalence(injd, time_period = "monthly", 
              by = "injury_type", line_mean = TRUE,
              title = "Monthly prevalence of each type of sports injuries") +
  scale_fill_manual(name = "Type of injury", 
                    values = palette2) + 
  theme4
```
</details>

```{r, echo = F, fig.width = 12.8, fig.height = 5.2}
palette2 <- c("seagreen3", "#000000", Ligament = "#E69F00", Muscle = "#56B4E9", "#009E73",
              Unknown = "#F0E442", "#0072B2", Bone = "#D55E00", Concussion = "#CC79A7")
# source of the palette: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/


gg_prevalence(injd, time_period = "monthly", 
              by = "injury_type", line_mean = TRUE,
              title = "Monthly prevalence of each type of sports injuries") +
  scale_fill_manual(name = "Type of injury", 
                    values = palette2) + 
  theme4
```
