---
title: "How-to in the Tidyverse"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How-to in the Tidyverse}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Global chunk defaults: merge source and output into a single block and
# prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

```{r setup, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE}
library(FFdownload)
library(dplyr)
library(ggplot2)
library(tidyr)
# Minute-stamped path under the session temp dir; passed to FFdownload() as
# `tempd` in the chunks that are actually evaluated.
outd <- file.path(tempdir(), format(Sys.time(), "%F_%H-%M"))
# The file name is appended to `outd` without a path separator, so the .RData
# file sits next to `outd` and its name begins with the same time stamp.
outfile <- paste0(outd, "FFData_tbl.RData")
```

```{r setup2, eval=FALSE, echo=TRUE}
library(FFdownload)
library(tidyverse)
# Minute-stamped path below "data/", used as `tempd` for FFdownload()
outd <- file.path("data", format(Sys.time(), "%F_%H-%M"))
# Output file: the stamp becomes a prefix of the file name (no separator)
outfile <- paste0(outd, "FFData_tbl.RData")
```

This vignette covers the tidyverse workflow. For the step-by-step xts workflow
(with separate download and processing stages), see
`vignette("FFD-xts-how-to")`.

---

## Quick start: `FFget()` (new in v1.2.0)

`FFget()` is the simplest way to get a single dataset into your session. It
returns a tibble directly — no intermediate `.RData` file, no `load()` call.
Missing-value sentinels (`-99`, `-999`, `-99.99`) are converted to `NA` by
default.

```{r tbl_ffget, eval=FALSE}
# Monthly FF 3-factor returns ("Temp2" sub-table) fetched straight into a
# tibble; missing-value codes are converted to NA by default
ff3 <- FFget(
  "F-F_Research_Data_Factors",
  subtable = "Temp2"
)
head(ff3)
#> # A tibble: 6 × 5
#>   date      Mkt.RF   SMB   HML    RF
#>   <yearmon>  <dbl> <dbl> <dbl> <dbl>
#> 1 Jul 1926    2.89 -2.55 -2.39  0.22
#> ...
```

Use `frequency = NULL` to get all frequencies at once, or `subtable = NULL`
to get all sub-tables within a frequency:

```{r tbl_ffget_all, eval=FALSE}
# subtable = NULL: every sub-table of the monthly frequency is returned
ff3_monthly <- FFget(
  "F-F_Research_Data_Factors",
  subtable = NULL
)
names(ff3_monthly) # e.g. "Temp2"

# Annual frequency, delivered as an xts object
ff3_ann_xts <- FFget(
  "F-F_Research_Data_Factors",
  frequency = "annual",
  format = "xts"
)
```

---

## Dataset discovery: `FFlist()` and `FFmatch()`

Before downloading, browse all available datasets and check that your search
strings match the intended files:

```{r tbl_fflist, eval=FALSE}
# Catalogue of the datasets that can be downloaded
fl <- FFlist()
nrow(fl) # 100+ non-daily datasets
# Keep the rows whose name mentions the 5-factor or momentum files
filter(fl, grepl("5_Factors|Momentum", name))
```

```{r tbl_ffmatch, eval=FALSE}
# Check which dataset names the (partial) search strings resolve to
FFmatch(
  c(
    "Research_Data_Factors",
    "Momentum_Factor"
  )
)
#> # A tibble: 2 × 4
#>   requested             matched                    edit_distance similarity
#>   <chr>                 <chr>                              <int>      <dbl>
#> 1 Research_Data_Factors F-F_Research_Data_Factors              3      0.87
#> 2 Momentum_Factor       F-F_Momentum_Factor                    4      0.78
```

---

## Bulk download with `FFdownload()` (classic API)

For downloading multiple datasets in one call and/or saving a dated snapshot
for reproducible research, use `FFdownload()` directly with `format = "tibble"`.

For a detailed explanation of the download / process separation, see
`vignette("FFD-xts-how-to")`. Here we download and process in one step:

```{r tbl_all}
# The two datasets to fetch: 3-factor returns and the momentum factor
inputlist <- c(
  "F-F_Research_Data_Factors_CSV",
  "F-F_Momentum_Factor_CSV"
)
# Download and process in a single call; the result is written to `outfile`
# in tibble format
FFdownload(
  output_file = outfile,
  tempd = outd,
  inputlist = inputlist,
  exclude_daily = TRUE,
  download = TRUE,
  download_only = FALSE,
  format = "tibble"
)
```

### New parameters

Replace French's missing-value codes with `NA` during processing:

```{r tbl_na_values, eval=FALSE}
# Same download as before, with the listed missing-value codes passed as
# `na_values` so they are replaced by NA
FFdownload(
  output_file = outfile,
  tempd = outd,
  inputlist = inputlist,
  exclude_daily = TRUE,
  download = TRUE,
  download_only = FALSE,
  format = "tibble",
  na_values = c(-99, -999, -99.99)
)
```

Return the data list directly in addition to saving the file:

```{r tbl_return_data, eval=FALSE}
# return_data = TRUE: keep the processed list in `FFdata` as well as on disk
FFdata <- FFdownload(
  output_file = outfile,
  tempd = outd,
  inputlist = inputlist,
  exclude_daily = TRUE,
  download = TRUE,
  download_only = FALSE,
  format = "tibble",
  return_data = TRUE
)
```

---

## Working with the result

```{r tbl_load}
# Restore the saved objects; `FFdata` is used from here on
load(file = outfile)
# Compact overview of what `FFdata` contains
ls.str(FFdata)
```

Verify that the sub-tables are tibbles:

```{r tbl_check}
# Inspect the structure of the monthly 3-factor sub-table
FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2 %>%
  str()
```

Merge the two datasets (the `date` column is a `yearmon` object and serves
as the join key):

```{r tbl_merge}
# Attach the momentum factor to the 3-factor table, matching rows on `date`
FFfour <- left_join(
  FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2,
  FFdata$`x_F-F_Momentum_Factor`$monthly$Temp2,
  by = "date"
)
head(FFfour)
```

Plot cumulative wealth indices using `pivot_longer()` and `ggplot2`:

```{r FFFourPic, out.width="100%", fig.width=8, fig.height=4}
# Wealth index per factor (value of 1 invested at the start of 1960), all
# factors drawn on one log-scaled axis.
FFfour %>%
  pivot_longer(Mkt.RF:Mom, names_to = "FFVar", values_to = "FFret") %>%
  # Returns are divided by 100 (the source values appear to be percentages);
  # the yearmon `date` is converted to Date so it can be compared with
  # "1960-01-01" and used on a date axis.
  mutate(FFret = FFret / 100, date = as.Date(date)) %>%
  # RF is dropped: only the four factor series are plotted.
  filter(date >= "1960-01-01", FFVar != "RF") %>%
  group_by(FFVar) %>%
  arrange(FFVar, date) %>%
  # Zero the first month's return so every index starts at exactly 1, then
  # compound. (Previously the first return was set to 1 and 1 was subtracted
  # after compounding, which plots 2 * W - 1 instead of the wealth index W.)
  mutate(
    FFret = ifelse(date == "1960-01-01", 0, FFret),
    FFretv = cumprod(1 + FFret)
  ) %>%
  # `type` is not a ggplot2 aesthetic and was ignored, so it is no longer mapped.
  ggplot(aes(x = date, y = FFretv, col = FFVar)) +
  geom_line(lwd = 1.2) +
  scale_y_log10() +
  # labs() sets the axis title via `y`; `ylab =` was ignored, leaving the raw
  # column name on the axis.
  labs(
    title = "FF4 Factors",
    subtitle = "Cumulative wealth plots",
    y = "cum. returns"
  ) +
  scale_colour_viridis_d("FFvar") +
  theme_bw() +
  theme(legend.position = "bottom")
```
