## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = identical(tolower(Sys.getenv("NOT_CRAN")), "true"),
  out.width = "100%"
)


## ----eval = FALSE-------------------------------------------------------------
# read_households(
#   year,          # year of reference
#   columns,       # select columns to read
#   add_labels,    # add labels to categorical variables
#   as_data_frame, # return an Arrow DataSet or a data.frame
#   showProgress,  # show download progress bar
#   cache,         # cache data for faster access later
#   verbose        # whether to print informative messages
#   )

## ----warning=FALSE, message=FALSE---------------------------------------------
library(censobr)
library(arrow)
library(dplyr)
library(ggplot2)

## ----warning = FALSE, message=FALSE-------------------------------------------
pop <- read_population(
  year = 2010,
  columns = c('abbrev_state', 'V0606', 'V0010', 'V6400'),
  add_labels = 'pt',
  showProgress = FALSE
  )

class(pop)

## ----warning = FALSE, message=FALSE-------------------------------------------
dplyr::glimpse(pop)

## ----warning = FALSE, message=FALSE-------------------------------------------
df <- pop |>
      filter(abbrev_state == "RJ") |>                                                    # (a)
      compute() |>
      group_by(V0606) |>                                                                 # (b)
      summarize(higher_edu = sum(V0010[which(V6400=="Superior completo")]) / sum(V0010), # (c)
                pop = sum(V0010) ) |>
      collect()

head(df)

## ----message=FALSE------------------------------------------------------------
df <- subset(df, V0606 != 'Ignorado')

ggplot() +
  geom_col(data = df, aes(x=V0606, y=higher_edu), fill = '#5c997e') +
  scale_y_continuous(name = 'Proportion with higher education',
                     labels = scales::percent) +
  labs(x = 'Cor/raça') +
  theme_classic()
  

## ----message=FALSE------------------------------------------------------------
hs <- read_households(
  year = 2010, 
  showProgress = FALSE
  )


## ----warning = FALSE, message=FALSE-------------------------------------------
esg <- hs |> 
        compute() |>
        group_by(code_muni) |>                                             # (a)
        summarize(rede = sum(V0010[which(V0207=='1')]),                    # (b)
                  total = sum(V0010)) |>                                   # (b)
        mutate(cobertura = rede / total) |>                                # (c)
        collect()                                                          # (d)

head(esg)

## ----warning = FALSE, message=FALSE-------------------------------------------
library(geobr)

muni_sf <- geobr::read_municipality(
  year = 2010,
  showProgress = FALSE
  )

head(muni_sf)

## ----warning = FALSE, message=FALSE-------------------------------------------
esg_sf <- left_join(muni_sf, esg, by = 'code_muni')

ggplot() +
  geom_sf(data = esg_sf, aes(fill = cobertura), color=NA) +
  labs(title = "Share of households connected to a sewage network") +
  scale_fill_distiller(palette = "Greens", direction = 1, 
                       name='Share of\nhouseholds', 
                       labels = scales::percent) +
  theme_void()


## ----warning = FALSE, message=FALSE-------------------------------------------
metro_muni <- geobr::read_metro_area(
  year = 2010, 
  showProgress = FALSE) |>
  subset(name_metro == "RM São Paulo")


## ----warning = FALSE, message=FALSE-------------------------------------------
wt_areas <- geobr::read_weighting_area(
  code_weighting = "SP", 
  showProgress = FALSE,
  year = 2010
  )

wt_areas <- subset(wt_areas, code_muni %in% metro_muni$code_muni)
head(wt_areas)

## ----warning = FALSE, message=FALSE-------------------------------------------
rent <- hs |>
        filter(code_muni %in% metro_muni$code_muni) |>                     # (a)
        compute() |>
        group_by(code_weighting) |>                                        # (b)
        summarize(avgrent=weighted.mean(x=V2011, w=V0010, na.rm=TRUE)) |>  # (c)
        collect()                                                          # (d)

head(rent)

## ----warning = FALSE, message=FALSE-------------------------------------------
rent_sf <- left_join(wt_areas, rent, by = 'code_weighting')

ggplot() +
  geom_sf(data = rent_sf, aes(fill = avgrent), color=NA) +
  geom_sf(data = metro_muni, color='gray', fill=NA) +
  scale_fill_distiller(palette = "Greens", direction = 1, 
                       name='Avgerage\nRent in R$') +
  theme_void()


## ----warning=FALSE, eval=FALSE------------------------------------------------
# censobr::censobr_cache(
#   list_files = TRUE,
#   print_tree = TRUE
#   )

## ----warning=FALSE, eval=FALSE------------------------------------------------
# censobr::censobr_cache(delete_file = "2010_emigration")
# 

## ----warning=FALSE, eval=FALSE------------------------------------------------
# censobr::censobr_cache(delete_file = "all")
# 

## ----eval=TRUE, warning=FALSE-------------------------------------------------
tempf <- fs::path_temp(pattern = "my_temp_dir")

censobr::set_censobr_cache_dir(path = tempf)


## ----eval=TRUE, warning=FALSE, message=FALSE----------------------------------
# download file to our new cache dir
df_emi <- censobr::read_emigration(year = 2010)

# check files in current cache dir
censobr::censobr_cache(
  list_files = TRUE, 
  print_tree = TRUE
  )