---
title: "Geocoding French addresses with BanR"
author: "Paul-Antoine Chevalier (Etalab), Joël Gombin (Datactivist)"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Geocoding French addresses with BanR}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup}
# Attach the packages used by the examples in this vignette.
library(tibble)
library(dplyr)
library(banR)

# Example table: three street addresses, their postal codes, and an
# unrelated numeric column `z` filled with random normal draws.
table_test <- tibble(
  adress = c(
    "39 quai André Citroën",
    "64 Allée de Bercy",
    "20 avenue de Ségur"
  ),
  postal_code = c("75015", "75012", "75007"),
  z = rnorm(n = 3)
)
```



This vignette covers four functions from banR:

- `geocode()` geocodes a single address
- `reverse_geocode()` reverse geocodes a single pair of longitude and latitude
- `geocode_tbl()` geocodes a data frame
- `reverse_geocode_tbl()` reverse geocodes a data frame

## Geocode

Geocoding is the process of transforming a human-readable address into a location (i.e. a pair of latitude and longitude).

### A single address

```{r geocode, eval=FALSE}
# Geocode one free-text address and inspect the columns of the result.
glimpse(geocode(query = "39 quai André Citroën, Paris"))
```

```{r geocode-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `geocode` chunk above is
# not evaluated (eval=FALSE), so its `glimpse()` result is printed verbatim
# here (presumably captured from an earlier run).
cat("Rows: 1
Columns: 19
$ label       <chr> \"39 Quai André Citroën 75015 Paris\"
$ score       <dbl> 0.9801645
$ housenumber <chr> \"39\"
$ id          <chr> \"75115_0318_00039\"
$ name        <chr> \"39 Quai André Citroën\"
$ postcode    <chr> \"75015\"
$ citycode    <chr> \"75115\"
$ x           <dbl> 647082.8
$ y           <dbl> 6861010
$ city        <chr> \"Paris\"
$ district    <chr> \"Paris 15e Arrondissement\"
$ context     <chr> \"75, Paris, Île-de-France\"
$ type        <chr> \"housenumber\"
$ importance  <dbl> 0.78181
$ street      <chr> \"Quai André Citroën\"
$ `_type`     <chr> \"address\"
$ type_geo    <chr> \"Point\"
$ longitude   <dbl> 2.278922
$ latitude    <dbl> 48.84696")
```

The BAN API sends back both projected/Cartesian coordinates (`x` and `y` columns, which use the Lambert 93 projection, also known as EPSG:2154) and lon/lat (i.e. WGS84) coordinates (`longitude` and `latitude` columns). It also indicates the degree of confidence it has in each result (column `score`). The above example only sends back one result, but sometimes the API will send back several suggestions for the same query. They are sorted in descending order of confidence.

### A data frame

In addition to the address, `geocode_tbl()` can take as an argument either the [postal code](https://en.wikipedia.org/wiki/Postal_codes_in_France) or the official French code ([INSEE code](https://en.wikipedia.org/wiki/INSEE_code)) of the commune.

```{r geocode-tbl, eval=FALSE}
# Geocode every row of `table_test` using only its `adress` column.
glimpse(geocode_tbl(tbl = table_test, adresse = adress))
```

```{r geocode-tbl-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `geocode-tbl` chunk above
# is not evaluated (eval=FALSE), so its `glimpse()` result is printed verbatim
# here (presumably captured from an earlier run).
cat("Rows: 3
Columns: 21
$ postal_code        <chr> \"75015\", \"75012\", \"75007\"
$ z                  <dbl> -1.316912, 2.787876, 1.313769
$ adress             <chr> \"39 quai André Citroën\", \"64 Allée de Bercy\", \"20 a…
$ longitude          <dbl> 2.278922, 2.376011, 2.308628
$ latitude           <dbl> 48.84696, 48.84254, 48.85070
$ result_score       <dbl> 0.9801645, 0.9729327, 0.9716455
$ result_score_next  <chr> \"0.8013699999999999\", NA, \"0.9532445454545454\"
$ result_label       <chr> \"39 Quai André Citroën 75015 Paris\", \"64 Allée de B…
$ result_type        <chr> \"housenumber\", \"housenumber\", \"housenumber\"
$ result_id          <chr> \"75115_0318_00039\", \"75112_0874_00064\", \"75107_8909…
$ result_housenumber <chr> \"39\", \"64\", \"20\"
$ result_name        <chr> \"39 Quai André Citroën\", \"64 Allée de Bercy\", \"20 A…
$ result_street      <chr> \"Quai André Citroën\", \"Allée de Bercy\", \"Avenue de …
$ result_postcode    <chr> \"75015\", \"75012\", \"75007\"
$ result_city        <chr> \"Paris\", \"Paris\", \"Paris\"
$ result_context     <chr> \"75, Paris, Île-de-France\", \"75, Paris, Île-de-Fran…
$ result_citycode    <chr> \"75115\", \"75112\", \"75107\"
$ result_oldcitycode <chr> NA, NA, NA
$ result_oldcity     <chr> NA, NA, NA
$ result_district    <chr> \"Paris 15e Arrondissement\", \"Paris 12e Arrondisseme…
$ result_status      <chr> \"ok\", \"ok\", \"ok\"")
```

```{r geocode-tbl-postalcode, eval=FALSE}
# Geocode `table_test`, passing the postal code column along with the address.
glimpse(
  geocode_tbl(tbl = table_test, adresse = adress, code_postal = postal_code)
)
```

```{r geocode-tbl-postalcode-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `geocode-tbl-postalcode`
# chunk above is not evaluated (eval=FALSE), so its `glimpse()` result is
# printed verbatim here (presumably captured from an earlier run).
cat("Rows: 3
Columns: 21
$ z                  <dbl> -0.1769586, 1.2007049, -0.3180847
$ adress             <chr> \"39 quai André Citroën\", \"64 Allée de Bercy\", \"20 a…
$ postal_code        <chr> \"75015\", \"75012\", \"75007\"
$ longitude          <dbl> 2.278922, 2.376011, 2.308628
$ latitude           <dbl> 48.84696, 48.84254, 48.85070
$ result_score       <dbl> 0.9801645, 0.9729327, 0.9716455
$ result_score_next  <chr> NA, \"0.600363971291866\", \"0.38506487603305783\"
$ result_label       <chr> \"39 Quai André Citroën 75015 Paris\", \"64 Allée de B…
$ result_type        <chr> \"housenumber\", \"housenumber\", \"housenumber\"
$ result_id          <chr> \"75115_0318_00039\", \"75112_0874_00064\", \"75107_8909…
$ result_housenumber <chr> \"39\", \"64\", \"20\"
$ result_name        <chr> \"39 Quai André Citroën\", \"64 Allée de Bercy\", \"20 A…
$ result_street      <chr> \"Quai André Citroën\", \"Allée de Bercy\", \"Avenue de …
$ result_postcode    <chr> \"75015\", \"75012\", \"75007\"
$ result_city        <chr> \"Paris\", \"Paris\", \"Paris\"
$ result_context     <chr> \"75, Paris, Île-de-France\", \"75, Paris, Île-de-Fran…
$ result_citycode    <chr> \"75115\", \"75112\", \"75107\"
$ result_oldcitycode <chr> NA, NA, NA
$ result_oldcity     <chr> NA, NA, NA
$ result_district    <chr> \"Paris 15e Arrondissement\", \"Paris 12e Arrondisseme…
$ result_status      <chr> \"ok\", \"ok\", \"ok\"")
```

```{r geocode-tbl-codeinsee, eval=FALSE}
# Load the `paris2012` example dataset.
data("paris2012")

# Keep the first 100 rows, then build for each one a full address string and
# an INSEE code ("751" followed by the arrondissement).
paris_sample <- mutate(
  slice(paris2012, 1:100),
  adresse = paste(numero, voie, nom),
  code_insee = paste0("751", arrondissement)
)

# Geocode, passing the INSEE code alongside the address, and inspect the result.
glimpse(geocode_tbl(paris_sample, adresse = adresse, code_insee = code_insee))
```

```{r geocode-tbl-codeinsee-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `geocode-tbl-codeinsee`
# chunk above is not evaluated (eval=FALSE), so its `glimpse()` result is
# printed verbatim here (presumably captured from an earlier run).
cat("Rows: 100
Columns: 27
$ arrondissement     <chr> \"06\", \"06\", \"06\", \"06\", \"06\", \"06\", \"06\", \"06\", \"06…
$ bureau             <chr> \"09\", \"09\", \"09\", \"09\", \"09\", \"09\", \"09\", \"09\", \"09…
$ numero             <int> 4, 5, 6, 7, 8, 11, 12, 13, 14, 16, 3, 4, 5, 6, 7, 8…
$ voie               <chr> \"RUE DE L\", \"RUE DE L\", \"RUE DE L\", \"RUE DE L\", \"RU…
$ nom                <chr> \"ABBAYE\", \"ABBAYE\", \"ABBAYE\", \"ABBAYE\", \"ABBAYE\", \"…
$ nb                 <int> 1, 1, 20, 2, 17, 2, 9, 15, 17, 8, 13, 6, 6, 3, 9, 1…
$ ID                 <chr> \"0609\", \"0609\", \"0609\", \"0609\", \"0609\", \"0609\", \"06…
$ adresse            <chr> \"4 RUE DE L ABBAYE\", \"5 RUE DE L ABBAYE\", \"6 RUE DE…
$ code_insee         <chr> \"75106\", \"75106\", \"75106\", \"75106\", \"75106\", \"75106…
$ longitude          <dbl> 2.335715, 2.335172, 2.335352, 2.335041, 2.334903, 2…
$ latitude           <dbl> 48.85405, 48.85407, 48.85414, 48.85410, 48.85425, 4…
$ result_score       <dbl> 0.9663627, 0.9663627, 0.9663627, 0.9663627, 0.96636…
$ result_score_next  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_label       <chr> \"4 Rue de l'Abbaye 75006 Paris\", \"5 Rue de l'Abbaye…
$ result_type        <chr> \"housenumber\", \"housenumber\", \"housenumber\", \"house…
$ result_id          <chr> \"75106_0002_00004\", \"75106_0002_00005\", \"75106_0002…
$ result_housenumber <chr> \"4\", \"5\", \"6\", \"7\", \"8\", \"11\", \"12\", \"13\", \"14\", \"1…
$ result_name        <chr> \"4 Rue de l'Abbaye\", \"5 Rue de l'Abbaye\", \"6 Rue de…
$ result_street      <chr> \"Rue de l'Abbaye\", \"Rue de l'Abbaye\", \"Rue de l'Abb…
$ result_postcode    <chr> \"75006\", \"75006\", \"75006\", \"75006\", \"75006\", \"75006…
$ result_city        <chr> \"Paris\", \"Paris\", \"Paris\", \"Paris\", \"Paris\", \"Paris…
$ result_context     <chr> \"75, Paris, Île-de-France\", \"75, Paris, Île-de-Fran…
$ result_citycode    <chr> \"75106\", \"75106\", \"75106\", \"75106\", \"75106\", \"75106…
$ result_oldcitycode <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_oldcity     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_district    <chr> \"Paris 6e Arrondissement\", \"Paris 6e Arrondissement…
$ result_status      <chr> \"ok\", \"ok\", \"ok\", \"ok\", \"ok\", \"ok\", \"ok\", \"ok\", \"ok…")
```

## Reverse geocode

Reverse geocoding is the process of converting a point location (latitude, longitude) back into a human-readable address.

### A single address

`reverse_geocode()` takes longitude and latitude as arguments and returns a data frame with addresses.

```{r reverse-geocode, eval=FALSE}
# Reverse geocode a single longitude/latitude pair and inspect the result.
glimpse(reverse_geocode(long = 2.279092, lat = 48.84683))
```

```{r reverse-geocode-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `reverse-geocode` chunk
# above is not evaluated (eval=FALSE), so its `glimpse()` result is printed
# verbatim here (presumably captured from an earlier run).
cat("Rows: 10
Columns: 23
$ type        <chr> \"housenumber\", \"housenumber\", \"housenumber\", \"housenumber\"…
$ name        <chr> \"39a Quai André Citroën\", \"39 Quai André Citroën\", \"43a Qu…
$ label       <chr> \"39a Quai André Citroën 75015 Paris\", \"39 Quai André Citro…
$ street      <chr> \"Quai André Citroën\", \"Quai André Citroën\", \"Quai André Ci…
$ postcode    <chr> \"75015\", \"75015\", \"75015\", \"75015\", \"75015\", \"75015\", \"750…
$ citycode    <chr> \"75115\", \"75115\", \"75115\", \"75115\", \"75115\", \"75115\", \"751…
$ city        <chr> \"Paris\", \"Paris\", \"Paris\", \"Paris\", \"Paris\", \"Paris\", \"Par…
$ oldcitycode <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ oldcity     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ district    <chr> \"Paris 15e Arrondissement\", \"Paris 15e Arrondissement\", \"P…
$ context     <chr> \"75, Paris, Île-de-France\", \"75, Paris, Île-de-France\", \"7…
$ importance  <dbl> 0.78181, 0.78181, 0.78181, 0.78181, 0.82311, 0.78181, 0.82…
$ housenumber <chr> \"39a\", \"39\", \"43a\", \"41\", \"8a\", \"37\", \"8\", \"10\", \"43\", \"12\"
$ id          <chr> \"75115_0318_00039_a\", \"75115_0318_00039\", \"75115_0318_0004…
$ banId       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ x           <dbl> 647094.3, 647082.8, 647087.1, 647071.8, 647110.9, 647095.8…
$ y           <dbl> 6860995, 6861010, 6860976, 6860999, 6861016, 6861024, 6861…
$ distance    <int> 1, 19, 21, 24, 26, 29, 30, 32, 36, 40
$ score       <dbl> 0.9999, 0.9981, 0.9979, 0.9976, 0.9974, 0.9971, 0.9970, 0.…
$ `_type`     <chr> \"address\", \"address\", \"address\", \"address\", \"address\", \"ad…
$ type_geo    <chr> \"Point\", \"Point\", \"Point\", \"Point\", \"Point\", \"Point\", \"Poi…
$ longitude   <dbl> 2.279081, 2.278922, 2.278985, 2.278774, 2.279305, 2.279098…
$ latitude    <dbl> 48.84683, 48.84696, 48.84665, 48.84686, 48.84701, 48.84709…")
```

### A data frame

`reverse_geocode_tbl()` takes the names of the longitude and latitude columns and returns a data frame with addresses.

```{r reverse-geocode-tbl, eval=FALSE}
# Ten random points: a distinct lowercase letter as label, plus a longitude
# and a latitude each drawn uniformly within fixed bounds.
test_df <- tibble::tibble(
  nom = sample(letters, 10),
  lon = runif(n = 10, min = 2.19, max = 2.47),
  lat = runif(n = 10, min = 48.8, max = 48.9)
)

# Reverse geocode every row from its `lon` and `lat` columns.
glimpse(reverse_geocode_tbl(test_df, lon, lat))
```

```{r reverse-geocode-tbl-output, echo=FALSE, comment=""}
# Hard-coded text shown in place of live output: the `reverse-geocode-tbl`
# chunk above is not evaluated (eval=FALSE), so its `glimpse()` result is
# printed verbatim here (presumably captured from an earlier run).
cat("Rows: 10
Columns: 20
$ nom                <chr> \"p\", \"n\", \"i\", \"j\", \"f\", \"c\", \"o\", \"u\", \"q\", \"a\"
$ longitude          <dbl> 2.209505, 2.245517, 2.286644, 2.382297, 2.193622, 2…
$ latitude           <dbl> 48.82198, 48.89566, 48.85378, 48.83758, 48.88390, 4…
$ result_longitude   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_latitude    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_distance    <int> 14, 10, 46, 4, 35, NA, 4, 8, 45, 3
$ result_label       <chr> \"Rue Léon Cladel 92310 Sèvres\", \"71 Rue Segoffin 92…
$ result_type        <chr> \"street\", \"housenumber\", \"housenumber\", \"housenumbe…
$ result_id          <chr> \"92072_1130\", \"92026_8535_00071\", \"75115_4313_00016…
$ result_housenumber <chr> NA, \"71\", \"16a\", \"55\", \"69\", NA, \"203\", \"52\", \"39\",…
$ result_name        <chr> \"Rue Léon Cladel\", \"71 Rue Segoffin\", \"16a Quai de …
$ result_street      <chr> NA, \"Rue Segoffin\", \"Quai de Grenelle\", \"Rue de Ber…
$ result_postcode    <chr> \"92310\", \"92400\", \"75015\", \"75012\", \"92000\", NA, \"7…
$ result_city        <chr> \"Sèvres\", \"Courbevoie\", \"Paris\", \"Paris\", \"Nanterre…
$ result_context     <chr> \"92, Hauts-de-Seine, Île-de-France\", \"92, Hauts-de-…
$ result_citycode    <chr> \"92072\", \"92026\", \"75115\", \"75112\", \"92050\", NA, \"7…
$ result_oldcitycode <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_oldcity     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_district    <chr> NA, NA, \"Paris 15e Arrondissement\", \"Paris 12e Arro…
$ result_status      <chr> \"ok\", \"ok\", \"ok\", \"ok\", \"ok\", \"not-found\", \"ok\", \"o…")
```