openadds

Build Status Build status codecov.io rstudio mirror downloads cran version

openadds is an R client for data from Openaddresses.io. Data comes from http://data.openaddresses.io.

The reason for creating this R client is that the data coming from OpenAddresses is heterogeneous in many ways.

This package tries to make it easy to retrieve the data, as well as to combine data sets and visualize them.

Install

CRAN (and get leaflet)

install.packages(c("leaflet", "openadds"))

Dev version

devtools::install_github("sckott/openadds")
library("openadds")

List datasets

(dat <- oa_list())
#> # A tibble: 1,680 x 15
#>                      source
#>                       <chr>
#> 1   ar/ba/buenos_aires.json
#> 2             at/31254.json
#> 3             at/31255.json
#> 4             at/31256.json
#> 5    at/city_of_vienna.json
#> 6             at/tirol.json
#> 7  au/city_of_canberra.json
#> 8       au/countrywide.json
#> 9        au/queensland.json
#> 10   au/tas-launceston.json
#> # ... with 1,670 more rows, and 14 more variables: cache <chr>,
#> #   sample <chr>, geometry type <chr>, address count <int>, version <chr>,
#> #   fingerprint <chr>, cache time <S3: hms>, processed <chr>, process
#> #   time <S3: hms>, output <chr>, attribution required <chr>, attribution
#> #   name <chr>, share-alike <chr>, code version <chr>

Search for datasets

oa_search(country = "us", state = "ca")
#> # A tibble: 53 x 5
#>    country state                  city    id
#> *    <chr> <chr>                 <chr> <chr>
#> 1       us    ca               alameda      
#> 2       us    ca                amador      
#> 3       us    ca              berkeley      
#> 4       us    ca                 butte      
#> 5       us    ca       city_of_anaheim      
#> 6       us    ca   city_of_bakersfield      
#> 7       us    ca        city_of_carson      
#> 8       us    ca     city_of_cupertino      
#> 9       us    ca       city_of_hayward      
#> 10      us    ca city_of_mountain_view      
#> # ... with 43 more rows, and 1 more variables: url <chr>

Get data

Passing in a URL

(out1 <- oa_get(dat$processed[5]))
#> <Openaddresses data> city_of_vienna
#> paths: /Users/sacmac/Library/Caches/openadds-ca ...
#> data set sizes (NROW): 282313
#> first data_frame ...  
#> # A tibble: 282,313 x 11
#>         LON      LAT NUMBER       STREET  UNIT  CITY DISTRICT REGION
#>       <dbl>    <dbl>  <chr>        <chr> <chr> <chr>    <chr>  <chr>
#> 1  16.36766 48.21032      1    Irisgasse  <NA>  <NA>     <NA>   <NA>
#> 2  16.36743 48.21029     13  Naglergasse  <NA>  <NA>     <NA>   <NA>
#> 3  16.36674 48.21101      1  Heidenschuß  <NA>  <NA>     <NA>   <NA>
#> 4  16.36657 48.21100     31  Naglergasse  <NA>  <NA>     <NA>   <NA>
#> 5  16.36954 48.21006      7   Tuchlauben  <NA>  <NA>     <NA>   <NA>
#> 6  16.37587 48.20312      4 Schubertring  <NA>  <NA>     <NA>   <NA>
#> 7  16.37547 48.20297      8  Fichtegasse  <NA>  <NA>     <NA>   <NA>
#> 8  16.37593 48.20226      7 Schubertring  <NA>  <NA>     <NA>   <NA>
#> 9  16.37403 48.20418     15 Seilerstätte  <NA>  <NA>     <NA>   <NA>
#> 10 16.37279 48.20407     18    Annagasse  <NA>  <NA>     <NA>   <NA>
#> # ... with 2.823e+05 more rows, and 3 more variables: POSTCODE <int>,
#> #   ID <chr>, HASH <chr>

First get the URL for a dataset with as_openadd(), then pass the result to oa_get()

(x <- as_openadd("us", "nv", "las_vegas"))
#> <<OpenAddreses>> 
#>   <<country>> us
#>   <<state>> nv
#>   <<city>> las_vegas
oa_get(x)
#> <Openaddresses data> las_vegas
#> paths: /Users/sacmac/Library/Caches/openadds-ca ...
#> data set sizes (NROW): 159
#> first data_frame ...  
#> # A tibble: 159 x 11
#>          LON      LAT NUMBER                      STREET  UNIT  CITY
#>        <dbl>    <dbl>  <int>                       <chr> <chr> <chr>
#> 1  -115.0529 36.15966     NA RODRIGUEZ-DE-LOPEZ GRACIELA  <NA>  <NA>
#> 2  -115.0518 36.16089   5581             ORCHARD LN #150  <NA>  <NA>
#> 3  -115.0514 36.16099   5587                  ORCHARD LN  <NA>  <NA>
#> 4  -115.0525 36.16001   5516                  ORCHARD LN  <NA>  <NA>
#> 5  -115.0524 36.16059   6320           E CHARLESTON BLVD  <NA>  <NA>
#> 6  -115.0522 36.16039     NA              TAYLOR CYNTHIA  <NA>  <NA>
#> 7  -115.0524 36.16032   5539                  ORCHARD LN  <NA>  <NA>
#> 8  -115.0520 36.15991    863                   HURON AVE  <NA>  <NA>
#> 9  -115.0501 36.15981   5650                 VINEYARD LN  <NA>  <NA>
#> 10 -115.0528 36.15998   5528                  ORCHARD LN  <NA>  <NA>
#> # ... with 149 more rows, and 5 more variables: DISTRICT <chr>,
#> #   REGION <chr>, POSTCODE <chr>, ID <chr>, HASH <chr>

Combine multiple datasets

out2 <- oa_get(dat$processed[35])
(alldat <- oa_combine(out1, out2))
#> # A tibble: 383,344 x 4
#>         lon      lat         address        dataset
#> *     <dbl>    <dbl>           <chr>          <chr>
#> 1  16.36766 48.21032     1 Irisgasse city_of_vienna
#> 2  16.36743 48.21029  13 Naglergasse city_of_vienna
#> 3  16.36674 48.21101   1 Heidenschuß city_of_vienna
#> 4  16.36657 48.21100  31 Naglergasse city_of_vienna
#> 5  16.36954 48.21006    7 Tuchlauben city_of_vienna
#> 6  16.37587 48.20312  4 Schubertring city_of_vienna
#> 7  16.37547 48.20297   8 Fichtegasse city_of_vienna
#> 8  16.37593 48.20226  7 Schubertring city_of_vienna
#> 9  16.37403 48.20418 15 Seilerstätte city_of_vienna
#> 10 16.37279 48.20407    18 Annagasse city_of_vienna
#> # ... with 383,334 more rows

Map data

Get some data

x <- oa_get(oa_search(city = "oregon_city")[1,]$url)

Make an interactive map

library("leaflet")
leaflet(x) %>%
  addTiles() %>%
  addCircles(lat = ~LAT, lng = ~LON, popup = ~STREET)
map1

map1

Meta