library(censusapi)
library(magrittr)
library(dplyr)
library(rvest)
library(stringr)
library(ggmap)
library(ggplot2)
library(rgdal)
library(rgeos)
library(maptools)
library(sf)
library(RColorBrewer)
library(leaflet)

Setup

Note: To access the Census API, you will need to sign up for a free key: https://api.census.gov/data/key_signup.html.

You will then add this line (including quotation marks, replacing YOURKEY with your personal Census API key; do not share this key) to the beginning of your script (I add it after setwd()):

Sys.setenv(CENSUS_KEY = "YOURKEY")

Census API R package vignette: https://cran.r-project.org/web/packages/censusapi/vignettes/getting-started.html

Example: Dallas County Demographics

Import demographic data using Census API

# Census tracts data
names2019_all <- as.data.frame(listCensusMetadata(name = "2019/acs/acs5", type = "variables"))
names2019_geo <- as.data.frame(listCensusMetadata(name = "2019/acs/acs5", type = "geography"))
# View(names2019_all[grepl("^SEX BY AGE", names2019_all$concept) == TRUE,])
# "B01001": Sex by Age, Sex by Age by Race
# "B05013": Sex by Age for the Foreign-Born Population
names2019 <- names2019_all[grepl("NAME|B01001|B05013", names2019_all$name) == TRUE, 
                           c("name", "label", "concept")]
pop2019_1 <- makeVarlist("2019/acs/acs5", find = "B01001", varsearch = "name")
pop2019_2 <- makeVarlist("2019/acs/acs5", find = "B05013", varsearch = "name")
pop2019 <- getCensus(name = "2019/acs/acs5",
                              vars = c("NAME", pop2019_1, pop2019_2),
                              region = "tract:*",
                              regionin = "state:48+county:113")

Preparing Census data for merge with shapefiles

row.names(pop2019) <- paste("tract_", pop2019$tract, sep="")
head(pop2019[,1:6])
##            state county tract                                     NAME
## tract_201     48    113   201  Census Tract 2.01, Dallas County, Texas
## tract_1203    48    113  1203 Census Tract 12.03, Dallas County, Texas
## tract_1502    48    113  1502 Census Tract 15.02, Dallas County, Texas
## tract_2200    48    113  2200    Census Tract 22, Dallas County, Texas
## tract_2400    48    113  2400    Census Tract 24, Dallas County, Texas
## tract_3101    48    113  3101 Census Tract 31.01, Dallas County, Texas
##            B01001B_029E B01001B_027E
## tract_201             0            0
## tract_1203            0            0
## tract_1502           11           22
## tract_2200           38           29
## tract_2400            7           16
## tract_3101           12            0
pop2019 <- select(pop2019, -c(1:4)) %>% 
  t() %>% 
  as.data.frame()

head(pop2019[,1:6])
##              tract_201 tract_1203 tract_1502 tract_2200 tract_2400 tract_3101
## B01001B_029E         0          0         11         38          7         12
## B01001B_027E         0          0         22         29         16          0
## B01001B_028E         0         15         53         12          4         24
## B01001B_025E         0          0         29         21          0         23
## B01001B_026E         0          0          0         33          9         65
## B01001B_022E         0          0          0          0          0         16
head(names2019)
##           name                                      label
## 1 B01001B_029E  Estimate!!Total:!!Female:!!65 to 74 years
## 2 B01001B_027E  Estimate!!Total:!!Female:!!45 to 54 years
## 3 B01001B_028E  Estimate!!Total:!!Female:!!55 to 64 years
## 4 B01001B_025E  Estimate!!Total:!!Female:!!30 to 34 years
## 5 B01001B_026E  Estimate!!Total:!!Female:!!35 to 44 years
## 6 B01001B_022E Estimate!!Total:!!Female:!!18 and 19 years
##                                        concept
## 1 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
## 2 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
## 3 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
## 4 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
## 5 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
## 6 SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
# Merge data with descriptive "label" and "concept" based on "name"
pop2019 <- merge(names2019, pop2019, by.x = "name", by.y = 0, all = TRUE)
row.names(pop2019) <- pop2019$name
head(pop2019[,1:6])
##                    name                                   label    concept
## B01001_001E B01001_001E                        Estimate!!Total: SEX BY AGE
## B01001_002E B01001_002E                 Estimate!!Total:!!Male: SEX BY AGE
## B01001_003E B01001_003E  Estimate!!Total:!!Male:!!Under 5 years SEX BY AGE
## B01001_004E B01001_004E   Estimate!!Total:!!Male:!!5 to 9 years SEX BY AGE
## B01001_005E B01001_005E Estimate!!Total:!!Male:!!10 to 14 years SEX BY AGE
## B01001_006E B01001_006E Estimate!!Total:!!Male:!!15 to 17 years SEX BY AGE
##             tract_201 tract_1203 tract_1502
## B01001_001E      3010       1481       3728
## B01001_002E      1392        741       1904
## B01001_003E        83         43         71
## B01001_004E       118         69        215
## B01001_005E        80         27        129
## B01001_006E        48         57         27
pop2019 <- pop2019 %>%
  t() %>% 
  as.data.frame()

# Add descriptive column names based on values in "label" and "concept" rows
names(pop2019) <- gsub("\\W+","_",pop2019[c("label","concept"),])
pop2019 <- pop2019[-c(1:3),]

# Rename rows to match Census Tract IDs in Census shapefile
row.names(pop2019) <- gsub("tract_", "", row.names(pop2019))
rnames <- row.names(pop2019)
pop2019 <- lapply(pop2019, as.numeric) %>% as.data.frame()
row.names(pop2019) <- rnames
head(pop2019[,1:3])
##      c_Estimate_Total_SEX_BY_AGE_ c_Estimate_Total_Male_SEX_BY_AGE_
## 201                          3010                              1392
## 1203                         1481                               741
## 1502                         3728                              1904
## 2200                         2504                              1196
## 2400                         2789                              1430
## 3101                         3627                              2304
##      c_Estimate_Total_Male_Under_5_years_SEX_BY_AGE_
## 201                                               83
## 1203                                              43
## 1502                                              71
## 2200                                              12
## 2400                                              28
## 3101                                              15

Reading in shapefile and merging with Census data

tracts <- st_read(dsn = "data", "tl_2019_48_tract")
## Reading layer `tl_2019_48_tract' from data source `C:\Users\Rachel\Documents\ISL2021\data' using driver `ESRI Shapefile'
## Simple feature collection with 5265 features and 12 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -106.6456 ymin: 25.83716 xmax: -93.50804 ymax: 36.5007
## geographic CRS: NAD83
tractsDal <- tracts[tracts$COUNTYFP == "113",]
tractsDal$TRACTCE <- as.numeric(tractsDal$TRACTCE)
tractsDal <- merge(tractsDal, pop2019, by.x = "TRACTCE", by.y = 0)
tractsDal$WhitePct <- tractsDal$c_Estimate_Total_SEX_BY_AGE_WHITE_ALONE_NOT_HISPANIC_OR_LATINO_/tractsDal$c_Estimate_Total_SEX_BY_AGE_

Making a basic map with ggplot

ggplot() +
  geom_sf(
    mapping = aes(fill = WhitePct),
    data = tractsDal,
    show.legend = TRUE) +
  theme_minimal()

Example: Interactive Florida map

Data Sources

“tl_2020_12_state20” and “tl_2020_12_county20” from https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html -> Scroll to “Download Complete Set of Shapefiles by State” -> Scroll to “Florida”; download will begin automatically.

“US_Routes_TDA” from https://gis-fdot.opendata.arcgis.com/datasets/us-routes-tda -> Click on cloud icon to download. See https://gis-fdot.opendata.arcgis.com/search?tags=roads&type=feature%20layer for full list of available feature layers from Florida Open Data Hub.

You can find more shapefiles on https://www.fdot.gov/statistics/gis/default.shtm. For downloads, enter “guest” as username and leave password field blank.

FYI, if you want to unzip files automatically:

Move downloads to working directory, then:

unzip("tl_2020_12_all.zip", exdir="tl_2020_12_all")

Mapping code

state <- st_read(dsn = "tl_2020_12_all", "tl_2020_12_state20") %>%
  st_transform(crs = "WGS84")
## Reading layer `tl_2020_12_state20' from data source `C:\Users\Rachel\Documents\ISL2021\tl_2020_12_all' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.6349 ymin: 24.39631 xmax: -79.97431 ymax: 31.00097
## geographic CRS: NAD83
counties <- st_read(dsn = "tl_2020_12_all", "tl_2020_12_county20") %>%
  st_transform(crs = "WGS84")
## Reading layer `tl_2020_12_county20' from data source `C:\Users\Rachel\Documents\ISL2021\tl_2020_12_all' using driver `ESRI Shapefile'
## Simple feature collection with 67 features and 17 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -87.6349 ymin: 24.39631 xmax: -79.97431 ymax: 31.00097
## geographic CRS: NAD83
highways <- st_read(dsn = "US_Routes_TDA", "US_Routes_TDA") %>%
  st_transform(crs = "WGS84")
## Reading layer `US_Routes_TDA' from data source `C:\Users\Rachel\Documents\ISL2021\US_Routes_TDA' using driver `ESRI Shapefile'
## Simple feature collection with 658 features and 13 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -119240.2 ymin: 2715403 xmax: 594411.5 ymax: 3446241
## projected CRS:  NAD83 / UTM zone 17N
highways$route_factor <- as.factor(highways$ROUTE)

bins <- c(1000000, 10000000, 100000000, 1000000000, 10000000000, 1000000000000, Inf)
pal <- colorBin("Blues", domain = counties$AWATER20, bins = bins)
hwycolors <- colorFactor("viridis", highways$route_factor)
m <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Voyager) %>%
  addMarkers(lng=-80.788, lat=26.929,
             popup="Lake Okeechobee") %>%
  setView(lng=-83, lat=29, zoom=7) %>%
  addPolygons(data = counties,
              color='white',
              opacity=0.7,
              fillColor=~pal(AWATER20),
              fillOpacity=0.7,
              label=~NAMELSAD20) %>%
  addPolylines(data = highways,
               color = ~hwycolors(route_factor),
               opacity = 1,
               label = ~ROUTE) %>%
  addPolylines(data = state,
               color = 'black') %>%
  addLegend(pal = pal, 
            values = counties$AWATER20, 
            title = "County Water Area")
m

See https://rstudio.github.io/leaflet/ for other Leaflet options and examples.

Save map as HTML widget

library(htmlwidgets)
saveWidget(m, "m_florida.html", selfcontained = F, libdir = "lib")

More about HTML widgets in R: https://plotly-r.com/saving.html

Questions? Email me.

Return to main programming page.