Compiled on 2021-02-06 by E. Anthon Eff
Jones College of Business, Middle Tennessee State University

This script can serve as a template for bringing in block group-level data from the US Bureau of the Census, and displaying that data on a chloropleth map.

# ===============================================================================================
# -------------api_census_bg-----------------------------------------

options(encoding = 'UTF-8')
setwd("~/Dropbox/4500/") # change to your own working directory
library(censusapi)
library(rgdal)
library(maps)
library(maptools)
library(tigris)
library(sp)
library(sf)
library(plyr)
library(grDevices)

# ----block-group level data from census-----------

# Get data from the census API
# https://cran.r-project.org/web/packages/censusapi/vignettes/getting-started.html
capi<-"b142681d75c7e72cb7c6191c665472bd3c9cc884" # My API key -- you might want to get your own (see link above)
Sys.setenv(CENSUS_KEY=capi) # Add key to .Renviron
readRenviron("~/.Renviron") # Reload .Renviron
Sys.getenv("CENSUS_KEY") # Check to see that the expected key is output in your R console

# ---get a list of all variables in census acs5-2018
# ---not all of these will have non-missing values at the block group-level
dim(j<-listCensusMetadata("acs/acs5", vintage = 2018, type = "variables", group = NULL))
j<-j[which(nchar(j$concept)<=300),1:7] #necessary to remove an erroneous record; also redundant column removed
str(j)
write.csv(j,file="acs5_2018varbs.csv") # can open this in in a spreadsheet
getwd() # the file is written in this location (your working directory)

# ---finding variables-----------
z<-grep("Median household income",j$label,ignore.case=TRUE)
a<-j[z,]
a<-a[order(a$name),c("name","group","concept","label")]
# ---by looking at both concept and label you can figure out the meaning of each variable------
write.csv(a,"z.csv") # easiest to look at this in a spreadsheet

# ---identify counties in which to extract block groups--------
cx<-"47" # state fips for Tennessee, put your own state fips here
cnty<-tigris::counties(state=cx)
cnty<-sf::as_Spatial(cnty)

cii<-c("Bedford", "Cannon", "Cheatham", "Clay", "Coffee", "Davidson", 
       "DeKalb", "Dickson", "Fentress", "Franklin", "Giles", "Grundy", 
       "Hickman", "Houston", "Humphreys", "Jackson", "Lawrence", "Lewis", 
       "Lincoln", "Macon", "Marshall", "Maury", "Montgomery", "Moore", 
       "Overton", "Perry", "Pickett", "Putnam", "Robertson", "Rutherford", 
       "Smith", "Stewart", "Sumner", "Trousdale", "Van Buren", 
       "Warren", "Wayne", "White", "Williamson", "Wilson")

cnty<-cnty[which(cnty@data$NAME %in% cii),] # pick only the Middle Tennessee counties
# cnty<-cnty[which(cnty@data$CSAFP=="400"),] # pick only the Nashville CSA counties
xy<-coordinates(cnty);colnames(xy)<-c("x","y")
cnty@data<-data.frame(cnty@data,xy) # add the coordinates to the attribute table
str(cnty,max.level=2)

cxx<-paste0("state:",cx," + county:",cnty@data$COUNTYFP) # format county fips to get census data
xaa<-NULL # empty object to which we will append data one county at a time as we iterate through the following loop
for (ix in cxx){
zz <- censusapi::getCensus(name = "acs/acs5",vintage = 2018,vars ="group(B19013)",
                            region = "block group:*",regionin = ix)
xaa<-rbind(xaa,zz) 
}

xaa$GEOID<-sapply(strsplit(xaa$GEO_ID,"US"),function(i) i[2])
xaa<-xaa[,c("GEOID",grep("E$",colnames(xaa),value=TRUE))] # variable names end with "E" (for estimate)
rownames(xaa)<-xaa$GEOID
head(xaa)

# block group boundaries from package tigris
bg<-tigris::block_groups(47,county=cnty@data$COUNTYFP) # bring in block group boundaries for Middle Tennessee
bg<-sf::as_Spatial(bg) # convert from sf to sp format
rownames(bg@data)<-bg@data$GEOID
setdiff(rownames(xaa),rownames(bg@data))

bg@data<-data.frame(bg@data,mhhy=xaa[rownames(bg@data),"B19013_001E"])
bg@data[which(bg@data$mhhy<0),"mhhy"]<-NA # Missing data is indicated with a value of -666666666

# plot(bg,lwd=.1) # can see what the block group outlines look like

# ---below we print a map of median household income to a png file---
ix<-"mhhy"
nint<-5 # number of categories (different colors)
png(file=paste0(ix,"_bg.png"),width=12,height=10,units="in",pointsize=10,res=800)
length(brks<-classInt::classIntervals(bg@data[,ix],nint,style="kmeans")$brks)
nuk<-findInterval(bg@data[,ix], brks, all.inside=TRUE)
# farv<-rev(colorspace::sequential_hcl(length(brks)-1,"Purple-Yellow")) # a color scheme
farv<-grDevices::colorRampPalette(c("yellow", "green","black"))(length(brks)-1) # alternative color scheme
sp::plot(bg,col=farv[nuk],main="Median Household Income",lty=0)
sp::plot(cnty,add=TRUE,lwd=.5,lty=1,border="blue",col=NULL)
text(cnty@data$x,cnty@data$y,cnty@data$NAME,cex=.5) # add county name labels
zbrks<-round(brks,3)
zlab<-sapply(2:length(brks),function(i) paste(zbrks[i-1],zbrks[i],sep=" : "))
legend("bottomright",zlab,fill=farv)
dev.off()
Median Household Income in Middle Tennessee (View image to enlarge)

Median Household Income in Middle Tennessee (View image to enlarge)