Compiled on 2021-02-06 by E. Anthon Eff
Jones College of Business, Middle Tennessee State University

This script can serve as a template for bringing in county-level data from the US Bureau of the Census, and displaying that data on a chloropleth map.

# ===============================================================================================
# -------------api_census-----------------------------------------

options(encoding = 'UTF-8')
setwd("~/Dropbox/4500/") # change to your own working directory
library(censusapi)
library(rgdal)
library(maps)
library(maptools)
library(tigris)
library(sp)
library(sf)
library(plyr)

# ----county level data from census-----------
library(censusapi) 

# Get data from the census API
# https://cran.r-project.org/web/packages/censusapi/vignettes/getting-started.html
capi<-"b142681d75c7e72cb7c6191c665472bd3c9cc884" # My API key -- you might want to get your own (see link above)
Sys.setenv(CENSUS_KEY=capi) # Add key to .Renviron
readRenviron("~/.Renviron") # Reload .Renviron
Sys.getenv("CENSUS_KEY") # Check to see that the expected key is output in your R console

# ---get a list of all variables in census acs5-2018
dim(j<-listCensusMetadata("acs/acs5", vintage = 2018, type = "variables", group = NULL))
# apply(sapply(j,function(i) nchar(i)),2,max,na.rm=TRUE)
j<-j[which(nchar(j$concept)<=300),1:7] #necessary to remove an erroneous record; also redundant column removed
str(j)
write.csv(j,file="acs5_2018varbs.csv") # can open this in in a spreadsheet
getwd() # the file is written in this location (your working directory)

# ---finding variables-----------
z<-grep("Median household income",j$label,ignore.case=TRUE)
a<-j[z,]
a<-a[order(a$name),c("name","group","concept","label")]
# ---by looking at both concept and label you can figure out the meaning of each variable------
write.csv(a,"z.csv") # easiest to look at this in a spreadsheet

xaa<-getCensus(name="acs/acs5",vintage=2018,vars=a$name,region="county:*",regionin="state:*") # getting values for all variables selected above
# # some alternative specifications below
# values for specific variable(s):
# xaa<-getCensus(name="acs/acs5",vintage=2018,vars=c("B24022_060E","B19001B_014E"),region="county:*",regionin="state:*")
# values for all variables in group:
# xaa<-getCensus(name="acs/acs5",vintage=2018,vars="group(B24022)",region="county:*",regionin="state:*") 
for (i in colnames(xaa)){xaa[which(xaa[,i]==-666666666),i]<-NA} # -666666666 is the placeholder for missing values
xaa$geoid<-paste0(xaa$state,xaa$county) # county FIPS code to allow merging with attribute table
rownames(xaa)<-xaa$geoid # use FIPS as rownames

# --------Two ways to obtain county and state boundary files (choose one)---------------
# 1) Download from Census Tiger site
rxx<-"https://www2.census.gov/geo/tiger/TIGER2018/COUNTY/tl_2018_us_county.zip"
download.file(rxx,"z.zip")
unzip("z.zip")
cnty<-rgdal::readOGR(dsn=getwd(),layer="tl_2018_us_county")

rxx<-"https://www2.census.gov/geo/tiger/TIGER2018/STATE/tl_2018_us_state.zip"
download.file(rxx,"z.zip")
unzip("z.zip")
state<-rgdal::readOGR(dsn=getwd(),layer="tl_2018_us_state")

# 2) Use package tigris
cnty<-tigris::counties() # bring in US county boundaries
cnty<-sf::as_Spatial(cnty) # convert from sf to sp format

state<-tigris::states() # bring in US state boundaries
state<-sf::as_Spatial(state) # convert from sf to sp format
# -----------------------------------------------------------
str(cnty,max.level=2) # see structure of county SpatialPolygonsDataFrame
str(cnty@data) # cnty@data is the attribute table for SpatialPolygonsDataFrame cnty
rownames(cnty@data)<-cnty@data$GEOID

fii<-unique(maps::state.fips$fips) # get state fips for lower 48 states + DC
fii<-sprintf("%02d",fii) # put leading zeros on single-digit FIPS
cnty<-cnty[which(cnty@data$STATEFP %in% fii),] # restrict county map to lower 48 + DC
state<-state[which(state@data$STATEFP %in% fii),] # restrict state map to lower 48 + DC

cnty@data<-data.frame(cnty@data,xaa[rownames(cnty@data),]) # join attribute table with census data
cnty@data$bwmdhhy<-cnty@data$B19013B_001E/cnty@data$B19013A_001E # ratio of black to white median hh income 
cnty@data$nhwtmdhhy<-cnty@data$B19013H_001E/cnty@data$B19013_001E # ratio of nonhispwhite to total median hh income 
cnty@data$ormdhhy<-cnty@data$B25119_002E/cnty@data$B25119_003E # ratio of owner to renter median hh income 
cnty@data$a6525mdhhy<-cnty@data$B19049_005E/cnty@data$B19049_003E # ratio of 65up to 25-44 median hh income 

# ---below we print a map of the age65up/age25-44 median HH income ratio to a png file---
ix<-"a6525mdhhy"
nint<-5 # number of categories (different colors)
png(file=paste0(ix,".png"),width=12,height=8,units="in",pointsize=10,res=800)
length(brks<-classInt::classIntervals(cnty@data[,ix],nint,style="kmeans")$brks)
nuk<-findInterval(cnty@data[,ix], brks, all.inside=TRUE)
farv<-rev(colorspace::sequential_hcl(length(brks)-1,"Purple-Yellow")) # a color scheme
# farv<-grDevices::colorRampPalette(c("yellow", "green","black"))(length(brks)-1) # alternative color scheme
sp::plot(cnty,col=farv[nuk],main="age65up/age25-44 median HH income ratio",lty=1,lwd=0.1,border="black")
sp::plot(state,add=TRUE,lwd=.5,lty=1,border="blue",col=NULL)
zbrks<-round(brks,3)
zlab<-sapply(2:length(brks),function(i) paste(zbrks[i-1],zbrks[i],sep=" : "))
legend("bottomleft",zlab,fill=farv)
dev.off()
Ratio of Median Household Income: Householders of different ages (65up over 25-44)

Ratio of Median Household Income: Householders of different ages (65up over 25-44)