Commuting Sheds

Compiled on 2021-02-05 by E. Anthon Eff
Jones College of Business, Middle Tennessee State University

Local Labor Market Areas (LMAs) are defined by looking at the movement of workers across county boundaries. This movement can be visualized as as commuting shed, acting much like a watershed, in directing the flow of workers from peripheral areas to a nodal county that provides specialized services to the other counties in its hinterland. While counties are not local economies, LMAs are, and it is often useful to use LMAs in regression analyses examining features of local economies.

options(encoding = 'UTF-8')
setwd("~/Dropbox/4500/") # change to your own working directory
library(readxl)
library(Matrix)
library(maptools)
library(xml2)
library(rvest)
library(igraph)
library(tigris)
library(spdep)

# ---bring in Journey to Work data from Census---
rxx<-"https://www2.census.gov/programs-surveys/demo/tables/metro-micro/2015/commuting-flows-2015/table1.xlsx"
download.file(rxx,"jtw2015.xlsx") # download file to working directory; save as jtw2015.xlsx
readxl::excel_sheets("jtw2015.xlsx") # list the names of the worksheets in the workbook (there is only one)
gg<-data.frame(readxl::read_xlsx("jtw2015.xlsx",skip=6)) # read in worksheet, skipping first six lines
gg$rfips<-paste0(gg[,1],gg[,2]) # residence county fips
gg$wfips<-paste0(substr(gg[,5],2,3),gg[,6]) # work county fips (problem with format of work state fixed here)
gg$flow<-gg[,9]                 # flow of workers
gg<-gg[complete.cases(gg),c("rfips","wfips","flow")] # remove observations with missing values; keep only 3 variables

# ----make sparse matrix from three-column data.frame
gg[,"rfips"]<-as.factor(gg[,"rfips"])
gg[,"wfips"]<-as.factor(gg[,"wfips"])
dd<-Matrix::sparseMatrix(as.integer(gg[,"rfips"]), as.integer(gg[,"wfips"]), x = gg[,"flow"])
rownames(dd)<-levels(gg[,"rfips"])
colnames(dd)<-levels(gg[,"wfips"])

fii<-intersect(colnames(dd),rownames(dd)) # counties that are listed both as residence and workplace
dd<-as.matrix(dd[fii,fii]) # ensures that rows and columns are aligned; change to matrix format
str(dd)


# --make county contiguity matrix--
cnty<-tigris::counties() # bring in US county boundaries
cnty<-sf::as_Spatial(cnty) # convert from sf to sp format
cnty<-cnty[which(cnty@data$GEOID %in% fii),] # restrict counties to those in JTW
rownames(cnty@data)<-cnty@data$GEOID
str(cnty,max.level=2)

navn<-cnty@data$GEOID
bz<-spdep::poly2nb(cnty,row.names=navn) # converting polygon to neighbors list
names(bz)<-navn
table(sapply(bz,length)) # shows the number of occurances of N neighbors
pmm<-matrix(0,length(navn),length(navn));dimnames(pmm)<-list(navn,navn) # matrix of zeros
for (i in navn){pmm[i,bz[[i]]]<-1} # enter ones for neighbors
str(pmm<-pmm[fii,fii])    # align rows and columns with the JTW matrix above


# ---identify commuting sheds------
uu<-dd*pmm # consider only flows between contiguous counties
uu<-(uu==apply(uu,1,max))*(uu>0) # matrix of zeros, with ones for residence county largest outflow
table(rowSums(uu)) # a few counties have no outflow to neighbors; a few others have 2 equal-sized max outflows
y<-igraph::graph.adjacency(uu) # create an adjacency matrix
k<-igraph::clusters(y)         # identify the components of the graph
c1<-data.frame(k=k$membership,fips=names(k$membership)) # shows membership of each county in each component
c1<-c1[rownames(cnty@data),]    # order exactly like the attribute table of the cnty map file
cnty@data$comshd<-c1$k          # add cluster id to the attribute table

# ---get name for nodal county-----
aj<-distances(y) # a matrix of path lengths from each vertex to every other vertex
aj[which(aj==Inf)]<-NA  # unreachable vertices have path length of infinity; change these to missing
j<-colSums(aj,na.rm=TRUE) # sum of finite path lengths for each vertex
# below we pick out the vertex with the smallest path length within each cluster -- this is our nodal county
z<-sapply(names(table(k$membership)),function(i) {z<-j[names(k$membership)[which(k$membership==i)]];names(z)[which.min(z)]})
o<-data.frame(GEOID=z,comshd=names(z))
o<-merge(o,cnty@data[,c("GEOID","STATEFP","NAME")],by="GEOID")

# ---get postal abbreviation for state fips code------
a<-xml2::read_html("https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code")
b<-rvest::html_table(a,fill=TRUE) # extract the tables from a wikipedia article
str(b,max.level=1) # look at the size of each table, pick the table that seems the right size
b<-b[[1]]  # the first table seems the right size
b<-b[,2:3];colnames(b)<-c("abb","STATEFP") # keeping only two columns; renaming the columns
b$STATEFP<-sprintf("%02d",b$STATEFP) # put leading zeros on single-digit state fips
o<-merge(o,b,by="STATEFP") 
o$csname<-paste(o$NAME,o$abb,sep=", ") # name for each commuting shed is node county with state postal abbreviation
o<-o[,c("comshd","csname")]
rownames(o)<-o$comshd


# ----aggregate counties into commuting sheds---
f<-maptools::unionSpatialPolygons(cnty,cnty@data$comshd) # creates commuting sheds spatial polygons
# below we make an attribute table for the spatial polygons
dim(j<-t(sapply(1:length(f@polygons),function(i) data.frame(comshd=f@polygons[[i]]@ID,area=f@polygons[[i]]@area))))
xy<-coordinates(f);colnames(xy)<-c("x","y") # centroids for commuting sheds
j<-data.frame(j,xy)
for (i in colnames(j)){j[,i]<-as.numeric(j[,i])} # set all columns to numeric class
j<-data.frame(j,csname=o[rownames(j),"csname"]) # add name of node county

# below we combine the spatial polygons with the attribute table
comshd<-SpatialPolygonsDataFrame(f,j) # like a shapefile -- shape information along with attribute data
# save(comshd,file="comshd.Rdata")  # uncomment to save your SpatialPolygonsDataFrame


# -----print map outlines to file-----------
comshd@bbox # gives the geographic limits of the map view
o<-comshd   # make a copy of comshd
o@bbox<-matrix(c(-125.7,24.8,-66,49.44),2,2) # set the bounding box for the lower 49 

# ---create png file with commuting shed map for lower 49---------
png(file="commutersheds.png",width=12,height=8,units="in",pointsize=10,res=800)
plot(o,lwd=.1) # print lower 49 commuting sheds map
text(o@data$x,o@data$y,o@data$csname,cex=.1) # print node county name in very small letters
dev.off()

Commuting Sheds as Labor Market Areas (View image to enlarge)