Spatial joins and distances between points in R

We start with a _*.csv_ file of Rutherford County home attributes. The fields include longitude/latitude coordinates, geocoded in ArcGIS. It is possible to geocode directly in R, using a Google Maps API, but there are restrictions in the number of address assignments that can be calculated per day.

library(sp)  
library(geosphere)
library(maptools)
## Checking rgeos availability: TRUE
library(rgeos)
## rgeos version: 0.3-6, (SVN revision 450)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Polygon checking: TRUE

We load the geocoded home attribute file and use the coordinates to create a spatial points object. We then read in polygon shape files, and perform a spatial join. Note that at each step we are increasing the number of fields in the home attribute file by adding the fields from the attribute tables of the polygon shape files.

u<-read.csv("S:/TEFF/450/arcview/5a/zrr.csv",as.is=TRUE)
names(u)
##  [1] "cityname" "yr"       "county"   "price"    "lndval"   "impval"  
##  [7] "age"      "yrblt"    "efyrb"    "sqft"     "acres"    "month"   
## [13] "sound"    "vacant"   "drywall"  "brick"    "sewer"    "level"   
## [19] "pracc"    "mailaddr" "mailzip"  "POINT_X"  "POINT_Y"
spx<-SpatialPoints(u[,c("POINT_X","POINT_Y")])
v<-readShapePoly("S:/TEFF/450/arcview/5a/ruthBG.shp")
k<-over(spx,v)
u<-data.frame(u,k)
names(u)
##  [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##  [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
## [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
## [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
## [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
## [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
## [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
## [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
## [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
## [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
## [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
## [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
## [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
## [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
## [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
## [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
## [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
## [86] "B19060_003"
v<-readShapePoly("S:/TEFF/450/arcview/5a/TNurbanAreas.shp")
k<-over(spx,v)
u<-data.frame(u,k)
names(u)
##   [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##   [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
##  [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
##  [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
##  [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
##  [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
##  [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
##  [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
##  [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
##  [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
##  [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
##  [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
##  [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
##  [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
##  [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
##  [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
##  [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
##  [86] "B19060_003" "OBJECTID"   "UACE10"     "GEOID10"    "NAME10"    
##  [91] "NAMELSAD10" "LSAD10"     "MTFCC10"    "UATYP10"    "FUNCSTAT10"
##  [96] "ALAND10"    "AWATER10"   "INTPTLAT10" "INTPTLON10" "Shape_Leng"
## [101] "Shape_Area"
v<-readShapePoly("S:/TEFF/450/arcview/5a/tl_2014_47_place/tl_2014_47_place.shp")
k<-over(spx,v)
u<-data.frame(u,k)
names(u)
##   [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##   [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
##  [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
##  [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
##  [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
##  [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
##  [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
##  [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
##  [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
##  [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
##  [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
##  [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
##  [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
##  [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
##  [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
##  [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
##  [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
##  [86] "B19060_003" "OBJECTID"   "UACE10"     "GEOID10"    "NAME10"    
##  [91] "NAMELSAD10" "LSAD10"     "MTFCC10"    "UATYP10"    "FUNCSTAT10"
##  [96] "ALAND10"    "AWATER10"   "INTPTLAT10" "INTPTLON10" "Shape_Leng"
## [101] "Shape_Area" "STATEFP.1"  "PLACEFP"    "PLACENS"    "GEOID.1"   
## [106] "NAME"       "NAMELSAD.1" "LSAD"       "CLASSFP"    "PCICBSA"   
## [111] "PCINECTA"   "MTFCC.1"    "FUNCSTAT.1" "ALAND.1"    "AWATER.1"  
## [116] "INTPTLAT.1" "INTPTLON.1"

We next load point data for landmarks, and clip it to include only those points that are located in Rutherford County. The field that interests us in the landmark data is MTFCC, which gives the category of landmark. The table() command shows that there are only a few landmark categories with significant numbers present in Rutherford County.

v<-readShapePoints("S:/TEFF/450/arcview/5a/tl_2014_47_pointlm/tl_2014_47_pointlm.shp")
w<-readShapePoly("S:/TEFF/450/arcview/5a/TNcounties.shp")
k<-over(v,w)
z<-which(as.character(k$NAME)=="Rutherford")
v<-v[z,]
table(as.character(v@data$MTFCC))
## 
## C3022 C3061 C3081 K1231 K2190 K2451 K2561 K2582 
##    50   211   251     1     3     2     2   539

We pick the category for cemeteries, and calculate for each home the distance in miles to the closest cemetery.

gg<-v[which(v@data[,"MTFCC"]=="K2582"),]
dim(gg)
## [1] 539   5
uhh<-distm(coordinates(gg),spx)/1000*0.621371
## Warning: Coordinate reference system of SpatialPoints object is not set.
## Assuming it is degrees (longitude/latitude)!
dim(uhh)
## [1]  539 3718
ti<-apply(uhh, 2, which.min)
u<-data.frame(u,cemedist=sapply(1:length(ti),function(x) uhh[ti[x],x]))
names(u)
##   [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##   [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
##  [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
##  [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
##  [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
##  [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
##  [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
##  [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
##  [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
##  [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
##  [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
##  [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
##  [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
##  [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
##  [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
##  [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
##  [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
##  [86] "B19060_003" "OBJECTID"   "UACE10"     "GEOID10"    "NAME10"    
##  [91] "NAMELSAD10" "LSAD10"     "MTFCC10"    "UATYP10"    "FUNCSTAT10"
##  [96] "ALAND10"    "AWATER10"   "INTPTLAT10" "INTPTLON10" "Shape_Leng"
## [101] "Shape_Area" "STATEFP.1"  "PLACEFP"    "PLACENS"    "GEOID.1"   
## [106] "NAME"       "NAMELSAD.1" "LSAD"       "CLASSFP"    "PCICBSA"   
## [111] "PCINECTA"   "MTFCC.1"    "FUNCSTAT.1" "ALAND.1"    "AWATER.1"  
## [116] "INTPTLAT.1" "INTPTLON.1" "cemedist"

The previous example calculated distance between spatial points. One can also calculate the closest distance between a spatial point and spatial polygons or lines. First the polygon example:

v<-readShapePoly("S:/TEFF/450/arcview/5a/mt_small_airports.shp")
uhh<-gDistance(spx,v, byid=TRUE)
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
## Warning: Polygons object missing comment attribute ignoring hole(s). See function createSPComment.
ti<-apply(uhh, 2, which.min)
u<-data.frame(u,airpdist=sapply(1:length(ti),function(x) uhh[ti[x],x]))
names(u)
##   [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##   [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
##  [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
##  [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
##  [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
##  [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
##  [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
##  [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
##  [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
##  [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
##  [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
##  [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
##  [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
##  [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
##  [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
##  [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
##  [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
##  [86] "B19060_003" "OBJECTID"   "UACE10"     "GEOID10"    "NAME10"    
##  [91] "NAMELSAD10" "LSAD10"     "MTFCC10"    "UATYP10"    "FUNCSTAT10"
##  [96] "ALAND10"    "AWATER10"   "INTPTLAT10" "INTPTLON10" "Shape_Leng"
## [101] "Shape_Area" "STATEFP.1"  "PLACEFP"    "PLACENS"    "GEOID.1"   
## [106] "NAME"       "NAMELSAD.1" "LSAD"       "CLASSFP"    "PCICBSA"   
## [111] "PCINECTA"   "MTFCC.1"    "FUNCSTAT.1" "ALAND.1"    "AWATER.1"  
## [116] "INTPTLAT.1" "INTPTLON.1" "cemedist"   "airpdist"

Now the lines example:

v<-readShapeLines("S:/TEFF/450/arcview/5a/tl_2014_47149_roads/tl_2014_47149_roads.shp")
table(v@data$MTFCC)
## 
## S1100 S1200 S1400 S1500 S1630 S1640 S1730 S1740 S1750 S1780 
##     2   161  6111    11   147   183     5   144   348   183
v1<-v[which(v@data$MTFCC=="S1630"),] #ramp giving access to limited access highway
uhh<-gDistance(spx,v1, byid=TRUE)
ti<-apply(uhh, 2, which.min)
u<-data.frame(u,rampdist=sapply(1:length(ti),function(x) uhh[ti[x],x]))
names(u)
##   [1] "cityname"   "yr"         "county"     "price"      "lndval"    
##   [6] "impval"     "age"        "yrblt"      "efyrb"      "sqft"      
##  [11] "acres"      "month"      "sound"      "vacant"     "drywall"   
##  [16] "brick"      "sewer"      "level"      "pracc"      "mailaddr"  
##  [21] "mailzip"    "POINT_X"    "POINT_Y"    "SP_ID"      "STATEFP"   
##  [26] "COUNTYFP"   "TRACTCE"    "BLKGRPCE"   "GEOID"      "NAMELSAD"  
##  [31] "MTFCC"      "FUNCSTAT"   "ALAND"      "AWATER"     "INTPTLAT"  
##  [36] "INTPTLON"   "geoid"      "B03002_001" "B03002_002" "B03002_003"
##  [41] "B03002_004" "B03002_005" "B03002_006" "B03002_007" "B03002_008"
##  [46] "B03002_009" "B03002_010" "B03002_011" "B03002_012" "B03002_013"
##  [51] "B03002_014" "B03002_015" "B03002_016" "B03002_017" "B03002_018"
##  [56] "B03002_019" "B03002_020" "B03002_021" "B19013_001" "B19051_001"
##  [61] "B19051_002" "B19051_003" "B19052_001" "B19052_002" "B19052_003"
##  [66] "B19053_001" "B19053_002" "B19053_003" "B19054_001" "B19054_002"
##  [71] "B19054_003" "B19055_001" "B19055_002" "B19055_003" "B19056_001"
##  [76] "B19056_002" "B19056_003" "B19057_001" "B19057_002" "B19057_003"
##  [81] "B19059_001" "B19059_002" "B19059_003" "B19060_001" "B19060_002"
##  [86] "B19060_003" "OBJECTID"   "UACE10"     "GEOID10"    "NAME10"    
##  [91] "NAMELSAD10" "LSAD10"     "MTFCC10"    "UATYP10"    "FUNCSTAT10"
##  [96] "ALAND10"    "AWATER10"   "INTPTLAT10" "INTPTLON10" "Shape_Leng"
## [101] "Shape_Area" "STATEFP.1"  "PLACEFP"    "PLACENS"    "GEOID.1"   
## [106] "NAME"       "NAMELSAD.1" "LSAD"       "CLASSFP"    "PCICBSA"   
## [111] "PCINECTA"   "MTFCC.1"    "FUNCSTAT.1" "ALAND.1"    "AWATER.1"  
## [116] "INTPTLAT.1" "INTPTLON.1" "cemedist"   "airpdist"   "rampdist"