# Kristian Skrede Gleditsch # Michael D. Ward # ksg@essex.ac.uk # mw160@duke.edu # # # This file contains code to replicate all the examples with R code # apparing in the book An Introduction to Spatial Regression # Models in the Social Sciences by Michael D. Ward # and Kristian Skrede Gleditsch # # This code is "enhanced" and does not always match exactly what # is given in the book. In particular, some comments that do not # appear in the book have been added to provide additional # explanations, and some formatting has been changed. Moreover, we # have updated the spatial object processing to be compatible with # R version 2.13.2. # # The code assumes that the relevant input files are in # the working directory. # # Last revised on 8 March 2013 # Set a working directory # Use Unix-style / or \\ for directories in R dd <- c("C:\\work\\spatialbook\\") setwd(dd) ### Replicates code reproduced in Chapter 1 # Ex 1: Plotting map with centroids and capitals # Load required libraries # These libraries can be downloaded from the packages menu within R, or # directly from Cran. # # Mac rgdal is now available for OS/X directly via XXXXXXXXXXX library(RColorBrewer) library(maptools) library(spdep) library(sp) library(rgdal) # Read a Robinson projection map from an ESRI shapefile newprojection <- "+proj=robin +lon_0=0 +x_0=0 +y_0=0" rob.shp <- readShapeSpatial("wg2002worldmap.shp",proj4string = CRS(newprojection), IDvar = "FIPS_CNTRY",repair=TRUE,force_ring=TRUE, verbose=TRUE) # Add coordinates coords <- coordinates(rob.shp) # Replot the map itself without a bounding box plot(rob.shp, border="Grey", xaxt="n", yaxt="n", bty="n", lwd=.75, las=1, ylab="", main="Centroids and Capitals", xlab="") # Add the centroids points(coordinates(rob.shp), pch=19,cex=.25, col="red") # Latitude and Longitude ll.mat <- cbind(as.numeric(as.character(rob.shp$Long)), as.numeric(as.character(rob.shp$Lat))) row.names(ll.mat) <- 1:nrow(ll.mat) llCRS <- CRS("+proj=longlat +ellps+WGS84") longlat <- SpatialPoints(ll.mat,proj4string = llCRS) longlat.transf <- spTransform(longlat, CRS("+proj=robin +ellps+WGS84+lon_0=0 + x_0=0 +y_0=0")) points(coordinates(longlat.transf),pch=19,cex=.25,col="black") # Add segments between centroids and capitals # but delete Kiribati, as it crosses date line diffs <- cbind(coordinates(rob.shp),coordinates(longlat.transf)) segments(diffs[-91,1],diffs[-91,2],diffs[-91,3],diffs[-91,4],col="slategray4") # Ex 2. Spatial correlation test for democracy and development example # Load data # # This archive contains four objects: # # sldv contains the data on democracy, gdp per capita, and population # mdd2 contains the minimum distance data # nblist is a list of connectivities based on a 200 km threshold # (see below for code to generate this) # wmat is a row normalized connectivty matrix based on a 200 km threshold # (see below for code to generate this) source("chapter1data.R") # Estimate OLS ols1.fit <- glm(democracy ~ log(gdp.2002/population), data=sldv) # Load spdep library for moran.test() library(sp) library(spdep) # Moran's I test under randomization moran.test(resid(ols1.fit),nb2listw(nblist)) # Moran's I test for residuals from a regression model # based on weighted residuals lm.morantest(ols1.fit,nb2listw(nblist)) # Ex. 3 Create a plot of standardized democracy against its spatial lag # with regression line and rug added # Print to a PDF file # pdffilename <- c("file name and path") # pdf(file=pdffilename, width = 5.0, height = 5.0,family="Times") # Extract residuals from OLS regresion dem <- (resid(ols1.fit)) # Create a standardized democracy variable ds <- (dem-mean(dem))/sqrt(var(dem)) # Create a spatial lag ds.slag <- as.vector(wmat%*%ds) # Standardize spatial lag ds.slag <- (ds.slag-mean(ds.slag))/sqrt(var(ds.slag)) # Plot democracy against its spatial lag plot(ds, ds.slag, xlim=c(-3,3), ylim=c(-3,3), pch=20, las=1, xlab="standardized democracy", ylab="spatial lag of standardized democracy", bty="n") # Regression of spatial lag of democracy on democracy - slope is the Moran's I reg1 <- lm(ds.slag~ds) # Establish a grid for a confidence interval xgrid <- seq(-3,1.5,length.out=158) x0 <- list(ds=xgrid) pred.out <- predict(reg1, x0, interval="confidence") # Put 1 and 2 sd boxes on the plot lines( c(-2,-2,+2,+2,-2), c(-2,+2,+2,-2,-2) ) lines( c(-1,-1,+1,+1,-1), c(-1,+1,+1,-1,-1) ) lines( c(-2,+2), c(0,0) ) lines( c(0,0), c(-2,+2) ) # Add some text for context text(-2.5,3,"(low,high)"); text(2.5,3,"(high,high)") text(-2.5,-3,"(low,low)"); text(2.5,-3,"(high,low)") polygon(x=c(-1,0,0,-1), y=c(-1,-1,0,0), col = "slategray3") polygon(x=c(0,1,1,0), y=c(0,0,1,1), col = "slategray3") # Plot the c.i. region polygon(x=c(xgrid, rev(xgrid)), y=c(pred.out[,3], rev(pred.out[,2])), col="slategray3", border=T) # Put the data on the plot points(ds, ds.slag, pch=20) # Add densities sldensity <- density(ds.slag) lines(sldensity$y+2, sldensity$x, lty=2, col="slategray4") ddensity <- density(ds) lines(ddensity$x, ddensity$y+2, lty=2, col="slategray4",xlim=c(-2,2)) # Add regression line to the plot lines(xgrid, pred.out[,1], type="l", lty=2, col="gray80", lwd=2) # Add rugs for the data densities on the two sides rug(jitter(ds, factor=2), col="slategray3") rug(ds.slag, side=2, col="slategray3") # Label a group of points explicitly text(-2., -2.3, "Oil Exporters", col="slategray4") # Turn off the device, i.e., close file if printing to file # dev.off() # P. 33 # sldv is the data.frame # mdd2 is the minimum distance data.frame # Create a neighbor minimum distance matrix # from the dyadic minimum distance data # (Note that the cshapes library provides a # convienent way to create a matrix directly) mddmat <- matrix(9999,ncol=dim(sldv)[1],nrow=dim(sldv)[1]) dimnames(mddmat) <- list(c(sldv$tla),c(sldv$tla)) for(i in 1:dim(mdd2)[1]){ mddmat[mdd2$ida[i],mdd2$idb[i]] <- mdd2$mindist[i] } # Create a binary matrix with a 200 km threshold m200mat <- mddmat m200mat[m200mat<=200] <- 1 m200mat[m200mat>200] <- 0 # Create a row standardized weights matrix wmat <- m200mat/apply(m200mat,1,sum) # calculate the spatial lag of democracy democracy.spatial.lag <- as.vector(wmat%*%sldv$democracy) # Create a weights list object (a list object is required for spdep) cmat.lw <- mat2listw(m200mat, row.names=row.names(m200mat)) # Extract a neighbor list cmat.nb <- cmat.lw$neighbours #### Chapter 2 source("chapter2adata.R") # P. 47 sldv.fit <- lagsarlm(democracy ~ log(gdp.2002/population), data=sldv, nb2listw(cmat.nb), method="eigen", quiet=FALSE) summary(sldv.fit) moran.test(resid(sldv.fit),nb2listw(cmat.nb)) # P. 50-52 # Code to calculate equilibrium effect of changes in GDP per capita # Create vector to store the estimate for each states ee.est <- rep(NA,dim(sldv)[1]) # Assign the country name labels names(ee.est) <- sldv$tla # Create a null vector to use in loop svec <- rep(0,dim(sldv)[1]) # Create a N x N identity matrix eye <- matrix(0,nrow=dim(sldv)[1],ncol=dim(sldv)[1]) diag(eye) <- 1 # Loop over 1:n states and store effect of change in # each state i in ee.est[i] for(i in 1:length(ee.est)){ cvec <- svec cvec[i] <- 1 res <- solve(eye - 0.56315 * wmat) %*% cvec * 0.99877 ee.est[i] <- res[i] } # Russia example of impact on other states (observation 120) cvec <- rep(0,dim(sldv)[1]) cvec[120] <- 1 # Store estimates for impact of change in Russia in rus.est eye <- matrix(0,nrow=dim(sldv)[1],ncol=dim(sldv)[1]) diag(eye) <- 1 rus.est <- solve(eye - 0.56315 * wmat) %*% cvec*0.99877 # Find ten highest values of rus.est vector rus.est <- round(rus.est,3) rus.est <- data.frame(sldv$tla,rus.est) rus.est[rev(order(rus.est$rus.est)),][1:10,] # P. 53 # Impact of change in $y$ to 10 in China R code # China is observation 32 cvec <- rep(0,dim(sldv)[1]) cvec[32] <- 10 # Store estimates of change in China in chn.est chn.est <- c(cbind (0, 0, wmat%*%cvec) %*% c(summary(sldv.fit)$Coef[,1],summary(sldv.fit)$rho)) chn.est <- round(chn.est,3) # Find all states where non-zero impact chn.est <- data.frame(sldv$tla,chn.est) # P. 56-7 shin <- read.csv("italyturnout.csv",sep=",",header=T) tr <- readShapePoly("turnout", IDvar="FID_1", proj4string=CRS("+proj=robin +lon 0=0")) dnn50km <- dnearneigh(coordinates(tr), 0, 50000) summary(dnn50km) # wmat is the weights matrix wmat <- nb2mat(dnn50km,style="W") sldv.fit <- lagsarlm(turnout ~ log(gdpcap), data=shin, nb2listw(dnn50km), method="eigen", quiet=FALSE) summary(sldv.fit) # p. 58-9 # Extract estimated rho rho <- coef(sldv.fit)[3] # Extract estimated beta beta <- coef(sldv.fit)[1:2] # Create a X matrix X <- cbind(1,log(shin$gdpcap)) # Create an alternative X matrix, changing value for # Reggio Calabria-Sbarre (obs 432) Xs <- X Xs[432]<- log(35) # Create an identity matrix I <- diag(length(shin$gdpcap)) # Find equilibrium effect by looking at # the difference in expected value for the # the two X matrices Ey <- solve(I - rho*wmat)%*%(X%*%beta) EyS <- solve(I - rho*wmat)%*%(Xs%*%beta) dif <- EyS-Ey # P. 61 library(foreign) library(maptools) library(network) library(spdep) library(sp) library(rgdal) setwd(dd) # read in 2004 presidential votes presvote <- read.table("2004presvote.csv",sep=",",header=T) # read in shape files for 48 US States plus District of Columbia usa.shp <- readShapePoly("48_states.shp", proj4string=CRS("+proj=robin +lon 0=0")) # use equal area projection (Robinson) usaall <- merge(data.frame(usa.shp), presvote, by.x = "STATE_NAME", by.y = "State", sort = F) # Create a distance matrix from original polygon shape file tr <- readShapePoly("48_states.shp", IDvar="ObjectID", proj4string=CRS("+proj=robin +lon 0=0")) centroids <- coordinates(tr) # Create neighbors, list, and matrix objects from polygon centroids us48.nb <- poly2nb(usa.shp, row.names = as.character(usa.shp$STATE_NAME)) us48.listw <- nb2listw(us48.nb, style = "B") us48.mat <- (nb2mat(us48.nb, style="B")) # plots the network among the centroids colnames(us48.mat) <- rownames(us48.mat) <- usa.shp$STATE_ABBR usa <- network(us48.mat,directed=F) set.seed(123) # plot network first; then add state boundaries plot.network(usa, displayisolates=T, displaylabels=F, boxed.labels=F, coord=centroids, label.col="gray20", usearrows=F, edge.col=rep("gray60",190), vertex.col="gray30", edge.lty=1) plot(usa.shp,bty="n", border="slategray3" ,xaxt="n",yaxt="n", lwd=.000000000125,las=1,ylab="",xlab="",add=T) # P. 62 library(RColorBrewer) # now plot the Bush:Kerry vote ratio bk <- usaall$Bush/usaall$Kerry # set up five categories and assign colors breaks <-round(quantile((bk), seq(0,1,1/5), na.rm=TRUE),1) # cols <- brewer.pal(length(breaks), "Greys") cols <- brewer.pal(length(breaks), "Greys") # use findInterval to color states by bk variable plot(usa.shp, bty="n", border="slategray3", xaxt="n", yaxt="n",lwd=.000000000125,las=1,ylab="",xlab="") plot(usa.shp, bty="n", col=cols[findInterval(bk, breaks, all.inside=T)], add=T) # P. 69 # data and variables as employed in chapter 2. sem.fit <- errorsarlm(democracy ~ log(gdp.2002/population), data=sldv, nb2listw(cmat.nb), method="eigen", quiet=FALSE) summary(sem.fit) logLik(sem.fit) # P. 78 source("chapter3data_v2.R") tab3.sem <- errorsarlm(logtrade ~ logdem + logapop + logbpop + logargdppc + logbrgdppc + logs + logdist + logmid, data=logdat98,na.action=na.omit, nb2listw(dlist2,style="W"), method="eigen") summary(tab3.sem) logLik(tab3.sem)