1 Try downloading this webpage

The functionality of this webpage is constrained in D2L, and you might find it easier to read and navigate if you download this html file to your own computer.

2 Resources to learn R

3 Monte-Carlo simulation for omitted variable bias

In this simulation, we create two sets of independent variables: one is orthogonal, the other highly collinear. We see what happens to coefficient estimates when there is an omitted variable. We run the simulation 500 iterations, each time with the same coefficients and independent variables, but with different error terms.

#---We estimate the coefficients and compare the estimates with true values--
# --We compare collinear case with orthogonal case for omitted variables--

#make 2 independent variables
xc<-matrix(rnorm(100),50,2)  
xo<-princomp(xc)$scores       #make 2 orthogonal indep. varbs
xc[,1]<-xc[,2]^3              #make 2 highly correlated indep. varbs.
rro<-round(cor(xo)[1,2],4)             #correlation between indep. varbs. (orthogonal)
rrc<-round(cor(xc)[1,2],4)             #correlation between indep. varbs. (collinear)
true<-c(.5,7) #true value of slope coefficients

aac<-aao<-NULL #--empty objects to store our results from the 500 simulations--
for (i in 1:500){
  y<-10 + true[1]*xo[,1]+true[2]*xo[,2]+rnorm(50) # orthogonal indep. varbs.
  o1<-coef(lm(y~xo[,1]))       #include only first indep. varb.
  o2<-coef(lm(y~xo))           #include both indep. varbs.
  aao<-rbind(aao,cbind(o1[2],o2[2],true[1],rro)) # append results
  y<-10 + true[1]*xc[,1]+true[2]*xc[,2]+rnorm(50) # collinear indep. varbs.   
  o1<-coef(lm(y~xc[,1]))       #include only first indep. varb.
  o2<-coef(lm(y~xc))           #include both indep. varbs.
  aac<-rbind(aac,cbind(o1[2],o2[2],true[1],rrc)) # append results
}

colnames(aac)<-colnames(aao)<-c("drop1","useBoth","trueVal","corr")
dim(aac)
## [1] 500   4
head(aac)
##            drop1   useBoth trueVal   corr
## xc[, 1] 3.438549 0.5319643     0.5 0.8523
## xc[, 1] 3.453009 0.2203317     0.5 0.8523
## xc[, 1] 3.563868 0.4831295     0.5 0.8523
## xc[, 1] 3.423266 0.3898045     0.5 0.8523
## xc[, 1] 3.547133 0.5261530     0.5 0.8523
## xc[, 1] 3.488229 0.5165311     0.5 0.8523

3.1 Mean and standard deviation of results

3.1.1 Mean

You can see that the mean estimated coefficient is biased in the collinear case, when one variable is dropped. However, there is no bias for the orthogonal case.

apply(aac,2,mean) # collinear case
##     drop1   useBoth   trueVal      corr 
## 3.4897190 0.5066219 0.5000000 0.8523000
apply(aao,2,mean) # orthogonal case
##     drop1   useBoth   trueVal      corr 
## 0.5020301 0.5020301 0.5000000 0.0000000

3.1.2 Standard deviation

You can see that dropping a variable reduces the standard error in the collinear case. But there is no change in the orthogonal case.

apply(aac[,1:2],2,sd) # collinear case
##      drop1    useBoth 
## 0.08164859 0.15470117
apply(aao[,1:2],2,sd) # orthogonal case
##    drop1  useBoth 
## 0.146022 0.146022

3.2 Histograms of results for collinear case

#--plot histograms of results for collinear case--
xrange<-range(aac[,c("drop1","useBoth","trueVal")])

layout(matrix(1:2,1,2))
hist(aac[,"drop1"],breaks=30,xlim=xrange)
lines(density(aac[,"drop1"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
hist(aac[,"useBoth"],breaks=30,xlim=xrange)
lines(density(aac[,"useBoth"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)

layout(1)

3.3 Histograms of results for orthogonal case

#--plot histograms of results for orthogonal case--
xrange<-range(aao[,c("drop1","useBoth","trueVal")])

layout(matrix(1:2,1,2))
hist(aao[,"drop1"],breaks=30,xlim=xrange)
lines(density(aao[,"drop1"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
hist(aao[,"useBoth"],breaks=30,xlim=xrange)
lines(density(aao[,"useBoth"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)

layout(1)