The functionality of this webpage is constrained in D2L, and you might find it easier to read and navigate if you download this html file to your own computer.
In this simulation, we create two sets of independent variables: one is orthogonal, the other highly collinear. We see what happens to coefficient estimates when there is an omitted variable. We run the simulation 500 iterations, each time with the same coefficients and independent variables, but with different error terms.
#---We estimate the coefficients and compare the estimates with true values--
# --We compare collinear case with orthogonal case for omitted variables--
#make 2 independent variables
xc<-matrix(rnorm(100),50,2)
xo<-princomp(xc)$scores #make 2 orthogonal indep. varbs
xc[,1]<-xc[,2]^3 #make 2 highly correlated indep. varbs.
rro<-round(cor(xo)[1,2],4) #correlation between indep. varbs. (orthogonal)
rrc<-round(cor(xc)[1,2],4) #correlation between indep. varbs. (collinear)
true<-c(.5,7) #true value of slope coefficients
aac<-aao<-NULL #--empty objects to store our results from the 500 simulations--
for (i in 1:500){
y<-10 + true[1]*xo[,1]+true[2]*xo[,2]+rnorm(50) # orthogonal indep. varbs.
o1<-coef(lm(y~xo[,1])) #include only first indep. varb.
o2<-coef(lm(y~xo)) #include both indep. varbs.
aao<-rbind(aao,cbind(o1[2],o2[2],true[1],rro)) # append results
y<-10 + true[1]*xc[,1]+true[2]*xc[,2]+rnorm(50) # collinear indep. varbs.
o1<-coef(lm(y~xc[,1])) #include only first indep. varb.
o2<-coef(lm(y~xc)) #include both indep. varbs.
aac<-rbind(aac,cbind(o1[2],o2[2],true[1],rrc)) # append results
}
colnames(aac)<-colnames(aao)<-c("drop1","useBoth","trueVal","corr")
dim(aac)
## [1] 500 4
head(aac)
## drop1 useBoth trueVal corr
## xc[, 1] 3.438549 0.5319643 0.5 0.8523
## xc[, 1] 3.453009 0.2203317 0.5 0.8523
## xc[, 1] 3.563868 0.4831295 0.5 0.8523
## xc[, 1] 3.423266 0.3898045 0.5 0.8523
## xc[, 1] 3.547133 0.5261530 0.5 0.8523
## xc[, 1] 3.488229 0.5165311 0.5 0.8523
You can see that the mean estimated coefficient is biased in the collinear case, when one variable is dropped. However, there is no bias for the orthogonal case.
apply(aac,2,mean) # collinear case
## drop1 useBoth trueVal corr
## 3.4897190 0.5066219 0.5000000 0.8523000
apply(aao,2,mean) # orthogonal case
## drop1 useBoth trueVal corr
## 0.5020301 0.5020301 0.5000000 0.0000000
You can see that dropping a variable reduces the standard error in the collinear case. But there is no change in the orthogonal case.
apply(aac[,1:2],2,sd) # collinear case
## drop1 useBoth
## 0.08164859 0.15470117
apply(aao[,1:2],2,sd) # orthogonal case
## drop1 useBoth
## 0.146022 0.146022
#--plot histograms of results for collinear case--
xrange<-range(aac[,c("drop1","useBoth","trueVal")])
layout(matrix(1:2,1,2))
hist(aac[,"drop1"],breaks=30,xlim=xrange)
lines(density(aac[,"drop1"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
hist(aac[,"useBoth"],breaks=30,xlim=xrange)
lines(density(aac[,"useBoth"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
layout(1)
#--plot histograms of results for orthogonal case--
xrange<-range(aao[,c("drop1","useBoth","trueVal")])
layout(matrix(1:2,1,2))
hist(aao[,"drop1"],breaks=30,xlim=xrange)
lines(density(aao[,"drop1"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
hist(aao[,"useBoth"],breaks=30,xlim=xrange)
lines(density(aao[,"useBoth"]),col="blue")
abline(v=true[1],col="red",lty=2,lwd=2)
layout(1)