################################################## # # MA 575: Linear Models, Fall 2013. # Department of Maths and Stats, Boston University # Week 4: Lecture 2. # ################################################## library(alr3) ################################################## # The R^2 is monotonic increasing wrt the number of covariates. n <- 100 p <- 100; mu <- 0; sig <- 1; y <- rnorm(n,mu,sig); X <- matrix(0,n,p); for(j in 1:p) X[,j] <- rnorm(n,mu,sig); data <- data.frame(cbind(y,X)); ### Fit increasing complex models: R2.vec <- vector("numeric",p); R2.adj.vec <- vector("numeric",p); for(j in 1:p){ formulae <- "y ~ V2"; for(l in 1:(j-1)) formulae <- paste(formulae,"+",names(data)[l+2]); mod <- lm(formulae,data); R2.vec[j] <- summary(mod)\$r.squared R2.adj.vec[j] <- summary(mod)\$adj.r.squared }#p ### Plot if(sum(is.nan(c(R2.vec,R2.adj.vec)))>0){ylim <- c(-.3,1)}else{ylim=range(c(R2.vec,R2.adj.vec))} plot(1:p,R2.vec,"b",col="red3",main="R-squared is monotonically increasing", xlab="Number of parameters",ylab="R-squared",ylim=ylim,pch=20) abline(h=0,lty=2); abline(h=1,lty=2) legend("topleft",legend=c("R-squared","Adj-R-squared"), col=c("red3","blue3"),pch=c(20,20),lty=c(1,1),inset=.05) lines(1:p,R2.adj.vec,"b",col="blue3",pch=20) ################################################## ################################################## ################################################## # Matrix algebra in R. library(alr3) ############################################################ # fuel2001 fuel2001\$Dlic <- 1000*fuel2001\$Drivers/fuel2001\$Pop fuel2001\$Fuel <- 1000*fuel2001\$FuelC/fuel2001\$Pop fuel2001\$Income <- fuel2001\$Income/1000 fuel2001\$logMiles <- log2(fuel2001\$Miles) # Fig 3.3 f <- fuel2001[,c(7, 9, 8, 3, 10)] pairs(f) summary(f) round(cor(f), 4) round(var(f), 4) # This figure is not in the text "../EPS-figs/fuel3.eps" oldpar <- par(mfrow=c(2,2),mai=c(.6,.6,.1,.1),mgp=c(2,1,0)) plot(fuel2001\$Dlic, fuel2001\$Fuel, xlab="(a) Dlic",ylab="Fuel") m1 <- lm(Fuel ~ Dlic, data=fuel2001) abline(m1) plot(fuel2001\$Dlic, fuel2001\$Tax, xlab="(b) Dlic", ylab="Tax") m2 <- lm(Tax ~ Dlic, data=fuel2001) abline(m2) plot(fuel2001\$Tax, fuel2001\$Fuel, xlab="(c) Tax", ylab="Fuel") abline(lm(Fuel ~ Tax, data=fuel2001)) plot(resid(m1), resid(m2), xlab="(d) Residuals from Tax on Dlic", ylab="Residuals from Fuel on Dlic") abline(lm(resid(m2)~resid(m1))) par(oldpar) # Computations of OLS estimator using the fuel data: f\$Intercept <- rep(1, 51) head(f) X <- as.matrix(f[, c(6, 1, 3, 4, 5)]) Y <- f\$Fuel xtx <- t(X) %*% X xtxinv <- solve(xtx) print(xtxinv, digits=4) xty <- t(X) %*% Y print(betahat <- xtxinv %*% xty) m1 <- lm(formula = Fuel ~ Tax + Dlic + Income + logMiles, data = f) s1 <- summary(m1) print(s1, signif.stars=FALSE) m2 <- update(m1, ~ . - Tax) anova(m2, m1) a1 <- anova(m1) SSreg <- sum(a1[1:4, 2]) MSreg <- SSreg/4 SSreg MSreg a1 m2 <- update(m1, ~Dlic + Income + logMiles + Tax) anova(m1,m2) a2 <- anova(m2) df <- c(3, 1, 46) SS1 <- a2\$"Sum Sq" SS <- c(sum(SS1[1:3]), SS1[4], SS1[5]) MS <- SS/df Fval <- MS/MS[3] atab <- data.frame(df, SS=round(SS, 0), MS=round(MS, 0), Fval) row.names(atab) <- c("Regression excluding Tax", "Tax after others", "Residual") round(atab, 2) m3 <- update(m2, ~ . - Tax) m4 <- update(m3, ~Dlic + Tax + Income + logMiles) m5 <- update(m3, ~logMiles + Income + Dlic + Tax) print(anova(m4), signif.stars=FALSE) print(anova(m5), signif.stars=FALSE) m1 <- lm(Fuel ~ Tax + Dlic + Income +logMiles, f) #### # EoF