################################################## # # MA 575: Linear Models, Fall 2013. # Department of Maths and Stats, Boston University # Week 10: Lecture 2. # ######################### # This lecture relies on codes provided John Fox and Sanford Weisberg in ### An R Companion to Applied Regression, Second Edition ### Script for Chapter 4. ### Sage Publications, 2011 # ################################################## library(alr3) library(car) options(show.signif.stars=TRUE) # turn on/off significance stars in lm output ############################################## ############################################## ############################################## ### Prestige data set. head(Prestige) some(Prestige) nrow(Prestige) ncol(Prestige) dim(Prestige) Prestige\$type ### This contains some missing values. class(Prestige\$type) ### Class returns the type of variable. levels(Prestige\$type) ### Levels order categories in alphabetical order. ### Re-order the levels of a factor: Prestige\$type <- with(Prestige, factor(type, levels=c("bc", "wc", "prof"))) select <- c(1, 2, 35, 36, 61, 62) # A few rows Prestige\$type[select] # A few values some(Prestige) ### Coercion into numeric. type.number <- as.numeric(Prestige\$type) type.number[select] class(type.number) ### Coercion into character. type.character <- as.character(Prestige\$type) type.character[select] class(type.character) ### Coercion into factor again. type.factor <- factor(type.character, levels=c("bc", "wc", "prof")) type.factor[select] ### Plotting the data: dim(Prestige); plot(Prestige) ############################## ### Plotting each variable: dim(Prestige); par(mfrow=c(2,ncol(Prestige)/2)); for(j in 1:ncol(Prestige)){ if(j<=4){ hist(Prestige[,j],col="red3", main=paste("(",letters[j],") ",names(Prestige)[j],sep="")); }else{ plot(Prestige[,j], col="red3", main=paste("(",letters[j],") ",names(Prestige)[j],sep="")); }#fi }#j ### Coercion into factor again. ?contr.treatment (z <- factor(rep(c("a", "b", "c", "d"), c(3, 2, 4, 1)))) model.matrix(~ z) contrasts(z) set.seed(1234) # to reproduce results in the text some(Baumann) # sample 10 observations nrow(Baumann) xtabs(~ group, data=Baumann) ### Tapply for group: with(Baumann, tapply(post.test.3, group, mean)) with(Baumann, tapply(post.test.3, group, sd)) plot(post.test.3 ~ group, data=Baumann, xlab="Group", ylab="Reading Score") ### ANOVA: baum.mod.1 <- lm(post.test.3 ~ group, data=Baumann); summary(baum.mod.1) ### Model matrix. model.matrix(baum.mod.1) ### Contrast type: L <- 5; contr.helmert(L) contr.sum(L) contr.treatment(L) contr.SAS(L) ### Removing the intercept for obtaining a group cell design: mod <- lm(post.test.3 ~ group -1 , data=Baumann); model.matrix(mod) summary(mod) ### One can also change the reference group in the factor: summary(update(baum.mod.1, . ~ . - group + relevel(group, ref="DRTA"))) ############################################## ### ANCOVA: Analysis of Covariance. #### Update model prestige.mod <- lm(prestige ~ education + women,data=Prestige); summary(prestige.mod) #### Update model prestige.mod.1 <- update(prestige.mod, ~ . - women + type) summary(prestige.mod.1) ### Add an interaction effect: model.matrix(~ type + education + education:type, data=Prestige) prestige.mod.2 <- update(prestige.mod.1, . ~ . + log2(income):type + education:type) summary(prestige.mod.2) ### All effects using the * symbol: mod1 <- lm(prestige ~ education + type + education:type, data=Prestige) mod2 <- lm(prestige ~ education*type, data=Prestige) summary(mod1); summary(mod2); ### These are exactly equivalent. ### Compare these two models. They're not identical. prestige.mod.2a <- lm(prestige ~ education*type + log2(income)*type, data=Prestige) prestige.mod.2b <- lm(prestige ~ type*(education + log2(income)) , data=Prestige) ### The model.matrix is very informative: some(model.matrix(prestige.mod.2a)) some(model.matrix(prestige.mod.2b)) ############################################## ############################################## ############################################## ### Compare anova(mod), anova(mod1,mod2), and Anova(mod); # (anova with one or several arguments are different.) ### anova with more than 1 argument: anova(prestige.mod.0, prestige.mod.1) # Compare two models. ### anova with 1 argument: anova(prestige.mod.1) # Test for nested models. ### This compares in a hierarchical manner: # prestige ~ 1 vs. prestige ~ education # prestige ~ education vs. prestige ~ education + log2(income) # prestige ~ education + log2(income) vs. prestige ~ education + log2(income) + type ### Upper case Anova: Anova(prestige.mod.1) # Type II tests. ### This compares models marginally: # prestige ~ log2(income) + type vs. prestige ~ education + log2(income) + type # prestige ~ education + type vs. prestige ~ education + log2(income) + type # prestige ~ education + log2(income) vs. prestige ~ education + log2(income) + type ################################################ ################################################ ################################################ ### Delta Method. # (Non-linear functions of the coefficients.) library(car) head(Davis) # first 6 rows nrow(Davis) davis.mod <- lm(weight ~ repwt, data=Davis) davis.mod summary(davis.mod) confint(davis.mod) scatterplot(weight ~ repwt, data=Davis, smooth=FALSE, id.n=1) davis.mod.2 <- update(davis.mod, subset=-12) summary(davis.mod.2) ### Cross-over weight: deltaMethod(davis.mod.2, "(Intercept)/(1 - repwt)") ################################################ ################################################ ################################################ ### Confidence ellipses. duncan.mod <- lm(prestige ~ income + education, data=Duncan) (ci <- confint(duncan.mod)) confidenceEllipse(duncan.mod, levels=c(0.85, 0.95)) for (j in 1:2) abline(v=ci[2, j], lty=2) # marginal CI for education for (j in 1:2) abline(h=ci[3, j], lty=2) # marginal CI for income with(Duncan, { dataEllipse(income, education, levels=c(0.5, 0.75, 0.9, 0.95)) # to exit from identify, right-click in Windows, esc in Mac OS X identify(income, education, rownames(Duncan)) }) tval <- (coef(davis.mod.2)[2] - 1)/sqrt(vcov(davis.mod.2)[2, 2]) pval <- 2*pt(abs(tval), df.residual(davis.mod.2), lower.tail=FALSE) c(tval=tval, pval=pval) prestige.mod.1 <- lm(prestige ~ education + log2(income) + type, data=na.omit(Prestige)) # full model prestige.mod.0 <- update(prestige.mod.1, . ~ 1) # intercept only #### # EoF